From f5895943d91b41b0368830cdb6eaffb8eda0f4c8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Mar 2014 17:44:49 +0000 Subject: KEYS: Move the flags representing required permission to linux/key.h Move the flags representing required permission to linux/key.h as the perm parameter of security_key_permission() is in terms of them - and not the permissions mask flags used in key->perm. Whilst we're at it: (1) Rename them to be KEY_NEED_xxx rather than KEY_xxx to avoid collisions with symbols in uapi/linux/input.h. (2) Don't use key_perm_t for a mask of required permissions, but rather limit it to the permissions mask attached to the key and arguments related directly to that. Signed-off-by: David Howells Tested-by: Dmitry Kasatkin --- include/linux/key.h | 11 +++++++++++ include/linux/security.h | 6 +++--- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/key.h b/include/linux/key.h index 80d677483e31..cd0abb8c9c33 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -309,6 +309,17 @@ static inline key_serial_t key_serial(const struct key *key) extern void key_set_timeout(struct key *, unsigned); +/* + * The permissions required on a key that we're looking up. + */ +#define KEY_NEED_VIEW 0x01 /* Require permission to view attributes */ +#define KEY_NEED_READ 0x02 /* Require permission to read content */ +#define KEY_NEED_WRITE 0x04 /* Require permission to update / modify */ +#define KEY_NEED_SEARCH 0x08 /* Require permission to search (keyring) or find (key) */ +#define KEY_NEED_LINK 0x10 /* Require permission to link */ +#define KEY_NEED_SETATTR 0x20 /* Require permission to change attributes */ +#define KEY_NEED_ALL 0x3f /* All the above permissions */ + /** * key_is_instantiated - Determine if a key has been positively instantiated * @key: The key to check. diff --git a/include/linux/security.h b/include/linux/security.h index 5623a7f965b7..3a5ed0cd2751 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1707,7 +1707,7 @@ struct security_operations { void (*key_free) (struct key *key); int (*key_permission) (key_ref_t key_ref, const struct cred *cred, - key_perm_t perm); + unsigned perm); int (*key_getsecurity)(struct key *key, char **_buffer); #endif /* CONFIG_KEYS */ @@ -3026,7 +3026,7 @@ static inline int security_path_chroot(struct path *path) int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags); void security_key_free(struct key *key); int security_key_permission(key_ref_t key_ref, - const struct cred *cred, key_perm_t perm); + const struct cred *cred, unsigned perm); int security_key_getsecurity(struct key *key, char **_buffer); #else @@ -3044,7 +3044,7 @@ static inline void security_key_free(struct key *key) static inline int security_key_permission(key_ref_t key_ref, const struct cred *cred, - key_perm_t perm) + unsigned perm) { return 0; } -- cgit v1.2.3-71-gd317 From 5d2e9fadf43e87e690bfbe607313bf9be47867e4 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 25 Mar 2014 10:30:47 +0200 Subject: Bluetooth: Add scan_rsp parameter to mgmt_device_found() In preparation for being able to merge ADV_IND/ADV_SCAN_IND and SCAN_RSP together into a single device found event add a second parameter to the mgmt_device_found function. For now all callers pass NULL as this parameters since we don't yet have storing of the last received advertising report. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 5 +++-- net/bluetooth/hci_event.c | 10 +++++----- net/bluetooth/mgmt.c | 16 +++++++++++----- 3 files changed, 19 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 5f8bc05694ac..a1b8eab8a47d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1264,8 +1264,9 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192, u8 *randomizer192, u8 *hash256, u8 *randomizer256, u8 status); void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, - u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, - u8 ssp, u8 *eir, u16 eir_len); + u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, u8 + ssp, u8 *eir, u16 eir_len, u8 *scan_rsp, + u8 scan_rsp_len); void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, s8 rssi, u8 *name, u8 name_len); void mgmt_discovering(struct hci_dev *hdev, u8 discovering); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 2388f2c09887..4a2c919d5908 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1827,7 +1827,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) name_known = hci_inquiry_cache_update(hdev, &data, false, &ssp); mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, 0, !name_known, ssp, NULL, - 0); + 0, NULL, 0); } hci_dev_unlock(hdev); @@ -3102,7 +3102,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, false, &ssp); mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, info->rssi, - !name_known, ssp, NULL, 0); + !name_known, ssp, NULL, 0, NULL, 0); } } else { struct inquiry_info_with_rssi *info = (void *) (skb->data + 1); @@ -3120,7 +3120,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, false, &ssp); mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, info->rssi, - !name_known, ssp, NULL, 0); + !name_known, ssp, NULL, 0, NULL, 0); } } @@ -3309,7 +3309,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, eir_len = eir_get_length(info->data, sizeof(info->data)); mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, info->rssi, !name_known, - ssp, info->data, eir_len); + ssp, info->data, eir_len, NULL, 0); } hci_dev_unlock(hdev); @@ -3972,7 +3972,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, } mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, rssi, 0, 1, - data, len); + data, len, NULL, 0); } static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d2d4e0d5aed0..a0ef5c076880 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5669,7 +5669,8 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192, void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, u8 - ssp, u8 *eir, u16 eir_len) + ssp, u8 *eir, u16 eir_len, u8 *scan_rsp, + u8 scan_rsp_len) { char buf[512]; struct mgmt_ev_device_found *ev = (void *) buf; @@ -5679,8 +5680,10 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, if (!hci_discovery_active(hdev)) return; - /* Leave 5 bytes for a potential CoD field */ - if (sizeof(*ev) + eir_len + 5 > sizeof(buf)) + /* Make sure that the buffer is big enough. The 5 extra bytes + * are for the potential CoD field. + */ + if (sizeof(*ev) + eir_len + scan_rsp_len + 5 > sizeof(buf)) return; memset(buf, 0, sizeof(buf)); @@ -5707,8 +5710,11 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, eir_len = eir_append_data(ev->eir, eir_len, EIR_CLASS_OF_DEV, dev_class, 3); - ev->eir_len = cpu_to_le16(eir_len); - ev_size = sizeof(*ev) + eir_len; + if (scan_rsp_len > 0) + memcpy(ev->eir + eir_len, scan_rsp, scan_rsp_len); + + ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len); + ev_size = sizeof(*ev) + eir_len + scan_rsp_len; mgmt_event(MGMT_EV_DEVICE_FOUND, hdev, ev, ev_size, NULL); } -- cgit v1.2.3-71-gd317 From 3c857757ef6e5a4e472bd3e5c934709c2eb482af Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 25 Mar 2014 10:30:49 +0200 Subject: Bluetooth: Add directed advertising support through connect() When we're in peripheral mode (HCI_ADVERTISING flag is set) the most natural mapping of connect() is to perform directed advertising to the peer device. This patch does the necessary changes to enable directed advertising and keeps the hci_conn state as BT_CONNECT in a similar way as is done for central or BR/EDR connection initiation. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 1 + net/bluetooth/hci_conn.c | 77 ++++++++++++++++++++++++++++++++++++++++----- net/bluetooth/hci_event.c | 19 +++++++++-- 3 files changed, 87 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index be150cf8cd43..4261a67682c0 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -367,6 +367,7 @@ enum { #define HCI_ERROR_REMOTE_POWER_OFF 0x15 #define HCI_ERROR_LOCAL_HOST_TERM 0x16 #define HCI_ERROR_PAIRING_NOT_ALLOWED 0x18 +#define HCI_ERROR_ADVERTISING_TIMEOUT 0x3c /* Flow control modes */ #define HCI_FLOW_CTL_MODE_PACKET_BASED 0x00 diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 129c22a85ccf..55a174317925 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -367,9 +367,23 @@ static void le_conn_timeout(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, le_conn_timeout.work); + struct hci_dev *hdev = conn->hdev; BT_DBG(""); + /* We could end up here due to having done directed advertising, + * so clean up the state if necessary. This should however only + * happen with broken hardware or if low duty cycle was used + * (which doesn't have a timeout of its own). + */ + if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) { + u8 enable = 0x00; + hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), + &enable); + hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT); + return; + } + hci_le_create_connection_cancel(conn); } @@ -549,6 +563,11 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) * favor of connection establishment, we should restart it. */ hci_update_background_scan(hdev); + + /* Re-enable advertising in case this was a failed connection + * attempt as a peripheral. + */ + mgmt_reenable_advertising(hdev); } static void create_le_conn_complete(struct hci_dev *hdev, u8 status) @@ -609,6 +628,45 @@ static void hci_req_add_le_create_conn(struct hci_request *req, conn->state = BT_CONNECT; } +static void hci_req_directed_advertising(struct hci_request *req, + struct hci_conn *conn) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_adv_param cp; + u8 own_addr_type; + u8 enable; + + enable = 0x00; + hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); + + /* Clear the HCI_ADVERTISING bit temporarily so that the + * hci_update_random_address knows that it's safe to go ahead + * and write a new random address. The flag will be set back on + * as soon as the SET_ADV_ENABLE HCI command completes. + */ + clear_bit(HCI_ADVERTISING, &hdev->dev_flags); + + /* Set require_privacy to false so that the remote device has a + * chance of identifying us. + */ + if (hci_update_random_address(req, false, &own_addr_type) < 0) + return; + + memset(&cp, 0, sizeof(cp)); + cp.type = LE_ADV_DIRECT_IND; + cp.own_address_type = own_addr_type; + cp.direct_addr_type = conn->dst_type; + bacpy(&cp.direct_addr, &conn->dst); + cp.channel_map = hdev->le_adv_channel_map; + + hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp); + + enable = 0x01; + hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); + + conn->state = BT_CONNECT; +} + struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, u8 auth_type) { @@ -618,9 +676,6 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, struct hci_request req; int err; - if (test_bit(HCI_ADVERTISING, &hdev->flags)) - return ERR_PTR(-ENOTSUPP); - /* Some devices send ATT messages as soon as the physical link is * established. To be able to handle these ATT messages, the user- * space first establishes the connection and then starts the pairing @@ -668,13 +723,20 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, return ERR_PTR(-ENOMEM); conn->dst_type = dst_type; - - conn->out = true; - conn->link_mode |= HCI_LM_MASTER; conn->sec_level = BT_SECURITY_LOW; conn->pending_sec_level = sec_level; conn->auth_type = auth_type; + hci_req_init(&req, hdev); + + if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) { + hci_req_directed_advertising(&req, conn); + goto create_conn; + } + + conn->out = true; + conn->link_mode |= HCI_LM_MASTER; + params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); if (params) { conn->le_conn_min_interval = params->conn_min_interval; @@ -684,8 +746,6 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, conn->le_conn_max_interval = hdev->le_conn_max_interval; } - hci_req_init(&req, hdev); - /* If controller is scanning, we stop it since some controllers are * not able to scan and connect at the same time. Also set the * HCI_LE_SCAN_INTERRUPTED flag so that the command complete @@ -699,6 +759,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, hci_req_add_le_create_conn(&req, conn); +create_conn: err = hci_req_run(&req, create_le_conn_complete); if (err) { hci_conn_del(conn); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4a2c919d5908..81e5236a0119 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -991,10 +991,25 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) if (!sent) return; + if (status) + return; + hci_dev_lock(hdev); - if (!status) - mgmt_advertising(hdev, *sent); + /* If we're doing connection initation as peripheral. Set a + * timeout in case something goes wrong. + */ + if (*sent) { + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (conn) + queue_delayed_work(hdev->workqueue, + &conn->le_conn_timeout, + HCI_LE_CONN_TIMEOUT); + } + + mgmt_advertising(hdev, *sent); hci_dev_unlock(hdev); } -- cgit v1.2.3-71-gd317 From b9a6328f2a7f15490de7e45eabb025f8b74a81af Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 25 Mar 2014 10:51:52 +0200 Subject: Bluetooth: Merge ADV_IND/ADV_SCAN_IND and SCAN_RSP together To avoid too many events being sent to user space and to help parsing of all available remote device data it makes sense for us to wait for the scan response and send a single merged Device Found event to user space. This patch adds a few new variables to hci_dev to track the last received ADV_IND/ADV_SCAN_IND, i.e. those which will cause a SCAN_REQ to be send in the case of active scanning. When the SCAN_RSP is received the pending data is passed together with the SCAN_RSP to the mgmt_device_found function which takes care of merging them into a single Device Found event. We also need a bit of extra logic to handle situations where we don't receive a SCAN_RSP after caching some data. In such a scenario we simply have to send out the pending data as it is and then operate on the new report as if there was no pending data. We also need to send out any pending data when scanning stops as well as ensure that the storage is empty at the start of a new active scanning session. These both cases are covered by the update to the hci_cc_le_set_scan_enable function in this patch. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 4 ++ net/bluetooth/hci_event.c | 103 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 105 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a1b8eab8a47d..59b112397d39 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -68,6 +68,10 @@ struct discovery_state { struct list_head unknown; /* Name state not known */ struct list_head resolve; /* Name needs to be resolved */ __u32 timestamp; + bdaddr_t last_adv_addr; + u8 last_adv_addr_type; + u8 last_adv_data[HCI_MAX_AD_LENGTH]; + u8 last_adv_data_len; }; struct hci_conn_hash { diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 81e5236a0119..5816a13c5342 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1033,6 +1033,32 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_unlock(hdev); } +static bool has_pending_adv_report(struct hci_dev *hdev) +{ + struct discovery_state *d = &hdev->discovery; + + return bacmp(&d->last_adv_addr, BDADDR_ANY); +} + +static void clear_pending_adv_report(struct hci_dev *hdev) +{ + struct discovery_state *d = &hdev->discovery; + + bacpy(&d->last_adv_addr, BDADDR_ANY); + d->last_adv_data_len = 0; +} + +static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 bdaddr_type, u8 *data, u8 len) +{ + struct discovery_state *d = &hdev->discovery; + + bacpy(&d->last_adv_addr, bdaddr); + d->last_adv_addr_type = bdaddr_type; + memcpy(d->last_adv_data, data, len); + d->last_adv_data_len = len; +} + static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) { @@ -1051,9 +1077,24 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, switch (cp->enable) { case LE_SCAN_ENABLE: set_bit(HCI_LE_SCAN, &hdev->dev_flags); + if (hdev->le_scan_type == LE_SCAN_ACTIVE) + clear_pending_adv_report(hdev); break; case LE_SCAN_DISABLE: + /* We do this here instead of when setting DISCOVERY_STOPPED + * since the latter would potentially require waiting for + * inquiry to stop too. + */ + if (has_pending_adv_report(hdev)) { + struct discovery_state *d = &hdev->discovery; + + mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK, + d->last_adv_addr_type, NULL, 0, 0, + 1, d->last_adv_data, + d->last_adv_data_len, NULL, 0); + } + /* Cancel this timer so that we don't try to disable scanning * when it's already disabled. */ @@ -3979,6 +4020,8 @@ static void check_pending_le_conn(struct hci_dev *hdev, bdaddr_t *addr, static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, u8 bdaddr_type, s8 rssi, u8 *data, u8 len) { + struct discovery_state *d = &hdev->discovery; + /* Passive scanning shouldn't trigger any device found events */ if (hdev->le_scan_type == LE_SCAN_PASSIVE) { if (type == LE_ADV_IND || type == LE_ADV_DIRECT_IND) @@ -3986,8 +4029,64 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, return; } - mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, rssi, 0, 1, - data, len, NULL, 0); + /* If there's nothing pending either store the data from this + * event or send an immediate device found event if the data + * should not be stored for later. + */ + if (!has_pending_adv_report(hdev)) { + /* If the report will trigger a SCAN_REQ store it for + * later merging. + */ + if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) { + store_pending_adv_report(hdev, bdaddr, bdaddr_type, + data, len); + return; + } + + mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, + rssi, 0, 1, data, len, NULL, 0); + return; + } + + /* If the pending data doesn't match this report or this isn't a + * scan response (e.g. we got a duplicate ADV_IND) then force + * sending of the pending data. + */ + if (type != LE_ADV_SCAN_RSP || bacmp(bdaddr, &d->last_adv_addr) || + bdaddr_type != d->last_adv_addr_type) { + /* Send out whatever is in the cache */ + mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK, + d->last_adv_addr_type, NULL, 0, 0, 1, + d->last_adv_data, d->last_adv_data_len, + NULL, 0); + + /* If the new report will trigger a SCAN_REQ store it for + * later merging. + */ + if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) { + store_pending_adv_report(hdev, bdaddr, bdaddr_type, + data, len); + return; + } + + /* The advertising reports cannot be merged, so clear + * the pending report and send out a device found event. + */ + clear_pending_adv_report(hdev); + mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK, + d->last_adv_addr_type, NULL, rssi, 0, 1, + data, len, NULL, 0); + return; + } + + /* If we get here we've got a pending ADV_IND or ADV_SCAN_IND and + * the new event is a SCAN_RSP. We can therefore proceed with + * sending a merged device found event. + */ + mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK, + d->last_adv_addr_type, NULL, rssi, 0, 1, data, len, + d->last_adv_data, d->last_adv_data_len); + clear_pending_adv_report(hdev); } static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) -- cgit v1.2.3-71-gd317 From 73cf71d9865ad83c2ab7d09bc71be129088e4ded Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 25 Mar 2014 12:06:19 +0200 Subject: Bluetooth: Fix line splitting of mgmt_device_found parameters The line was incorrectly split between the variable type and its name. This patch fixes the issue. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 4 ++-- net/bluetooth/mgmt.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 59b112397d39..0ba7617ceb27 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1268,8 +1268,8 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192, u8 *randomizer192, u8 *hash256, u8 *randomizer256, u8 status); void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, - u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, u8 - ssp, u8 *eir, u16 eir_len, u8 *scan_rsp, + u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, + u8 ssp, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len); void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, s8 rssi, u8 *name, u8 name_len); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a0ef5c076880..37706e89af50 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5668,8 +5668,8 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192, } void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, - u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, u8 - ssp, u8 *eir, u16 eir_len, u8 *scan_rsp, + u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, + u8 ssp, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) { char buf[512]; -- cgit v1.2.3-71-gd317 From ff5cd29f5cb8de0f0bc9016874ddde467d4b0c85 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 25 Mar 2014 14:40:52 +0200 Subject: Bluetooth: Store also RSSI for pending advertising reports Especially in crowded environments it can become frequent that we have to send out whatever pending event there is stored. Since user space has its own filtering of small RSSI changes sending a 0 value will essentially force user space to wake up the higher layers (e.g. over D-Bus) even though the RSSI didn't actually change more than the threshold value. This patch adds storing also of the RSSI for pending advertising reports so that we report an as accurate RSSI as possible when we have to send out the stored information to user space. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_event.c | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 0ba7617ceb27..c2a419c2c5c7 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -70,6 +70,7 @@ struct discovery_state { __u32 timestamp; bdaddr_t last_adv_addr; u8 last_adv_addr_type; + s8 last_adv_rssi; u8 last_adv_data[HCI_MAX_AD_LENGTH]; u8 last_adv_data_len; }; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1b2221d62007..2ecb34f2e2ad 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1049,12 +1049,13 @@ static void clear_pending_adv_report(struct hci_dev *hdev) } static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr, - u8 bdaddr_type, u8 *data, u8 len) + u8 bdaddr_type, s8 rssi, u8 *data, u8 len) { struct discovery_state *d = &hdev->discovery; bacpy(&d->last_adv_addr, bdaddr); d->last_adv_addr_type = bdaddr_type; + d->last_adv_rssi = rssi; memcpy(d->last_adv_data, data, len); d->last_adv_data_len = len; } @@ -4040,7 +4041,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, */ if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) { store_pending_adv_report(hdev, bdaddr, bdaddr_type, - data, len); + rssi, data, len); return; } @@ -4061,8 +4062,9 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, /* Send out whatever is in the cache, but skip duplicates */ if (!match) mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK, - d->last_adv_addr_type, NULL, 0, - 0, 1, d->last_adv_data, + d->last_adv_addr_type, NULL, + d->last_adv_rssi, 0, 1, + d->last_adv_data, d->last_adv_data_len, NULL, 0); /* If the new report will trigger a SCAN_REQ store it for @@ -4070,7 +4072,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, */ if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) { store_pending_adv_report(hdev, bdaddr, bdaddr_type, - data, len); + rssi, data, len); return; } -- cgit v1.2.3-71-gd317 From ae55f5982a8bc6adbafb337e0b781d30d5617782 Mon Sep 17 00:00:00 2001 From: Lukasz Rymanowski Date: Thu, 27 Mar 2014 20:55:19 +0100 Subject: Bluetooth: Keep msec in DISCOV_INTERLEAVED_TIMEOUT Keep msec instead of jiffies in this define. This is needed by following patch where we want this timeout to be exposed in debugfs. Note: Value of this timeout comes from recommendation in BT Core Spec.4.0, Vol 3, Part C, chapter 13.2.1. Signed-off-by: Lukasz Rymanowski Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/mgmt.c | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c2a419c2c5c7..08a1d44eeab0 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1210,7 +1210,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event); #define DISCOV_LE_SCAN_WIN 0x12 #define DISCOV_LE_SCAN_INT 0x12 #define DISCOV_LE_TIMEOUT msecs_to_jiffies(10240) -#define DISCOV_INTERLEAVED_TIMEOUT msecs_to_jiffies(5120) +#define DISCOV_INTERLEAVED_TIMEOUT 5120 /* msec */ #define DISCOV_INTERLEAVED_INQUIRY_LEN 0x04 #define DISCOV_BREDR_INQUIRY_LEN 0x08 diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 37706e89af50..944c4fc87905 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3351,6 +3351,8 @@ static int mgmt_start_discovery_failed(struct hci_dev *hdev, u8 status) static void start_discovery_complete(struct hci_dev *hdev, u8 status) { + unsigned long timeout = 0; + BT_DBG("status %d", status); if (status) { @@ -3366,13 +3368,11 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status) switch (hdev->discovery.type) { case DISCOV_TYPE_LE: - queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, - DISCOV_LE_TIMEOUT); + timeout = DISCOV_LE_TIMEOUT; break; case DISCOV_TYPE_INTERLEAVED: - queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, - DISCOV_INTERLEAVED_TIMEOUT); + timeout = msecs_to_jiffies(DISCOV_INTERLEAVED_TIMEOUT); break; case DISCOV_TYPE_BREDR: @@ -3381,6 +3381,11 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status) default: BT_ERR("Invalid discovery type %d", hdev->discovery.type); } + + if (!timeout) + return; + + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, timeout); } static int start_discovery(struct sock *sk, struct hci_dev *hdev, -- cgit v1.2.3-71-gd317 From b9a7a61e5c3e2f6316c2aedf4ca171bdee7a4804 Mon Sep 17 00:00:00 2001 From: Lukasz Rymanowski Date: Thu, 27 Mar 2014 20:55:20 +0100 Subject: Bluetooth: Add new debugfs parameter With this patch it is possible to control discovery interleaved timeout value from debugfs. It is for fine tuning of this timeout. Signed-off-by: Lukasz Rymanowski Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 4 ++++ net/bluetooth/mgmt.c | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 08a1d44eeab0..e0c26bc144e5 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -199,6 +199,7 @@ struct hci_dev { __u16 le_scan_window; __u16 le_conn_min_interval; __u16 le_conn_max_interval; + __u16 discov_interleaved_timeout; __u8 ssp_debug_mode; __u16 devid_source; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 7cf511cb1171..d31f144860d1 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1823,6 +1823,9 @@ static int __hci_init(struct hci_dev *hdev) &lowpan_debugfs_fops); debugfs_create_file("le_auto_conn", 0644, hdev->debugfs, hdev, &le_auto_conn_fops); + debugfs_create_u16("discov_interleaved_timeout", 0644, + hdev->debugfs, + &hdev->discov_interleaved_timeout); } return 0; @@ -3785,6 +3788,7 @@ struct hci_dev *hci_alloc_dev(void) hdev->le_conn_max_interval = 0x0038; hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT; + hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT; mutex_init(&hdev->lock); mutex_init(&hdev->req_lock); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 944c4fc87905..b34e13aed51c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3372,7 +3372,7 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status) break; case DISCOV_TYPE_INTERLEAVED: - timeout = msecs_to_jiffies(DISCOV_INTERLEAVED_TIMEOUT); + timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); break; case DISCOV_TYPE_BREDR: -- cgit v1.2.3-71-gd317 From 3d5a76f08bbac55305da87f4c810279189f64297 Mon Sep 17 00:00:00 2001 From: Lukasz Rymanowski Date: Thu, 27 Mar 2014 20:55:21 +0100 Subject: Bluetooth: Keep msec in DISCOV_LE_TIMEOUT To be consistent, lets use msec for this timeout as well. Note: This define value is a minimum scan time taken from BT Core spec 4.0, Vol 3, Part C, chapter 9.2.6 Signed-off-by: Lukasz Rymanowski Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/mgmt.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index e0c26bc144e5..d73f41855ada 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1210,7 +1210,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event); */ #define DISCOV_LE_SCAN_WIN 0x12 #define DISCOV_LE_SCAN_INT 0x12 -#define DISCOV_LE_TIMEOUT msecs_to_jiffies(10240) +#define DISCOV_LE_TIMEOUT 10240 /* msec */ #define DISCOV_INTERLEAVED_TIMEOUT 5120 /* msec */ #define DISCOV_INTERLEAVED_INQUIRY_LEN 0x04 #define DISCOV_BREDR_INQUIRY_LEN 0x08 diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index b34e13aed51c..11cb00a2befb 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3368,7 +3368,7 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status) switch (hdev->discovery.type) { case DISCOV_TYPE_LE: - timeout = DISCOV_LE_TIMEOUT; + timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); break; case DISCOV_TYPE_INTERLEAVED: -- cgit v1.2.3-71-gd317 From d728c8ef8bea6e81f44933c0237531cda499577e Mon Sep 17 00:00:00 2001 From: Brad Volkin Date: Tue, 18 Feb 2014 10:15:56 -0800 Subject: drm/i915: Add a CMD_PARSER_VERSION getparam So userspace can query the kernel for command parser support. v2: Add i915_cmd_parser_get_version(), history log, and kerneldoc OTC-Tracker: AXIA-4631 Change-Id: I58af650db9f6753c2dcac9c54ab432fd31db302f Signed-off-by: Brad Volkin Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_cmd_parser.c | 19 +++++++++++++++++++ drivers/gpu/drm/i915/i915_dma.c | 3 +++ drivers/gpu/drm/i915/i915_drv.h | 1 + include/uapi/drm/i915_drm.h | 1 + 4 files changed, 24 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index a8f00dbc0dde..bae7c2f33692 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -896,3 +896,22 @@ int i915_parse_cmds(struct intel_ring_buffer *ring, return ret; } + +/** + * i915_cmd_parser_get_version() - get the cmd parser version number + * + * The cmd parser maintains a simple increasing integer version number suitable + * for passing to userspace clients to determine what operations are permitted. + * + * Return: the current version number of the cmd parser + */ +int i915_cmd_parser_get_version(void) +{ + /* + * Command parser version history + * + * 1. Initial version. Checks batches and reports violations, but leaves + * hardware parsing enabled (so does not allow new use cases). + */ + return 1; +} diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 96177eec0a0e..0b38f88c35f0 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1017,6 +1017,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_HANDLE_LUT: value = 1; break; + case I915_PARAM_CMD_PARSER_VERSION: + value = i915_cmd_parser_get_version(); + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8e1576cf6d63..0801e157a7ff 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2385,6 +2385,7 @@ void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone); const char *i915_cache_level_str(int type); /* i915_cmd_parser.c */ +int i915_cmd_parser_get_version(void); void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring); bool i915_needs_cmd_parser(struct intel_ring_buffer *ring); int i915_parse_cmds(struct intel_ring_buffer *ring, diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 126bfaa8bb6b..8a3e4ef00c3d 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -337,6 +337,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_EXEC_NO_RELOC 25 #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 #define I915_PARAM_HAS_WT 27 +#define I915_PARAM_CMD_PARSER_VERSION 28 typedef struct drm_i915_getparam { int param; -- cgit v1.2.3-71-gd317 From c2d15359c4b5476c181d8a51bdeba4cead15c120 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 1 Apr 2014 12:59:08 +0300 Subject: drm: Make drm_clflush_virt_range() void* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently drm_cflush_virt_rage() takes a char* so the caller probably has to do pointless casting to avoid compiler warnings. Make the argument void* instead to avoid such issues. v2: Use void* arithmetic (Chris) Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_cache.c | 4 ++-- include/drm/drmP.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c index bb8f58012189..4bfad5f5a843 100644 --- a/drivers/gpu/drm/drm_cache.c +++ b/drivers/gpu/drm/drm_cache.c @@ -125,11 +125,11 @@ drm_clflush_sg(struct sg_table *st) EXPORT_SYMBOL(drm_clflush_sg); void -drm_clflush_virt_range(char *addr, unsigned long length) +drm_clflush_virt_range(void *addr, unsigned long length) { #if defined(CONFIG_X86) if (cpu_has_clflush) { - char *end = addr + length; + void *end = addr + length; mb(); for (; addr < end; addr += boot_cpu_data.x86_clflush_size) clflush(addr); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index daac00a93126..150780bdd66b 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1322,7 +1322,7 @@ extern int drm_remove_magic(struct drm_master *master, drm_magic_t magic); /* Cache management (drm_cache.c) */ void drm_clflush_pages(struct page *pages[], unsigned long num_pages); void drm_clflush_sg(struct sg_table *st); -void drm_clflush_virt_range(char *addr, unsigned long length); +void drm_clflush_virt_range(void *addr, unsigned long length); /* Locking IOCTL support (drm_lock.h) */ extern int drm_lock(struct drm_device *dev, void *data, -- cgit v1.2.3-71-gd317 From c50b960ccc5981627628302701e93e6aceccdb1c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 28 Mar 2014 10:19:47 +0000 Subject: netfilter: nf_tables: implement proper set selection The current set selection simply choses the first set type that provides the requested features, which always results in the rbtree being chosen by virtue of being the first set in the list. What we actually want to do is choose the implementation that can provide the requested features and is optimal from either a performance or memory perspective depending on the characteristics of the elements and the preferences specified by the user. The elements are not known when creating a set. Even if we would provide them for anonymous (literal) sets, we'd still have standalone sets where the elements are not known in advance. We therefore need an abstract description of the data charcteristics. The kernel already knows the size of the key, this patch starts by introducing a nested set description which so far contains only the maximum amount of elements. Based on this the set implementations are changed to provide an estimate of the required amount of memory and the lookup complexity class. The set ops have a new callback ->estimate() that is invoked during set selection. It receives a structure containing the attributes known to the kernel and is supposed to populate a struct nft_set_estimate with the complexity class and, in case the size is known, the complete amount of memory required, or the amount of memory required per element otherwise. Based on the policy specified by the user (performance/memory, defaulting to performance) the kernel will then select the best suited implementation. Even if the set implementation would allow to add more than the specified maximum amount of elements, they are enforced since new implementations might not be able to add more than maximum based on which they were selected. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 46 ++++++++++++ include/uapi/linux/netfilter/nf_tables.h | 27 +++++++ net/netfilter/nf_tables_api.c | 121 +++++++++++++++++++++++++++---- net/netfilter/nft_hash.c | 45 +++++++++++- net/netfilter/nft_rbtree.c | 21 ++++++ 5 files changed, 242 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e6bc14d8fa9a..29ff1dc41ef3 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -145,6 +145,44 @@ struct nft_set_iter { const struct nft_set_elem *elem); }; +/** + * struct nft_set_desc - description of set elements + * + * @klen: key length + * @dlen: data length + * @size: number of set elements + */ +struct nft_set_desc { + unsigned int klen; + unsigned int dlen; + unsigned int size; +}; + +/** + * enum nft_set_class - performance class + * + * @NFT_LOOKUP_O_1: constant, O(1) + * @NFT_LOOKUP_O_LOG_N: logarithmic, O(log N) + * @NFT_LOOKUP_O_N: linear, O(N) + */ +enum nft_set_class { + NFT_SET_CLASS_O_1, + NFT_SET_CLASS_O_LOG_N, + NFT_SET_CLASS_O_N, +}; + +/** + * struct nft_set_estimate - estimation of memory and performance + * characteristics + * + * @size: required memory + * @class: lookup performance class + */ +struct nft_set_estimate { + unsigned int size; + enum nft_set_class class; +}; + /** * struct nft_set_ops - nf_tables set operations * @@ -174,7 +212,11 @@ struct nft_set_ops { struct nft_set_iter *iter); unsigned int (*privsize)(const struct nlattr * const nla[]); + bool (*estimate)(const struct nft_set_desc *desc, + u32 features, + struct nft_set_estimate *est); int (*init)(const struct nft_set *set, + const struct nft_set_desc *desc, const struct nlattr * const nla[]); void (*destroy)(const struct nft_set *set); @@ -194,6 +236,8 @@ void nft_unregister_set(struct nft_set_ops *ops); * @name: name of the set * @ktype: key type (numeric type defined by userspace, not used in the kernel) * @dtype: data type (verdict or numeric type defined by userspace) + * @size: maximum set size + * @nelems: number of elements * @ops: set ops * @flags: set flags * @klen: key length @@ -206,6 +250,8 @@ struct nft_set { char name[IFNAMSIZ]; u32 ktype; u32 dtype; + u32 size; + u32 nelems; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; u16 flags; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index c88ccbfda5f1..160159274cab 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -211,6 +211,29 @@ enum nft_set_flags { NFT_SET_MAP = 0x8, }; +/** + * enum nft_set_policies - set selection policy + * + * @NFT_SET_POL_PERFORMANCE: prefer high performance over low memory use + * @NFT_SET_POL_MEMORY: prefer low memory use over high performance + */ +enum nft_set_policies { + NFT_SET_POL_PERFORMANCE, + NFT_SET_POL_MEMORY, +}; + +/** + * enum nft_set_desc_attributes - set element description + * + * @NFTA_SET_DESC_SIZE: number of elements in set (NLA_U32) + */ +enum nft_set_desc_attributes { + NFTA_SET_DESC_UNSPEC, + NFTA_SET_DESC_SIZE, + __NFTA_SET_DESC_MAX +}; +#define NFTA_SET_DESC_MAX (__NFTA_SET_DESC_MAX - 1) + /** * enum nft_set_attributes - nf_tables set netlink attributes * @@ -221,6 +244,8 @@ enum nft_set_flags { * @NFTA_SET_KEY_LEN: key data length (NLA_U32) * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32) * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32) + * @NFTA_SET_POLICY: selection policy (NLA_U32) + * @NFTA_SET_DESC: set description (NLA_NESTED) */ enum nft_set_attributes { NFTA_SET_UNSPEC, @@ -231,6 +256,8 @@ enum nft_set_attributes { NFTA_SET_KEY_LEN, NFTA_SET_DATA_TYPE, NFTA_SET_DATA_LEN, + NFTA_SET_POLICY, + NFTA_SET_DESC, __NFTA_SET_MAX }; #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 33045a562297..bd3381e16084 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1912,9 +1912,18 @@ void nft_unregister_set(struct nft_set_ops *ops) } EXPORT_SYMBOL_GPL(nft_unregister_set); -static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[]) +/* + * Select a set implementation based on the data characteristics and the + * given policy. The total memory use might not be known if no size is + * given, in that case the amount of memory per element is used. + */ +static const struct nft_set_ops * +nft_select_set_ops(const struct nlattr * const nla[], + const struct nft_set_desc *desc, + enum nft_set_policies policy) { - const struct nft_set_ops *ops; + const struct nft_set_ops *ops, *bops; + struct nft_set_estimate est, best; u32 features; #ifdef CONFIG_MODULES @@ -1932,15 +1941,45 @@ static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const features &= NFT_SET_INTERVAL | NFT_SET_MAP; } - // FIXME: implement selection properly + bops = NULL; + best.size = ~0; + best.class = ~0; + list_for_each_entry(ops, &nf_tables_set_ops, list) { if ((ops->features & features) != features) continue; + if (!ops->estimate(desc, features, &est)) + continue; + + switch (policy) { + case NFT_SET_POL_PERFORMANCE: + if (est.class < best.class) + break; + if (est.class == best.class && est.size < best.size) + break; + continue; + case NFT_SET_POL_MEMORY: + if (est.size < best.size) + break; + if (est.size == best.size && est.class < best.class) + break; + continue; + default: + break; + } + if (!try_module_get(ops->owner)) continue; - return ops; + if (bops != NULL) + module_put(bops->owner); + + bops = ops; + best = est; } + if (bops != NULL) + return bops; + return ERR_PTR(-EOPNOTSUPP); } @@ -1952,6 +1991,12 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, [NFTA_SET_DATA_TYPE] = { .type = NLA_U32 }, [NFTA_SET_DATA_LEN] = { .type = NLA_U32 }, + [NFTA_SET_POLICY] = { .type = NLA_U32 }, + [NFTA_SET_DESC] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { + [NFTA_SET_DESC_SIZE] = { .type = NLA_U32 }, }; static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, @@ -2043,6 +2088,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, { struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; + struct nlattr *desc; u32 portid = NETLINK_CB(ctx->skb).portid; u32 seq = ctx->nlh->nlmsg_seq; @@ -2076,6 +2122,14 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; } + desc = nla_nest_start(skb, NFTA_SET_DESC); + if (desc == NULL) + goto nla_put_failure; + if (set->size && + nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(set->size))) + goto nla_put_failure; + nla_nest_end(skb, desc); + return nlmsg_end(skb, nlh); nla_put_failure: @@ -2304,6 +2358,23 @@ err: return err; } +static int nf_tables_set_desc_parse(const struct nft_ctx *ctx, + struct nft_set_desc *desc, + const struct nlattr *nla) +{ + struct nlattr *da[NFTA_SET_DESC_MAX + 1]; + int err; + + err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy); + if (err < 0) + return err; + + if (da[NFTA_SET_DESC_SIZE] != NULL) + desc->size = ntohl(nla_get_be32(da[NFTA_SET_DESC_SIZE])); + + return 0; +} + static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -2318,7 +2389,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, char name[IFNAMSIZ]; unsigned int size; bool create; - u32 ktype, klen, dlen, dtype, flags; + u32 ktype, dtype, flags, policy; + struct nft_set_desc desc; int err; if (nla[NFTA_SET_TABLE] == NULL || @@ -2326,6 +2398,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, nla[NFTA_SET_KEY_LEN] == NULL) return -EINVAL; + memset(&desc, 0, sizeof(desc)); + ktype = NFT_DATA_VALUE; if (nla[NFTA_SET_KEY_TYPE] != NULL) { ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); @@ -2333,8 +2407,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, return -EINVAL; } - klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); - if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data)) + desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); + if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data)) return -EINVAL; flags = 0; @@ -2346,7 +2420,6 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, } dtype = 0; - dlen = 0; if (nla[NFTA_SET_DATA_TYPE] != NULL) { if (!(flags & NFT_SET_MAP)) return -EINVAL; @@ -2359,15 +2432,25 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (dtype != NFT_DATA_VERDICT) { if (nla[NFTA_SET_DATA_LEN] == NULL) return -EINVAL; - dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); - if (dlen == 0 || - dlen > FIELD_SIZEOF(struct nft_data, data)) + desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); + if (desc.dlen == 0 || + desc.dlen > FIELD_SIZEOF(struct nft_data, data)) return -EINVAL; } else - dlen = sizeof(struct nft_data); + desc.dlen = sizeof(struct nft_data); } else if (flags & NFT_SET_MAP) return -EINVAL; + policy = NFT_SET_POL_PERFORMANCE; + if (nla[NFTA_SET_POLICY] != NULL) + policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); + + if (nla[NFTA_SET_DESC] != NULL) { + err = nf_tables_set_desc_parse(&ctx, &desc, nla[NFTA_SET_DESC]); + if (err < 0) + return err; + } + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); @@ -2398,7 +2481,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (!(nlh->nlmsg_flags & NLM_F_CREATE)) return -ENOENT; - ops = nft_select_set_ops(nla); + ops = nft_select_set_ops(nla, &desc, policy); if (IS_ERR(ops)) return PTR_ERR(ops); @@ -2419,12 +2502,13 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&set->bindings); set->ops = ops; set->ktype = ktype; - set->klen = klen; + set->klen = desc.klen; set->dtype = dtype; - set->dlen = dlen; + set->dlen = desc.dlen; set->flags = flags; + set->size = desc.size; - err = ops->init(set, nla); + err = ops->init(set, &desc, nla); if (err < 0) goto err2; @@ -2733,6 +2817,9 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, enum nft_registers dreg; int err; + if (set->size && set->nelems == set->size) + return -ENFILE; + err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, nft_set_elem_policy); if (err < 0) @@ -2798,6 +2885,7 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, err = set->ops->insert(set, &elem); if (err < 0) goto err3; + set->nelems++; return 0; @@ -2867,6 +2955,7 @@ static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set, goto err2; set->ops->remove(set, &elem); + set->nelems--; nft_data_uninit(&elem.key, NFT_DATA_VALUE); if (set->flags & NFT_SET_MAP) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 3b1ad876d6b0..01884b315c4a 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -19,7 +20,7 @@ #include #include -#define NFT_HASH_MIN_SIZE 4 +#define NFT_HASH_MIN_SIZE 4UL struct nft_hash { struct nft_hash_table __rcu *tbl; @@ -82,6 +83,11 @@ static void nft_hash_tbl_free(const struct nft_hash_table *tbl) kfree(tbl); } +static unsigned int nft_hash_tbl_size(unsigned int nelem) +{ + return max(roundup_pow_of_two(nelem * 4 / 3), NFT_HASH_MIN_SIZE); +} + static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets) { struct nft_hash_table *tbl; @@ -335,17 +341,23 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) } static int nft_hash_init(const struct nft_set *set, + const struct nft_set_desc *desc, const struct nlattr * const tb[]) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_table *tbl; + unsigned int size; if (unlikely(!nft_hash_rnd_initted)) { get_random_bytes(&nft_hash_rnd, 4); nft_hash_rnd_initted = true; } - tbl = nft_hash_tbl_alloc(NFT_HASH_MIN_SIZE); + size = NFT_HASH_MIN_SIZE; + if (desc->size) + size = nft_hash_tbl_size(desc->size); + + tbl = nft_hash_tbl_alloc(size); if (tbl == NULL) return -ENOMEM; RCU_INIT_POINTER(priv->tbl, tbl); @@ -369,8 +381,37 @@ static void nft_hash_destroy(const struct nft_set *set) kfree(tbl); } +static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est) +{ + unsigned int esize; + + esize = sizeof(struct nft_hash_elem); + if (features & NFT_SET_MAP) + esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]); + + if (desc->size) { + est->size = sizeof(struct nft_hash) + + nft_hash_tbl_size(desc->size) * + sizeof(struct nft_hash_elem *) + + desc->size * esize; + } else { + /* Resizing happens when the load drops below 30% or goes + * above 75%. The average of 52.5% load (approximated by 50%) + * is used for the size estimation of the hash buckets, + * meaning we calculate two buckets per element. + */ + est->size = esize + 2 * sizeof(struct nft_hash_elem *); + } + + est->class = NFT_SET_CLASS_O_1; + + return true; +} + static struct nft_set_ops nft_hash_ops __read_mostly = { .privsize = nft_hash_privsize, + .estimate = nft_hash_estimate, .init = nft_hash_init, .destroy = nft_hash_destroy, .get = nft_hash_get, diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index e21d69d13506..072e611e9f71 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -201,6 +201,7 @@ static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[]) } static int nft_rbtree_init(const struct nft_set *set, + const struct nft_set_desc *desc, const struct nlattr * const nla[]) { struct nft_rbtree *priv = nft_set_priv(set); @@ -222,8 +223,28 @@ static void nft_rbtree_destroy(const struct nft_set *set) } } +static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est) +{ + unsigned int nsize; + + nsize = sizeof(struct nft_rbtree_elem); + if (features & NFT_SET_MAP) + nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]); + + if (desc->size) + est->size = sizeof(struct nft_rbtree) + desc->size * nsize; + else + est->size = nsize; + + est->class = NFT_SET_CLASS_O_LOG_N; + + return true; +} + static struct nft_set_ops nft_rbtree_ops __read_mostly = { .privsize = nft_rbtree_privsize, + .estimate = nft_rbtree_estimate, .init = nft_rbtree_init, .destroy = nft_rbtree_destroy, .insert = nft_rbtree_insert, -- cgit v1.2.3-71-gd317 From 78f22b6a3a9254460d23060530b48ae02a9394e3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Mar 2014 17:57:27 +0100 Subject: cfg80211: allow userspace to take ownership of interfaces When dynamically creating interfaces from userspace, e.g. for P2P usage, such interfaces are usually owned by the process that created them, i.e. wpa_supplicant. Should wpa_supplicant crash, such interfaces will often cease operating properly and cause problems on restarting the process. To avoid this problem, introduce an ownership concept for interfaces. If an interface is owned by a netlink socket, then it will be destroyed if the netlink socket is closed for any reason, including if the process it belongs to crashed. This gives us a race-free way to get rid of any such interfaces. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ include/uapi/linux/nl80211.h | 6 ++++++ net/wireless/core.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ net/wireless/core.h | 11 +++++++++++ net/wireless/nl80211.c | 28 +++++++++++++++++++++++++++- 5 files changed, 91 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f3539a15c411..6510ccf53a54 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3194,6 +3194,7 @@ struct cfg80211_cached_keys; * @ibss_dfs_possible: (private) IBSS may change to a DFS channel * @event_list: (private) list for internal event processing * @event_lock: (private) lock for event list + * @owner_nlportid: (private) owner socket port ID */ struct wireless_dev { struct wiphy *wiphy; @@ -3241,6 +3242,8 @@ struct wireless_dev { unsigned long cac_start_time; unsigned int cac_time_ms; + u32 owner_nlportid; + #ifdef CONFIG_CFG80211_WEXT /* wext data */ struct { diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1ba9d626aa83..5e405fd55a71 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1579,6 +1579,10 @@ enum nl80211_commands { * @NL80211_ATTR_TDLS_PEER_CAPABILITY: flags for TDLS peer capabilities, u32. * As specified in the &enum nl80211_tdls_peer_capability. * + * @NL80211_ATTR_IFACE_SOCKET_OWNER: flag attribute, if set during interface + * creation then the new interface will be owned by the netlink socket + * that created it and will be destroyed when the socket is closed + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1914,6 +1918,8 @@ enum nl80211_attrs { NL80211_ATTR_TDLS_PEER_CAPABILITY, + NL80211_ATTR_IFACE_SOCKET_OWNER, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/core.c b/net/wireless/core.c index 086cddd03ba6..5a63c3cbda2e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -260,6 +260,45 @@ static void cfg80211_event_work(struct work_struct *work) rtnl_unlock(); } +void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) +{ + struct cfg80211_iface_destroy *item; + + ASSERT_RTNL(); + + spin_lock_irq(&rdev->destroy_list_lock); + while ((item = list_first_entry_or_null(&rdev->destroy_list, + struct cfg80211_iface_destroy, + list))) { + struct wireless_dev *wdev, *tmp; + u32 nlportid = item->nlportid; + + list_del(&item->list); + kfree(item); + spin_unlock_irq(&rdev->destroy_list_lock); + + list_for_each_entry_safe(wdev, tmp, &rdev->wdev_list, list) { + if (nlportid == wdev->owner_nlportid) + rdev_del_virtual_intf(rdev, wdev); + } + + spin_lock_irq(&rdev->destroy_list_lock); + } + spin_unlock_irq(&rdev->destroy_list_lock); +} + +static void cfg80211_destroy_iface_wk(struct work_struct *work) +{ + struct cfg80211_registered_device *rdev; + + rdev = container_of(work, struct cfg80211_registered_device, + destroy_work); + + rtnl_lock(); + cfg80211_destroy_ifaces(rdev); + rtnl_unlock(); +} + /* exported functions */ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) @@ -318,6 +357,10 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.dev.class = &ieee80211_class; rdev->wiphy.dev.platform_data = rdev; + INIT_LIST_HEAD(&rdev->destroy_list); + spin_lock_init(&rdev->destroy_list_lock); + INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk); + #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; #endif @@ -675,6 +718,7 @@ void wiphy_unregister(struct wiphy *wiphy) cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); + flush_work(&rdev->destroy_work); #ifdef CONFIG_PM if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) diff --git a/net/wireless/core.h b/net/wireless/core.h index 5b1fdcadd469..6f6f75609852 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -80,6 +80,10 @@ struct cfg80211_registered_device { struct cfg80211_coalesce *coalesce; + spinlock_t destroy_list_lock; + struct list_head destroy_list; + struct work_struct destroy_work; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); @@ -232,6 +236,13 @@ struct cfg80211_beacon_registration { u32 nlportid; }; +struct cfg80211_iface_destroy { + struct list_head list; + u32 nlportid; +}; + +void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev); + /* free object */ void cfg80211_dev_free(struct cfg80211_registered_device *rdev); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 052c1bf8ffac..b25b5ce4076d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -385,6 +385,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_MAC_HINT] = { .len = ETH_ALEN }, [NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 }, [NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 }, + [NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -2514,6 +2515,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; u32 flags; + /* to avoid failing a new interface creation due to pending removal */ + cfg80211_destroy_ifaces(rdev); + memset(¶ms, 0, sizeof(params)); if (!info->attrs[NL80211_ATTR_IFNAME]) @@ -2563,6 +2567,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(wdev); } + if (info->attrs[NL80211_ATTR_IFACE_SOCKET_OWNER]) + wdev->owner_nlportid = info->snd_portid; + switch (type) { case NL80211_IFTYPE_MESH_POINT: if (!info->attrs[NL80211_ATTR_MESH_ID]) @@ -11649,9 +11656,15 @@ static int nl80211_netlink_notify(struct notifier_block * nb, rcu_read_lock(); list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { - list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) + bool schedule_destroy_work = false; + + list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) { cfg80211_mlme_unregister_socket(wdev, notify->portid); + if (wdev->owner_nlportid == notify->portid) + schedule_destroy_work = true; + } + spin_lock_bh(&rdev->beacon_registrations_lock); list_for_each_entry_safe(reg, tmp, &rdev->beacon_registrations, list) { @@ -11662,6 +11675,19 @@ static int nl80211_netlink_notify(struct notifier_block * nb, } } spin_unlock_bh(&rdev->beacon_registrations_lock); + + if (schedule_destroy_work) { + struct cfg80211_iface_destroy *destroy; + + destroy = kzalloc(sizeof(*destroy), GFP_ATOMIC); + if (destroy) { + destroy->nlportid = notify->portid; + spin_lock(&rdev->destroy_list_lock); + list_add(&destroy->list, &rdev->destroy_list); + spin_unlock(&rdev->destroy_list_lock); + schedule_work(&rdev->destroy_work); + } + } } rcu_read_unlock(); -- cgit v1.2.3-71-gd317 From 77be2c54c5bd26279abc13807398771d80cda37a Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 27 Mar 2014 11:30:29 +0200 Subject: mac80211: add vif to flush call This will allow the low level driver to make decision based on the vif such as queues etc... Since the vif might be NULL, we can't add it to the tracing functions. Signed-off-by: Emmanuel Grumbach [fix staging rtl8821ae driver] Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ar5523/ar5523.c | 3 ++- drivers/net/wireless/ath/ath10k/mac.c | 3 ++- drivers/net/wireless/ath/ath9k/main.c | 3 ++- drivers/net/wireless/ath/carl9170/main.c | 4 +++- drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c | 3 ++- drivers/net/wireless/cw1200/sta.c | 3 ++- drivers/net/wireless/cw1200/sta.h | 3 ++- drivers/net/wireless/iwlegacy/common.c | 3 ++- drivers/net/wireless/iwlegacy/common.h | 3 ++- drivers/net/wireless/iwlwifi/dvm/mac80211.c | 3 ++- drivers/net/wireless/mac80211_hwsim.c | 4 +++- drivers/net/wireless/p54/main.c | 3 ++- drivers/net/wireless/rt2x00/rt2x00.h | 3 ++- drivers/net/wireless/rt2x00/rt2x00mac.c | 3 ++- drivers/net/wireless/rtlwifi/core.c | 3 ++- drivers/net/wireless/ti/wlcore/main.c | 3 ++- drivers/staging/rtl8821ae/core.c | 14 +++----------- include/net/mac80211.h | 4 +++- net/mac80211/driver-ops.h | 8 +++++++- net/mac80211/util.c | 2 +- 20 files changed, 48 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 507d9a9ee69a..f92050617ae6 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -1090,7 +1090,8 @@ static int ar5523_set_rts_threshold(struct ieee80211_hw *hw, u32 value) return ret; } -static void ar5523_flush(struct ieee80211_hw *hw, u32 queues, bool drop) +static void ar5523_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + u32 queues, bool drop) { struct ar5523 *ar = hw->priv; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 511a2f81e7af..d1df99350147 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3544,7 +3544,8 @@ static int ath10k_set_frag_threshold(struct ieee80211_hw *hw, u32 value) return ret; } -static void ath10k_flush(struct ieee80211_hw *hw, u32 queues, bool drop) +static void ath10k_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + u32 queues, bool drop) { struct ath10k *ar = hw->priv; bool skip; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index d69853b848ce..49265c6a1a7e 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1883,7 +1883,8 @@ static bool ath9k_has_tx_pending(struct ath_softc *sc) return !!npend; } -static void ath9k_flush(struct ieee80211_hw *hw, u32 queues, bool drop) +static void ath9k_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + u32 queues, bool drop) { struct ath_softc *sc = hw->priv; struct ath_hw *ah = sc->sc_ah; diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 4c8cdb097b65..f8ded84b7be8 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1707,7 +1707,9 @@ found: return 0; } -static void carl9170_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop) +static void carl9170_op_flush(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u32 queues, bool drop) { struct ar9170 *ar = hw->priv; unsigned int vid; diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c index 8c5fa4e58139..43c71bfaa474 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c @@ -897,7 +897,8 @@ static bool brcms_tx_flush_completed(struct brcms_info *wl) return result; } -static void brcms_ops_flush(struct ieee80211_hw *hw, u32 queues, bool drop) +static void brcms_ops_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + u32 queues, bool drop) { struct brcms_info *wl = hw->priv; int ret; diff --git a/drivers/net/wireless/cw1200/sta.c b/drivers/net/wireless/cw1200/sta.c index 103f7bce8932..cd0cad7f7759 100644 --- a/drivers/net/wireless/cw1200/sta.c +++ b/drivers/net/wireless/cw1200/sta.c @@ -936,7 +936,8 @@ static int __cw1200_flush(struct cw1200_common *priv, bool drop) return ret; } -void cw1200_flush(struct ieee80211_hw *hw, u32 queues, bool drop) +void cw1200_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + u32 queues, bool drop) { struct cw1200_common *priv = hw->priv; diff --git a/drivers/net/wireless/cw1200/sta.h b/drivers/net/wireless/cw1200/sta.h index 35babb62cc6a..b7e386b7662b 100644 --- a/drivers/net/wireless/cw1200/sta.h +++ b/drivers/net/wireless/cw1200/sta.h @@ -40,7 +40,8 @@ int cw1200_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, int cw1200_set_rts_threshold(struct ieee80211_hw *hw, u32 value); -void cw1200_flush(struct ieee80211_hw *hw, u32 queues, bool drop); +void cw1200_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + u32 queues, bool drop); u64 cw1200_prepare_multicast(struct ieee80211_hw *hw, struct netdev_hw_addr_list *mc_list); diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c index 4f42174d9994..ecc674627e6e 100644 --- a/drivers/net/wireless/iwlegacy/common.c +++ b/drivers/net/wireless/iwlegacy/common.c @@ -4755,7 +4755,8 @@ out: } EXPORT_SYMBOL(il_mac_change_interface); -void il_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop) +void il_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + u32 queues, bool drop) { struct il_priv *il = hw->priv; unsigned long timeout = jiffies + msecs_to_jiffies(500); diff --git a/drivers/net/wireless/iwlegacy/common.h b/drivers/net/wireless/iwlegacy/common.h index dfb13c70efe8..ea5c0f863c4e 100644 --- a/drivers/net/wireless/iwlegacy/common.h +++ b/drivers/net/wireless/iwlegacy/common.h @@ -1723,7 +1723,8 @@ void il_mac_remove_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int il_mac_change_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum nl80211_iftype newtype, bool newp2p); -void il_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop); +void il_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + u32 queues, bool drop); int il_alloc_txq_mem(struct il_priv *il); void il_free_txq_mem(struct il_priv *il); diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index dd55c9cf7ba8..d4fae9fb2ddb 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -1091,7 +1091,8 @@ static void iwlagn_configure_filter(struct ieee80211_hw *hw, FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL; } -static void iwlagn_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop) +static void iwlagn_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + u32 queues, bool drop) { struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 9d7a52f5a410..31c7a1c9ef5f 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1676,7 +1676,9 @@ static int mac80211_hwsim_ampdu_action(struct ieee80211_hw *hw, return 0; } -static void mac80211_hwsim_flush(struct ieee80211_hw *hw, u32 queues, bool drop) +static void mac80211_hwsim_flush(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u32 queues, bool drop) { /* Not implemented, queues only on kernel side */ } diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index eede90b63f84..7be3a4839640 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -669,7 +669,8 @@ static unsigned int p54_flush_count(struct p54_common *priv) return total; } -static void p54_flush(struct ieee80211_hw *dev, u32 queues, bool drop) +static void p54_flush(struct ieee80211_hw *dev, struct ieee80211_vif *vif, + u32 queues, bool drop) { struct p54_common *priv = dev->priv; unsigned int total, i; diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index e3b885d8f7db..010b76505243 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -1448,7 +1448,8 @@ int rt2x00mac_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u16 queue, const struct ieee80211_tx_queue_params *params); void rt2x00mac_rfkill_poll(struct ieee80211_hw *hw); -void rt2x00mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop); +void rt2x00mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + u32 queues, bool drop); int rt2x00mac_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant); int rt2x00mac_get_antenna(struct ieee80211_hw *hw, u32 *tx_ant, u32 *rx_ant); void rt2x00mac_get_ringparam(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index ddeb5a709aa3..30a2367ba8d6 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -747,7 +747,8 @@ void rt2x00mac_rfkill_poll(struct ieee80211_hw *hw) } EXPORT_SYMBOL_GPL(rt2x00mac_rfkill_poll); -void rt2x00mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop) +void rt2x00mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + u32 queues, bool drop) { struct rt2x00_dev *rt2x00dev = hw->priv; struct data_queue *queue; diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c index 4ec424f26672..b1ed6d0796f6 100644 --- a/drivers/net/wireless/rtlwifi/core.c +++ b/drivers/net/wireless/rtlwifi/core.c @@ -1387,7 +1387,8 @@ static void rtl_op_rfkill_poll(struct ieee80211_hw *hw) * before switch channel or power save, or tx buffer packet * maybe send after offchannel or rf sleep, this may cause * dis-association by AP */ -static void rtl_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop) +static void rtl_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + u32 queues, bool drop) { struct rtl_priv *rtlpriv = rtl_priv(hw); diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index ed88d3913483..077eb5b9cd74 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -5184,7 +5184,8 @@ out: mutex_unlock(&wl->mutex); } -static void wlcore_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop) +static void wlcore_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + u32 queues, bool drop) { struct wl1271 *wl = hw->priv; diff --git a/drivers/staging/rtl8821ae/core.c b/drivers/staging/rtl8821ae/core.c index ff3139b6da65..63ae2d1997d3 100644 --- a/drivers/staging/rtl8821ae/core.c +++ b/drivers/staging/rtl8821ae/core.c @@ -1414,23 +1414,15 @@ static void rtl_op_rfkill_poll(struct ieee80211_hw *hw) * before switch channel or power save, or tx buffer packet * maybe send after offchannel or rf sleep, this may cause * dis-association by AP */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) -static void rtl_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop) +static void rtl_op_flush(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u32 queues, bool drop) { struct rtl_priv *rtlpriv = rtl_priv(hw); if (rtlpriv->intf_ops->flush) rtlpriv->intf_ops->flush(hw, queues, drop); } -#else -static void rtl_op_flush(struct ieee80211_hw *hw, bool drop) -{ - struct rtl_priv *rtlpriv = rtl_priv(hw); - - if (rtlpriv->intf_ops->flush) - rtlpriv->intf_ops->flush(hw, drop); -} -#endif const struct ieee80211_ops rtl_ops = { .start = rtl_op_start, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8248e3909fdf..faa7b9cf9cc7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2609,6 +2609,7 @@ enum ieee80211_roc_type { * of queues to flush, which is useful if different virtual interfaces * use different hardware queues; it may also indicate all queues. * If the parameter @drop is set to %true, pending frames may be dropped. + * Note that vif can be NULL. * The callback can sleep. * * @channel_switch: Drivers that need (or want) to offload the channel @@ -2871,7 +2872,8 @@ struct ieee80211_ops { struct netlink_callback *cb, void *data, int len); #endif - void (*flush)(struct ieee80211_hw *hw, u32 queues, bool drop); + void (*flush)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + u32 queues, bool drop); void (*channel_switch)(struct ieee80211_hw *hw, struct ieee80211_channel_switch *ch_switch); int (*set_antenna)(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index fc689f5d971e..5331582a2c81 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -726,13 +726,19 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local) } static inline void drv_flush(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, u32 queues, bool drop) { + struct ieee80211_vif *vif = sdata ? &sdata->vif : NULL; + might_sleep(); + if (sdata) + check_sdata_in_driver(sdata); + trace_drv_flush(local, queues, drop); if (local->ops->flush) - local->ops->flush(&local->hw, queues, drop); + local->ops->flush(&local->hw, vif, queues, drop); trace_drv_return_void(local); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 275c94f995f7..5cf62ec74c14 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -554,7 +554,7 @@ void ieee80211_flush_queues(struct ieee80211_local *local, ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_FLUSH); - drv_flush(local, queues, false); + drv_flush(local, sdata, queues, false); ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_FLUSH); -- cgit v1.2.3-71-gd317 From 570dbde137d4604e4e682a5855b4425233344c19 Mon Sep 17 00:00:00 2001 From: David Spinadel Date: Sun, 23 Feb 2014 09:12:59 +0200 Subject: cfg80211: Add indoor only and GO concurrent channel attributes The FCC are clarifying some soft configuration requirements, which among other include the following: 1. Indoor operation, where a device can use channels requiring indoor operation, subject to that it can guarantee indoor operation, i.e., the device is connected to AC Power or the device is under the control of a local master that is acting as an AP and is connected to AC Power. 2. Concurrent GO operation, where devices may instantiate a P2P GO while they are under the guidance of an authorized master. For example, on a channel on which a BSS is connected to an authorized master, i.e., with DFS and radar detection capability in the UNII band. See https://apps.fcc.gov/eas/comments/GetPublishedDocument.html?id=327&tn=528122 Add support for advertising Indoor-only and GO-Concurrent channel properties. Signed-off-by: David Spinadel Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 +++++ include/uapi/linux/nl80211.h | 23 +++++++++++++++++++++++ net/wireless/nl80211.c | 6 ++++++ net/wireless/reg.c | 2 ++ 4 files changed, 36 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6510ccf53a54..14d8d3417735 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -109,6 +109,9 @@ enum ieee80211_band { * channel as the control or any of the secondary channels. * This may be due to the driver or due to regulatory bandwidth * restrictions. + * @IEEE80211_CHAN_INDOOR_ONLY: see %NL80211_FREQUENCY_ATTR_INDOOR_ONLY + * @IEEE80211_CHAN_GO_CONCURRENT: see %NL80211_FREQUENCY_ATTR_GO_CONCURRENT + * */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = 1<<0, @@ -120,6 +123,8 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_OFDM = 1<<6, IEEE80211_CHAN_NO_80MHZ = 1<<7, IEEE80211_CHAN_NO_160MHZ = 1<<8, + IEEE80211_CHAN_INDOOR_ONLY = 1<<9, + IEEE80211_CHAN_GO_CONCURRENT = 1<<10, }; #define IEEE80211_CHAN_NO_HT40 \ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5e405fd55a71..ac5b2d25f0fc 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2342,9 +2342,30 @@ enum nl80211_band_attr { * using this channel as the primary or any of the secondary channels * isn't possible * @NL80211_FREQUENCY_ATTR_DFS_CAC_TIME: DFS CAC time in milliseconds. + * @NL80211_FREQUENCY_ATTR_INDOOR_ONLY: Only indoor use is permitted on this + * channel. A channel that has the INDOOR_ONLY attribute can only be + * used when there is a clear assessment that the device is operating in + * an indoor surroundings, i.e., it is connected to AC power (and not + * through portable DC inverters) or is under the control of a master + * that is acting as an AP and is connected to AC power. + * @NL80211_FREQUENCY_ATTR_GO_CONCURRENT: GO operation is allowed on this + * channel if it's connected concurrently to a BSS on the same channel on + * the 2 GHz band or to a channel in the same UNII band (on the 5 GHz + * band), and IEEE80211_CHAN_RADAR is not set. Instantiating a GO on a + * channel that has the GO_CONCURRENT attribute set can be done when there + * is a clear assessment that the device is operating under the guidance of + * an authorized master, i.e., setting up a GO while the device is also + * connected to an AP with DFS and radar detection on the UNII band (it is + * up to user-space, i.e., wpa_supplicant to perform the required + * verifications) * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use + * + * See https://apps.fcc.gov/eas/comments/GetPublishedDocument.html?id=327&tn=528122 + * for more information on the FCC description of the relaxations allowed + * by NL80211_FREQUENCY_ATTR_INDOOR_ONLY and + * NL80211_FREQUENCY_ATTR_GO_CONCURRENT. */ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, @@ -2361,6 +2382,8 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_80MHZ, NL80211_FREQUENCY_ATTR_NO_160MHZ, NL80211_FREQUENCY_ATTR_DFS_CAC_TIME, + NL80211_FREQUENCY_ATTR_INDOOR_ONLY, + NL80211_FREQUENCY_ATTR_GO_CONCURRENT, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b25b5ce4076d..c5ead18ad3ab 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -614,6 +614,12 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ)) goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_GO_CONCURRENT) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_GO_CONCURRENT)) + goto nla_put_failure; } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index e0a746d19061..1e9dc1cba335 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -873,6 +873,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_RADAR; if (rd_flags & NL80211_RRF_NO_OFDM) channel_flags |= IEEE80211_CHAN_NO_OFDM; + if (rd_flags & NL80211_RRF_NO_OUTDOOR) + channel_flags |= IEEE80211_CHAN_INDOOR_ONLY; return channel_flags; } -- cgit v1.2.3-71-gd317 From 174e0cd28af0fe3c6c634c3e4d9e042c683bd7f7 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Sun, 23 Feb 2014 09:13:01 +0200 Subject: cfg80211: Enable GO operation on additional channels Allow GO operation on a channel marked with IEEE80211_CHAN_GO_CONCURRENT iff there is an active station interface that is associated to an AP operating on the same channel in the 2 GHz band or the same UNII band (in the 5 GHz band). This relaxation is not allowed if the channel is marked with IEEE80211_CHAN_RADAR. Note that this is a permissive approach to the FCC definitions, that require a clear assessment that the device operating the AP is an authorized master, i.e., with radar detection and DFS capabilities. It is assumed that such restrictions are enforced by user space. Furthermore, it is assumed, that if the conditions that allowed for the operation of the GO on such a channel change, i.e., the station interface disconnected from the AP, it is the responsibility of user space to evacuate the GO from the channel. Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++- include/net/regulatory.h | 6 ++++ net/mac80211/ibss.c | 9 ++++-- net/wireless/Kconfig | 24 +++++++++++++++ net/wireless/chan.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++-- net/wireless/mesh.c | 3 +- net/wireless/nl80211.c | 11 ++++--- net/wireless/reg.c | 29 ++++++++++++++++++ net/wireless/reg.h | 12 ++++++++ net/wireless/trace.h | 11 ++++--- 10 files changed, 169 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 14d8d3417735..5640dc028bfa 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4539,12 +4539,14 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, * cfg80211_reg_can_beacon - check if beaconing is allowed * @wiphy: the wiphy * @chandef: the channel definition + * @iftype: interface type * * Return: %true if there is no secondary channel or the secondary channel(s) * can be used for beaconing (i.e. is not a radar channel etc.) */ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef); + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype); /* * cfg80211_ch_switch_notify - update wdev channel and notify userspace diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 75fc1f5a948d..259992444e80 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -131,6 +131,11 @@ struct regulatory_request { * all country IE information processed by the regulatory core. This will * override %REGULATORY_COUNTRY_IE_FOLLOW_POWER as all country IEs will * be ignored. + * @REGULATORY_ENABLE_RELAX_NO_IR: for devices that wish to allow the + * NO_IR relaxation, which enables transmissions on channels on which + * otherwise initiating radiation is not allowed. This will enable the + * relaxations enabled under the CFG80211_REG_RELAX_NO_IR configuration + * option */ enum ieee80211_regulatory_flags { REGULATORY_CUSTOM_REG = BIT(0), @@ -138,6 +143,7 @@ enum ieee80211_regulatory_flags { REGULATORY_DISABLE_BEACON_HINTS = BIT(2), REGULATORY_COUNTRY_IE_FOLLOW_POWER = BIT(3), REGULATORY_COUNTRY_IE_IGNORE = BIT(4), + REGULATORY_ENABLE_RELAX_NO_IR = BIT(5), }; struct ieee80211_freq_range { diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 35e4f94d7ba1..741445e498b0 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -262,7 +262,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, /* make a copy of the chandef, it could be modified below. */ chandef = *req_chandef; chan = chandef.chan; - if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) { + if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef, + NL80211_IFTYPE_ADHOC)) { if (chandef.width == NL80211_CHAN_WIDTH_5 || chandef.width == NL80211_CHAN_WIDTH_10 || chandef.width == NL80211_CHAN_WIDTH_20_NOHT || @@ -274,7 +275,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, chandef.width = NL80211_CHAN_WIDTH_20; chandef.center_freq1 = chan->center_freq; /* check again for downgraded chandef */ - if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) { + if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef, + NL80211_IFTYPE_ADHOC)) { sdata_info(sdata, "Failed to join IBSS, beacons forbidden\n"); return; @@ -861,7 +863,8 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, goto disconnect; } - if (!cfg80211_reg_can_beacon(sdata->local->hw.wiphy, ¶ms.chandef)) { + if (!cfg80211_reg_can_beacon(sdata->local->hw.wiphy, ¶ms.chandef, + NL80211_IFTYPE_ADHOC)) { sdata_info(sdata, "IBSS %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", ifibss->bssid, diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 2a891bc6315c..405f3c4cf70c 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -108,6 +108,30 @@ config CFG80211_REG_CELLULAR_HINTS feature if you have tested and validated this feature on your systems. +config CFG80211_REG_RELAX_NO_IR + bool "cfg80211 support for NO_IR relaxation" + depends on CFG80211_CERTIFICATION_ONUS + ---help--- + This option enables support for relaxation of the NO_IR flag for + situations that certain regulatory bodies have provided clarifications + on how relaxation can occur. This feature has an inherent dependency on + userspace features which must have been properly tested and as such is + not enabled by default. + + A relaxation feature example is allowing the operation of a P2P group + owner (GO) on channels marked with NO_IR if there is an additional BSS + interface which associated to an AP which userspace assumes or confirms + to be an authorized master, i.e., with radar detection support and DFS + capabilities. However, note that in order to not create daisy chain + scenarios, this relaxation is not allowed in cases that the BSS client + is associated to P2P GO and in addition the P2P GO instantiated on + a channel due to this relaxation should not allow connection from + non P2P clients. + + The regulatory core will apply these relaxations only for drivers that + support this feature by declaring the appropriate channel flags and + capabilities in their registration flow. + config CFG80211_DEFAULT_PS bool "enable powersave by default" depends on CFG80211 diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 9c9501a35fb5..50202af7fba3 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -661,15 +661,85 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_chandef_usable); +/* + * For GO only, check if the channel can be used under permissive conditions + * mandated by the some regulatory bodies, i.e., the channel is marked with + * IEEE80211_CHAN_GO_CONCURRENT and there is an additional station interface + * associated to an AP on the same channel or on the same UNII band + * (assuming that the AP is an authorized master). + */ +static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev, + struct ieee80211_channel *chan) +{ + struct wireless_dev *wdev_iter; + struct wiphy *wiphy = wiphy_idx_to_wiphy(rdev->wiphy_idx); + + ASSERT_RTNL(); + + if (!config_enabled(CONFIG_CFG80211_REG_RELAX_NO_IR) || + !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR) || + !(chan->flags & IEEE80211_CHAN_GO_CONCURRENT)) + return false; + + /* + * Generally, it is possible to rely on another device/driver to allow + * the GO concurrent relaxation, however, since the device can further + * enforce the relaxation (by doing a similar verifications as this), + * and thus fail the GO instantiation, consider only the interfaces of + * the current registered device. + */ + list_for_each_entry(wdev_iter, &rdev->wdev_list, list) { + struct ieee80211_channel *other_chan = NULL; + int r1, r2; + + if (wdev_iter->iftype != NL80211_IFTYPE_STATION || + !netif_running(wdev_iter->netdev)) + continue; + + wdev_lock(wdev_iter); + if (wdev_iter->current_bss) + other_chan = wdev_iter->current_bss->pub.channel; + wdev_unlock(wdev_iter); + + if (!other_chan) + continue; + + if (chan == other_chan) + return true; + + if (chan->band != IEEE80211_BAND_5GHZ) + continue; + + r1 = cfg80211_get_unii(chan->center_freq); + r2 = cfg80211_get_unii(other_chan->center_freq); + + if (r1 != -EINVAL && r1 == r2) + return true; + } + + return false; +} + bool cfg80211_reg_can_beacon(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef) + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) { + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); bool res; u32 prohibited_flags = IEEE80211_CHAN_DISABLED | - IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR; - trace_cfg80211_reg_can_beacon(wiphy, chandef); + trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype); + + /* + * Under certain conditions suggested by the some regulatory bodies + * a GO can operate on channels marked with IEEE80211_NO_IR + * so set this flag only if such relaxations are not enabled and + * the conditions are not met. + */ + if (iftype != NL80211_IFTYPE_P2P_GO || + !cfg80211_go_permissive_chan(rdev, chandef->chan)) + prohibited_flags |= IEEE80211_CHAN_NO_IR; if (cfg80211_chandef_dfs_required(wiphy, chandef) > 0 && cfg80211_chandef_dfs_available(wiphy, chandef)) { diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 5af5cc6b2c4c..7031ee0afad8 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -175,7 +175,8 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, scan_width); } - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef)) + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef, + NL80211_IFTYPE_MESH_POINT)) return -EINVAL; err = cfg80211_chandef_dfs_required(wdev->wiphy, &setup->chandef); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c5ead18ad3ab..b8d81e41b0f7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1939,7 +1939,7 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, result = -EBUSY; break; } - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef)) { + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, iftype)) { result = -EINVAL; break; } @@ -3271,7 +3271,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) } else if (!nl80211_get_ap_channel(rdev, ¶ms)) return -EINVAL; - if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef)) + if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef, + wdev->iftype)) return -EINVAL; err = cfg80211_chandef_dfs_required(wdev->wiphy, ¶ms.chandef); @@ -5941,7 +5942,8 @@ skip_beacons: if (err) return err; - if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef)) + if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef, + wdev->iftype)) return -EINVAL; switch (dev->ieee80211_ptr->iftype) { @@ -6717,7 +6719,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) if (err) return err; - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef)) + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef, + NL80211_IFTYPE_ADHOC)) return -EINVAL; switch (ibss.chandef.width) { diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2b8c1000c1be..58f48b8f42ae 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2616,6 +2616,35 @@ static void reg_timeout_work(struct work_struct *work) rtnl_unlock(); } +/* + * See http://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii, for + * UNII band definitions + */ +int cfg80211_get_unii(int freq) +{ + /* UNII-1 */ + if (freq >= 5150 && freq <= 5250) + return 0; + + /* UNII-2A */ + if (freq > 5250 && freq <= 5350) + return 1; + + /* UNII-2B */ + if (freq > 5350 && freq <= 5470) + return 2; + + /* UNII-2C */ + if (freq > 5470 && freq <= 5725) + return 3; + + /* UNII-3 */ + if (freq > 5725 && freq <= 5825) + return 4; + + return -EINVAL; +} + int __init regulatory_init(void) { int err = 0; diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 37c180df34b7..334a53af0fc8 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -104,4 +104,16 @@ void regulatory_hint_country_ie(struct wiphy *wiphy, */ void regulatory_hint_disconnect(void); +/** + * cfg80211_get_unii - get the U-NII band for the frequency + * @freq: the frequency for which we want to get the UNII band. + + * Get a value specifying the U-NII band frequency belongs to. + * U-NII bands are defined by the FCC in C.F.R 47 part 15. + * + * Returns -EINVAL if freq is invalid, 0 for UNII-1, 1 for UNII-2A, + * 2 for UNII-2B, 3 for UNII-2C and 4 for UNII-3. + */ +int cfg80211_get_unii(int freq); + #endif /* __NET_WIRELESS_REG_H */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index aabccf13e07b..47b499f8f54d 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2193,18 +2193,21 @@ TRACE_EVENT(cfg80211_cqm_rssi_notify, ); TRACE_EVENT(cfg80211_reg_can_beacon, - TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), - TP_ARGS(wiphy, chandef), + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype), + TP_ARGS(wiphy, chandef, iftype), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY + __field(enum nl80211_iftype, iftype) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef); + __entry->iftype = iftype; ), - TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, - WIPHY_PR_ARG, CHAN_DEF_PR_ARG) + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d", + WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype) ); TRACE_EVENT(cfg80211_chandef_dfs_required, -- cgit v1.2.3-71-gd317 From 52616f2b446eaad8eb2cd78bbd052f0066069757 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Tue, 25 Feb 2014 16:26:00 +0200 Subject: cfg80211: Add an option to hint indoor operation Add the option to hint the wireless core that it is operating in an indoor environment. Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ net/wireless/nl80211.c | 18 ++++++-------- net/wireless/reg.c | 58 +++++++++++++++++++++++++++++++++++++++++++- net/wireless/reg.h | 1 + 4 files changed, 68 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ac5b2d25f0fc..513bfd7b2e5f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2602,10 +2602,13 @@ enum nl80211_dfs_regions { * present has been registered with the wireless core that * has listed NL80211_FEATURE_CELL_BASE_REG_HINTS as a * supported feature. + * @NL80211_USER_REG_HINT_INDOOR: a user sent an hint indicating that the + * platform is operating in an indoor environment. */ enum nl80211_user_reg_hint_type { NL80211_USER_REG_HINT_USER = 0, NL80211_USER_REG_HINT_CELL_BASE = 1, + NL80211_USER_REG_HINT_INDOOR = 2, }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b8d81e41b0f7..85bc830fd7e3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4677,7 +4677,6 @@ static int parse_reg_rule(struct nlattr *tb[], static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) { - int r; char *data = NULL; enum nl80211_user_reg_hint_type user_reg_hint_type; @@ -4690,11 +4689,6 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) if (unlikely(!rcu_access_pointer(cfg80211_regdomain))) return -EINPROGRESS; - if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) - return -EINVAL; - - data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); - if (info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]) user_reg_hint_type = nla_get_u32(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]); @@ -4704,14 +4698,16 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) switch (user_reg_hint_type) { case NL80211_USER_REG_HINT_USER: case NL80211_USER_REG_HINT_CELL_BASE: - break; + if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) + return -EINVAL; + + data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); + return regulatory_hint_user(data, user_reg_hint_type); + case NL80211_USER_REG_HINT_INDOOR: + return regulatory_hint_indoor_user(); default: return -EINVAL; } - - r = regulatory_hint_user(data, user_reg_hint_type); - - return r; } static int nl80211_get_mesh_config(struct sk_buff *skb, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 58f48b8f42ae..55d68c31ad72 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -65,11 +65,26 @@ #define REG_DBG_PRINT(args...) #endif +/** + * enum reg_request_treatment - regulatory request treatment + * + * @REG_REQ_OK: continue processing the regulatory request + * @REG_REQ_IGNORE: ignore the regulatory request + * @REG_REQ_INTERSECT: the regulatory domain resulting from this request should + * be intersected with the current one. + * @REG_REQ_ALREADY_SET: the regulatory request will not change the current + * regulatory settings, and no further processing is required. + * @REG_REQ_USER_HINT_HANDLED: a non alpha2 user hint was handled and no + * further processing is required, i.e., not need to update last_request + * etc. This should be used for user hints that do not provide an alpha2 + * but some other type of regulatory hint, i.e., indoor operation. + */ enum reg_request_treatment { REG_REQ_OK, REG_REQ_IGNORE, REG_REQ_INTERSECT, REG_REQ_ALREADY_SET, + REG_REQ_USER_HINT_HANDLED, }; static struct regulatory_request core_request_world = { @@ -106,6 +121,14 @@ const struct ieee80211_regdomain __rcu *cfg80211_regdomain; */ static int reg_num_devs_support_basehint; +/* + * State variable indicating if the platform on which the devices + * are attached is operating in an indoor environment. The state variable + * is relevant for all registered devices. + * (protected by RTNL) + */ +static bool reg_is_indoor; + static const struct ieee80211_regdomain *get_cfg80211_regdom(void) { return rtnl_dereference(cfg80211_regdomain); @@ -1128,6 +1151,13 @@ static bool reg_request_cell_base(struct regulatory_request *request) return request->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE; } +static bool reg_request_indoor(struct regulatory_request *request) +{ + if (request->initiator != NL80211_REGDOM_SET_BY_USER) + return false; + return request->user_reg_hint_type == NL80211_USER_REG_HINT_INDOOR; +} + bool reg_last_request_cell_base(void) { return reg_request_cell_base(get_last_request()); @@ -1570,6 +1600,11 @@ __reg_process_hint_user(struct regulatory_request *user_request) { struct regulatory_request *lr = get_last_request(); + if (reg_request_indoor(user_request)) { + reg_is_indoor = true; + return REG_REQ_USER_HINT_HANDLED; + } + if (reg_request_cell_base(user_request)) return reg_ignore_cell_hint(user_request); @@ -1617,7 +1652,8 @@ reg_process_hint_user(struct regulatory_request *user_request) treatment = __reg_process_hint_user(user_request); if (treatment == REG_REQ_IGNORE || - treatment == REG_REQ_ALREADY_SET) { + treatment == REG_REQ_ALREADY_SET || + treatment == REG_REQ_USER_HINT_HANDLED) { kfree(user_request); return treatment; } @@ -1678,6 +1714,7 @@ reg_process_hint_driver(struct wiphy *wiphy, case REG_REQ_OK: break; case REG_REQ_IGNORE: + case REG_REQ_USER_HINT_HANDLED: kfree(driver_request); return treatment; case REG_REQ_INTERSECT: @@ -1777,6 +1814,7 @@ reg_process_hint_country_ie(struct wiphy *wiphy, case REG_REQ_OK: break; case REG_REQ_IGNORE: + case REG_REQ_USER_HINT_HANDLED: /* fall through */ case REG_REQ_ALREADY_SET: kfree(country_ie_request); @@ -1969,6 +2007,22 @@ int regulatory_hint_user(const char *alpha2, return 0; } +int regulatory_hint_indoor_user(void) +{ + struct regulatory_request *request; + + request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); + if (!request) + return -ENOMEM; + + request->wiphy_idx = WIPHY_IDX_INVALID; + request->initiator = NL80211_REGDOM_SET_BY_USER; + request->user_reg_hint_type = NL80211_USER_REG_HINT_INDOOR; + queue_regulatory_request(request); + + return 0; +} + /* Driver hints */ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) { @@ -2136,6 +2190,8 @@ static void restore_regulatory_settings(bool reset_user) ASSERT_RTNL(); + reg_is_indoor = false; + reset_regdomains(true, &world_regdom); restore_alpha2(alpha2, reset_user); diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 334a53af0fc8..2a3842828f6d 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -25,6 +25,7 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy); int regulatory_hint_user(const char *alpha2, enum nl80211_user_reg_hint_type user_reg_hint_type); +int regulatory_hint_indoor_user(void); void wiphy_regulatory_register(struct wiphy *wiphy); void wiphy_regulatory_deregister(struct wiphy *wiphy); -- cgit v1.2.3-71-gd317 From cb2d956dd329caa11b5ece454dc52253aa038e73 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Mon, 17 Feb 2014 16:52:35 +0200 Subject: cfg80211: refactor cfg80211_can_use_iftype_chan() Separate the code that counts the interface types and channels from the code that check the interface combinations. The new function that checks for combinations is exported so it can be called by the drivers. This is done in preparation for moving the interface combinations checks out of cfg80211. Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- Documentation/DocBook/80211.tmpl | 1 + include/net/cfg80211.h | 22 +++++++ net/wireless/util.c | 129 ++++++++++++++++++++++----------------- 3 files changed, 96 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl index 044b76436e83..d9b9416c989f 100644 --- a/Documentation/DocBook/80211.tmpl +++ b/Documentation/DocBook/80211.tmpl @@ -100,6 +100,7 @@ !Finclude/net/cfg80211.h wdev_priv !Finclude/net/cfg80211.h ieee80211_iface_limit !Finclude/net/cfg80211.h ieee80211_iface_combination +!Finclude/net/cfg80211.h cfg80211_check_combinations Actions and configuration diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5640dc028bfa..4653e9f75d0d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4692,6 +4692,28 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp); */ unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy); +/** + * cfg80211_check_combinations - check interface combinations + * + * @wiphy: the wiphy + * @num_different_channels: the number of different channels we want + * to use for verification + * @radar_detect: a bitmap where each bit corresponds to a channel + * width where radar detection is needed, as in the definition of + * &struct ieee80211_iface_combination.@radar_detect_widths + * @iftype_num: array with the numbers of interfaces of each interface + * type. The index is the interface type as specified in &enum + * nl80211_iftype. + * + * This function can be called by the driver to check whether a + * combination of interfaces and their types are allowed according to + * the interface combinations. + */ +int cfg80211_check_combinations(struct wiphy *wiphy, + const int num_different_channels, + const u8 radar_detect, + const int iftype_num[NUM_NL80211_IFTYPES]); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/net/wireless/util.c b/net/wireless/util.c index e5872ff2c27c..46f404df35e3 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1268,6 +1268,76 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, return res; } +int cfg80211_check_combinations(struct wiphy *wiphy, + const int num_different_channels, + const u8 radar_detect, + const int iftype_num[NUM_NL80211_IFTYPES]) +{ + int i, j, iftype; + int num_interfaces = 0; + u32 used_iftypes = 0; + + for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { + num_interfaces += iftype_num[iftype]; + if (iftype_num[iftype] > 0 && + !(wiphy->software_iftypes & BIT(iftype))) + used_iftypes |= BIT(iftype); + } + + for (i = 0; i < wiphy->n_iface_combinations; i++) { + const struct ieee80211_iface_combination *c; + struct ieee80211_iface_limit *limits; + u32 all_iftypes = 0; + + c = &wiphy->iface_combinations[i]; + + if (num_interfaces > c->max_interfaces) + continue; + if (num_different_channels > c->num_different_channels) + continue; + + limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits, + GFP_KERNEL); + if (!limits) + return -ENOMEM; + + for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { + if (wiphy->software_iftypes & BIT(iftype)) + continue; + for (j = 0; j < c->n_limits; j++) { + all_iftypes |= limits[j].types; + if (!(limits[j].types & BIT(iftype))) + continue; + if (limits[j].max < iftype_num[iftype]) + goto cont; + limits[j].max -= iftype_num[iftype]; + } + } + + if (radar_detect && !(c->radar_detect_widths & radar_detect)) + goto cont; + + /* Finally check that all iftypes that we're currently + * using are actually part of this combination. If they + * aren't then we can't use this combination and have + * to continue to the next. + */ + if ((all_iftypes & used_iftypes) != used_iftypes) + goto cont; + + /* This combination covered all interface types and + * supported the requested numbers, so we're good. + */ + kfree(limits); + return 0; + cont: + kfree(limits); + } + + return -EBUSY; +} +EXPORT_SYMBOL(cfg80211_check_combinations); + int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, @@ -1276,7 +1346,6 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, u8 radar_detect) { struct wireless_dev *wdev_iter; - u32 used_iftypes = BIT(iftype); int num[NUM_NL80211_IFTYPES]; struct ieee80211_channel *used_channels[CFG80211_MAX_NUM_DIFFERENT_CHANNELS]; @@ -1284,7 +1353,7 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chmode; int num_different_channels = 0; int total = 1; - int i, j; + int i; ASSERT_RTNL(); @@ -1369,65 +1438,13 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, num[wdev_iter->iftype]++; total++; - used_iftypes |= BIT(wdev_iter->iftype); } if (total == 1 && !radar_detect) return 0; - for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { - const struct ieee80211_iface_combination *c; - struct ieee80211_iface_limit *limits; - u32 all_iftypes = 0; - - c = &rdev->wiphy.iface_combinations[i]; - - if (total > c->max_interfaces) - continue; - if (num_different_channels > c->num_different_channels) - continue; - - limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits, - GFP_KERNEL); - if (!limits) - return -ENOMEM; - - for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { - if (rdev->wiphy.software_iftypes & BIT(iftype)) - continue; - for (j = 0; j < c->n_limits; j++) { - all_iftypes |= limits[j].types; - if (!(limits[j].types & BIT(iftype))) - continue; - if (limits[j].max < num[iftype]) - goto cont; - limits[j].max -= num[iftype]; - } - } - - if (radar_detect && !(c->radar_detect_widths & radar_detect)) - goto cont; - - /* - * Finally check that all iftypes that we're currently - * using are actually part of this combination. If they - * aren't then we can't use this combination and have - * to continue to the next. - */ - if ((all_iftypes & used_iftypes) != used_iftypes) - goto cont; - - /* - * This combination covered all interface types and - * supported the requested numbers, so we're good. - */ - kfree(limits); - return 0; - cont: - kfree(limits); - } - - return -EBUSY; + return cfg80211_check_combinations(&rdev->wiphy, num_different_channels, + radar_detect, num); } int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, -- cgit v1.2.3-71-gd317 From 2beb6dab2d799ee8934cb0801845e551ad8c70f2 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 18 Feb 2014 11:40:36 +0200 Subject: cfg80211/mac80211: refactor cfg80211_chandef_dfs_required() Some interface types don't require DFS (such as STATION, P2P_CLIENT etc). In order to centralize these decisions, make cfg80211_chandef_dfs_required() take the iftype into consideration. Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 7 +++-- net/mac80211/ibss.c | 32 +++++++++++----------- net/mac80211/mesh.c | 9 +++--- net/wireless/chan.c | 74 ++++++++++++++++++++++++++++++++++++++------------ net/wireless/mesh.c | 7 +++-- net/wireless/nl80211.c | 37 ++++++++++++------------- 6 files changed, 104 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4653e9f75d0d..92a65c331cf4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -446,10 +446,13 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, * cfg80211_chandef_dfs_required - checks if radar detection is required * @wiphy: the wiphy to validate against * @chandef: the channel definition to check - * Return: 1 if radar detection is required, 0 if it is not, < 0 on error + * @iftype: the interface type as specified in &enum nl80211_iftype + * Returns: + * 1 if radar detection is required, 0 if it is not, < 0 on error */ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, - const struct cfg80211_chan_def *chandef); + const struct cfg80211_chan_def *chandef, + enum nl80211_iftype); /** * ieee80211_chandef_rate_flags - returns rate flags for a channel diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 741445e498b0..1759bd661e25 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -228,7 +228,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct beacon_data *presp; enum nl80211_bss_scan_width scan_width; bool have_higher_than_11mbit; - bool radar_required = false; + bool radar_required; int err; sdata_assert_lock(sdata); @@ -284,21 +284,20 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, } err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, - &chandef); + &chandef, NL80211_IFTYPE_ADHOC); if (err < 0) { sdata_info(sdata, "Failed to join IBSS, invalid chandef\n"); return; } - if (err > 0) { - if (!ifibss->userspace_handles_dfs) { - sdata_info(sdata, - "Failed to join IBSS, DFS channel without control program\n"); - return; - } - radar_required = true; + if (err > 0 && !ifibss->userspace_handles_dfs) { + sdata_info(sdata, + "Failed to join IBSS, DFS channel without control program\n"); + return; } + radar_required = err; + mutex_lock(&local->mtx); if (ieee80211_vif_use_channel(sdata, &chandef, ifibss->fixed_channel ? @@ -777,7 +776,8 @@ static void ieee80211_ibss_csa_mark_radar(struct ieee80211_sub_if_data *sdata) * unavailable. */ err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, - &ifibss->chandef); + &ifibss->chandef, + NL80211_IFTYPE_ADHOC); if (err > 0) cfg80211_radar_event(sdata->local->hw.wiphy, &ifibss->chandef, GFP_ATOMIC); @@ -876,17 +876,17 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, } err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, - ¶ms.chandef); + ¶ms.chandef, + NL80211_IFTYPE_ADHOC); if (err < 0) goto disconnect; - if (err) { + if (err > 0 && !ifibss->userspace_handles_dfs) { /* IBSS-DFS only allowed with a control program */ - if (!ifibss->userspace_handles_dfs) - goto disconnect; - - params.radar_required = true; + goto disconnect; } + params.radar_required = err; + if (cfg80211_chandef_identical(¶ms.chandef, &sdata->vif.bss_conf.chandef)) { ibss_dbg(sdata, diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 387f61c4557d..9d292377d3a6 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -903,14 +903,15 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, } err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, - ¶ms.chandef); + ¶ms.chandef, + NL80211_IFTYPE_MESH_POINT); if (err < 0) return false; - if (err) { - params.radar_required = true; + if (err > 0) /* TODO: DFS not (yet) supported */ return false; - } + + params.radar_required = err; if (cfg80211_chandef_identical(¶ms.chandef, &sdata->vif.bss_conf.chandef)) { diff --git a/net/wireless/chan.c b/net/wireless/chan.c index c3180dc03a33..c61bcdd3dfbc 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -326,28 +326,57 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, int cfg80211_chandef_dfs_required(struct wiphy *wiphy, - const struct cfg80211_chan_def *chandef) + const struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) { int width; - int r; + int ret; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return -EINVAL; - width = cfg80211_chandef_get_width(chandef); - if (width < 0) - return -EINVAL; + switch (iftype) { + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_MESH_POINT: + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return -EINVAL; - r = cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq1, - width); - if (r) - return r; + ret = cfg80211_get_chans_dfs_required(wiphy, + chandef->center_freq1, + width); + if (ret < 0) + return ret; + else if (ret > 0) + return BIT(chandef->width); - if (!chandef->center_freq2) - return 0; + if (!chandef->center_freq2) + return 0; + + ret = cfg80211_get_chans_dfs_required(wiphy, + chandef->center_freq2, + width); + if (ret < 0) + return ret; + else if (ret > 0) + return BIT(chandef->width); - return cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq2, - width); + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_UNSPECIFIED: + break; + case NUM_NL80211_IFTYPES: + WARN_ON(1); + } + + return 0; } EXPORT_SYMBOL(cfg80211_chandef_dfs_required); @@ -749,7 +778,8 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, !cfg80211_go_permissive_chan(rdev, chandef->chan)) prohibited_flags |= IEEE80211_CHAN_NO_IR; - if (cfg80211_chandef_dfs_required(wiphy, chandef) > 0 && + if (cfg80211_chandef_dfs_required(wiphy, chandef, + NL80211_IFTYPE_UNSPECIFIED) > 0 && cfg80211_chandef_dfs_available(wiphy, chandef)) { /* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */ prohibited_flags = IEEE80211_CHAN_DISABLED; @@ -779,6 +809,8 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, enum cfg80211_chan_mode *chanmode, u8 *radar_detect) { + int ret; + *chan = NULL; *chanmode = CHAN_MODE_UNDEFINED; @@ -821,8 +853,11 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, *chan = wdev->chandef.chan; *chanmode = CHAN_MODE_SHARED; - if (cfg80211_chandef_dfs_required(wdev->wiphy, - &wdev->chandef)) + ret = cfg80211_chandef_dfs_required(wdev->wiphy, + &wdev->chandef, + wdev->iftype); + WARN_ON(ret < 0); + if (ret > 0) *radar_detect |= BIT(wdev->chandef.width); } return; @@ -831,8 +866,11 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, *chan = wdev->chandef.chan; *chanmode = CHAN_MODE_SHARED; - if (cfg80211_chandef_dfs_required(wdev->wiphy, - &wdev->chandef)) + ret = cfg80211_chandef_dfs_required(wdev->wiphy, + &wdev->chandef, + wdev->iftype); + WARN_ON(ret < 0); + if (ret > 0) *radar_detect |= BIT(wdev->chandef.width); } return; diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 7031ee0afad8..6ebe883653a4 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -179,10 +179,13 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, NL80211_IFTYPE_MESH_POINT)) return -EINVAL; - err = cfg80211_chandef_dfs_required(wdev->wiphy, &setup->chandef); + err = cfg80211_chandef_dfs_required(wdev->wiphy, + &setup->chandef, + NL80211_IFTYPE_MESH_POINT); if (err < 0) return err; - if (err) + + if (err > 0) radar_detect_width = BIT(setup->chandef.width); err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 85bc830fd7e3..4f82b9b71db1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3275,12 +3275,15 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->iftype)) return -EINVAL; - err = cfg80211_chandef_dfs_required(wdev->wiphy, ¶ms.chandef); + err = cfg80211_chandef_dfs_required(wdev->wiphy, + ¶ms.chandef, + NL80211_IFTYPE_AP); if (err < 0) return err; - if (err) { - radar_detect_width = BIT(params.chandef.width); + + if (err > 0) { params.radar_required = true; + radar_detect_width = BIT(params.chandef.width); } err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, @@ -5806,7 +5809,8 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (wdev->cac_started) return -EBUSY; - err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef); + err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef, + NL80211_IFTYPE_UNSPECIFIED); if (err < 0) return err; @@ -5942,22 +5946,15 @@ skip_beacons: wdev->iftype)) return -EINVAL; - switch (dev->ieee80211_ptr->iftype) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_P2P_GO: - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_MESH_POINT: - err = cfg80211_chandef_dfs_required(wdev->wiphy, - ¶ms.chandef); - if (err < 0) - return err; - if (err) { - radar_detect_width = BIT(params.chandef.width); - params.radar_required = true; - } - break; - default: - break; + err = cfg80211_chandef_dfs_required(wdev->wiphy, + ¶ms.chandef, + wdev->iftype); + if (err < 0) + return err; + + if (err > 0) { + radar_detect_width = BIT(params.chandef.width); + params.radar_required = true; } err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, -- cgit v1.2.3-71-gd317 From 73de86a38962b18edad3205c2358599dd9c83e9f Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Thu, 13 Feb 2014 11:31:59 +0200 Subject: cfg80211/mac80211: move interface counting for combination check to mac80211 Move the counting part of the interface combination check from cfg80211 to mac80211. This is needed to simplify locking when the driver has to perform a combination check by itself (eg. with channel-switch). Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 -- net/mac80211/cfg.c | 2 -- net/mac80211/chan.c | 17 +++++++++++ net/mac80211/ieee80211_i.h | 4 +++ net/mac80211/util.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++ net/wireless/core.h | 13 ++------- net/wireless/ibss.c | 4 +++ net/wireless/mesh.c | 28 ------------------ net/wireless/mlme.c | 14 +-------- net/wireless/nl80211.c | 29 +++---------------- net/wireless/util.c | 5 ++++ 11 files changed, 110 insertions(+), 80 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 92a65c331cf4..fb8afcee62b4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -662,7 +662,6 @@ struct cfg80211_acl_data { * @p2p_opp_ps: P2P opportunistic PS * @acl: ACL configuration used by the drivers which has support for * MAC address based access control - * @radar_required: set if radar detection is required */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -680,7 +679,6 @@ struct cfg80211_ap_settings { u8 p2p_ctwindow; bool p2p_opp_ps; const struct cfg80211_acl_data *acl; - bool radar_required; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 906bc3b05aae..8bf94eaa0456 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -972,7 +972,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->needed_rx_chains = sdata->local->rx_chains; mutex_lock(&local->mtx); - sdata->radar_required = params->radar_required; err = ieee80211_vif_use_channel(sdata, ¶ms->chandef, IEEE80211_CHANCTX_SHARED); mutex_unlock(&local->mtx); @@ -2930,7 +2929,6 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, /* whatever, but channel contexts should not complain about that one */ sdata->smps_mode = IEEE80211_SMPS_OFF; sdata->needed_rx_chains = local->rx_chains; - sdata->radar_required = true; err = ieee80211_vif_use_channel(sdata, chandef, IEEE80211_CHANCTX_SHARED); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index b297bd3043a8..623b336f3efd 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -519,6 +519,7 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx *ctx; + u8 radar_detect_width = 0; int ret; lockdep_assert_held(&local->mtx); @@ -526,6 +527,22 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, WARN_ON(sdata->dev && netif_carrier_ok(sdata->dev)); mutex_lock(&local->chanctx_mtx); + + ret = cfg80211_chandef_dfs_required(local->hw.wiphy, + chandef, + sdata->wdev.iftype); + if (ret < 0) + goto out; + if (ret > 0) + radar_detect_width = BIT(chandef->width); + + sdata->radar_required = ret; + + ret = ieee80211_check_combinations(sdata, chandef, mode, + radar_detect_width); + if (ret < 0) + goto out; + __ieee80211_vif_release_channel(sdata); ctx = ieee80211_find_chanctx(local, chandef, mode); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 222c28b75315..09213fd87f8d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1805,6 +1805,10 @@ int ieee80211_cs_headroom(struct ieee80211_local *local, enum nl80211_iftype iftype); void ieee80211_recalc_dtim(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); +int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode chanmode, + u8 radar_detect); #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 73af7398850b..436f98870066 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2797,3 +2797,75 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local, ps->dtim_count = dtim_count; } + +int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode chanmode, + u8 radar_detect) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *sdata_iter; + enum nl80211_iftype iftype = sdata->wdev.iftype; + int num[NUM_NL80211_IFTYPES]; + struct ieee80211_chanctx *ctx; + int num_different_channels = 1; + int total = 1; + + lockdep_assert_held(&local->chanctx_mtx); + + if (WARN_ON(hweight32(radar_detect) > 1)) + return -EINVAL; + + if (WARN_ON(chanmode == IEEE80211_CHANCTX_SHARED && !chandef->chan)) + return -EINVAL; + + if (WARN_ON(iftype >= NUM_NL80211_IFTYPES)) + return -EINVAL; + + /* Always allow software iftypes */ + if (local->hw.wiphy->software_iftypes & BIT(iftype)) { + if (radar_detect) + return -EINVAL; + return 0; + } + + memset(num, 0, sizeof(num)); + + if (iftype != NL80211_IFTYPE_UNSPECIFIED) + num[iftype] = 1; + + list_for_each_entry(ctx, &local->chanctx_list, list) { + if (ctx->conf.radar_enabled) + radar_detect |= BIT(ctx->conf.def.width); + if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) { + num_different_channels++; + continue; + } + if ((chanmode == IEEE80211_CHANCTX_SHARED) && + cfg80211_chandef_compatible(chandef, + &ctx->conf.def)) + continue; + num_different_channels++; + } + + list_for_each_entry_rcu(sdata_iter, &local->interfaces, list) { + struct wireless_dev *wdev_iter; + + wdev_iter = &sdata_iter->wdev; + + if (sdata_iter == sdata || + rcu_access_pointer(sdata_iter->vif.chanctx_conf) == NULL || + local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype)) + continue; + + num[wdev_iter->iftype]++; + total++; + } + + if (total == 1 && !radar_detect) + return 0; + + return cfg80211_check_combinations(local->hw.wiphy, + num_different_channels, + radar_detect, num); +} diff --git a/net/wireless/core.h b/net/wireless/core.h index 6f6f75609852..6684c5d965d8 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -417,6 +417,9 @@ cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype) { + /* TODO: For this function, we'll probably need to keep some + * kind of interface combination check in cfg80211... + */ return cfg80211_can_use_iftype_chan(rdev, wdev, iftype, NULL, CHAN_MODE_UNDEFINED, 0); } @@ -431,16 +434,6 @@ cfg80211_can_add_interface(struct cfg80211_registered_device *rdev, return cfg80211_can_change_interface(rdev, NULL, iftype); } -static inline int -cfg80211_can_use_chan(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode) -{ - return cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - chan, chanmode, 0); -} - static inline unsigned int elapsed_jiffies_msecs(unsigned long start) { unsigned long end = jiffies; diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index a6b5bdad039c..faf4961d47d8 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -135,6 +135,10 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, radar_detect_width = BIT(params->chandef.width); } + /* TODO: We need to check the combinations at this point, we + * probably must move this call down to join_ibss() in + * mac80211. + */ err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, check_chan, (params->channel_fixed && diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 6ebe883653a4..3ddfb7cd335e 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -99,7 +99,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, const struct mesh_config *conf) { struct wireless_dev *wdev = dev->ieee80211_ptr; - u8 radar_detect_width = 0; int err; BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); @@ -179,22 +178,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, NL80211_IFTYPE_MESH_POINT)) return -EINVAL; - err = cfg80211_chandef_dfs_required(wdev->wiphy, - &setup->chandef, - NL80211_IFTYPE_MESH_POINT); - if (err < 0) - return err; - - if (err > 0) - radar_detect_width = BIT(setup->chandef.width); - - err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - setup->chandef.chan, - CHAN_MODE_SHARED, - radar_detect_width); - if (err) - return err; - err = rdev_join_mesh(rdev, dev, conf, setup); if (!err) { memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len); @@ -240,17 +223,6 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, if (!netif_running(wdev->netdev)) return -ENETDOWN; - /* cfg80211_can_use_chan() calls - * cfg80211_can_use_iftype_chan() with no radar - * detection, so if we're trying to use a radar - * channel here, something is wrong. - */ - WARN_ON_ONCE(chandef->chan->flags & IEEE80211_CHAN_RADAR); - err = cfg80211_can_use_chan(rdev, wdev, chandef->chan, - CHAN_MODE_SHARED); - if (err) - return err; - err = rdev_libertas_set_mesh_channel(rdev, wdev->netdev, chandef->chan); if (!err) diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index c52ff59a3e96..4b4ba707fab9 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -233,14 +233,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, if (!req.bss) return -ENOENT; - err = cfg80211_can_use_chan(rdev, wdev, req.bss->channel, - CHAN_MODE_SHARED); - if (err) - goto out; - err = rdev_auth(rdev, dev, &req); -out: cfg80211_put_bss(&rdev->wiphy, req.bss); return err; } @@ -306,16 +300,10 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, if (!req->bss) return -ENOENT; - err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED); - if (err) - goto out; - err = rdev_assoc(rdev, dev, req); if (!err) cfg80211_hold_bss(bss_from_pub(req->bss)); - -out: - if (err) + else cfg80211_put_bss(&rdev->wiphy, req->bss); return err; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4f82b9b71db1..2b99aad33ae0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3155,7 +3155,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ap_settings params; int err; - u8 radar_detect_width = 0; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) @@ -3275,24 +3274,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->iftype)) return -EINVAL; - err = cfg80211_chandef_dfs_required(wdev->wiphy, - ¶ms.chandef, - NL80211_IFTYPE_AP); - if (err < 0) - return err; - - if (err > 0) { - params.radar_required = true; - radar_detect_width = BIT(params.chandef.width); - } - - err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - params.chandef.chan, - CHAN_MODE_SHARED, - radar_detect_width); - if (err) - return err; - if (info->attrs[NL80211_ATTR_ACL_POLICY]) { params.acl = parse_acl_data(&rdev->wiphy, info); if (IS_ERR(params.acl)) @@ -5823,12 +5804,6 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (!rdev->ops->start_radar_detection) return -EOPNOTSUPP; - err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - chandef.chan, CHAN_MODE_SHARED, - BIT(chandef.width)); - if (err) - return err; - cac_time_ms = cfg80211_chandef_dfs_cac_time(&rdev->wiphy, &chandef); if (WARN_ON(!cac_time_ms)) cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; @@ -5957,6 +5932,10 @@ skip_beacons: params.radar_required = true; } + /* TODO: I left this here for now. With channel switch, the + * verification is a bit more complicated, because we only do + * it later when the channel switch really happens. + */ err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, params.chandef.chan, CHAN_MODE_SHARED, diff --git a/net/wireless/util.c b/net/wireless/util.c index 46f404df35e3..9bfc4c621509 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1375,6 +1375,11 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, num[iftype] = 1; + /* TODO: We'll probably not need this anymore, since this + * should only be called with CHAN_MODE_UNDEFINED. There are + * still a couple of pending calls where other chanmodes are + * used, but we should get rid of them. + */ switch (chanmode) { case CHAN_MODE_UNDEFINED: break; -- cgit v1.2.3-71-gd317 From 5d52ee81101943c507f45c76368026935f6bb75a Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Thu, 27 Feb 2014 14:33:47 +0200 Subject: mac80211: allow reservation of a running chanctx With single-channel drivers, we need to be able to change a running chanctx if we want to use chanctx reservation. Not all drivers may be able to do this, so add a flag that indicates support for it. Changing a running chanctx can also be used as an optimization in multi-channel drivers when the context needs to be reserved for future usage. Introduce IEEE80211_CHANCTX_RESERVED chanctx mode to mark a channel as reserved so nobody else can use it (since we know it's going to change). In the future, we may allow several vifs to use the same reservation as long as they plan to use the chanctx on the same future channel. Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 +++++ net/mac80211/chan.c | 79 ++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 68 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index faa7b9cf9cc7..03ab3c08fb70 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1555,6 +1555,12 @@ struct ieee80211_tx_control { * for a single active channel while using channel contexts. When support * is not enabled the default action is to disconnect when getting the * CSA frame. + * + * @IEEE80211_HW_CHANGE_RUNNING_CHANCTX: The hardware can change a + * channel context on-the-fly. This is needed for channel switch + * on single-channel hardware. It can also be used as an + * optimization in certain channel switch cases with + * multi-channel. */ enum ieee80211_hw_flags { IEEE80211_HW_HAS_RATE_CONTROL = 1<<0, @@ -1586,6 +1592,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_TIMING_BEACON_ONLY = 1<<26, IEEE80211_HW_SUPPORTS_HT_CCK_RATES = 1<<27, IEEE80211_HW_CHANCTX_STA_CSA = 1<<28, + IEEE80211_HW_CHANGE_RUNNING_CHANCTX = 1<<29, }; /** diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index baad75610a6a..57b8ab1bc5c1 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -168,6 +168,27 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local, } } +static bool ieee80211_chanctx_is_reserved(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) +{ + struct ieee80211_sub_if_data *sdata; + bool ret = false; + + lockdep_assert_held(&local->chanctx_mtx); + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata)) + continue; + if (sdata->reserved_chanctx == ctx) { + ret = true; + break; + } + } + + rcu_read_unlock(); + return ret; +} + static struct ieee80211_chanctx * ieee80211_find_chanctx(struct ieee80211_local *local, const struct cfg80211_chan_def *chandef, @@ -183,7 +204,12 @@ ieee80211_find_chanctx(struct ieee80211_local *local, list_for_each_entry(ctx, &local->chanctx_list, list) { const struct cfg80211_chan_def *compat; - if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) + /* We don't support chanctx reservation for multiple + * vifs yet, so don't allow reserved chanctxs to be + * reused. + */ + if ((ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) || + ieee80211_chanctx_is_reserved(local, ctx)) continue; compat = cfg80211_chandef_compatible(&ctx->conf.def, chandef); @@ -718,11 +744,20 @@ int ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata, /* try to find another context with the chandef we want */ new_ctx = ieee80211_find_chanctx(local, chandef, mode); if (!new_ctx) { - /* create a new context */ - new_ctx = ieee80211_new_chanctx(local, chandef, mode); - if (IS_ERR(new_ctx)) { - ret = PTR_ERR(new_ctx); - goto out; + if (curr_ctx->refcount == 1 && + (local->hw.flags & IEEE80211_HW_CHANGE_RUNNING_CHANCTX)) { + /* if we're the only users of the chanctx and + * the driver supports changing a running + * context, reserve our current context + */ + new_ctx = curr_ctx; + } else { + /* create a new context and reserve it */ + new_ctx = ieee80211_new_chanctx(local, chandef, mode); + if (IS_ERR(new_ctx)) { + ret = PTR_ERR(new_ctx); + goto out; + } } } @@ -770,22 +805,30 @@ int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.chandef = sdata->reserved_chandef; - /* unref our reservation before assigning */ + /* unref our reservation */ ctx->refcount--; sdata->reserved_chanctx = NULL; - ret = ieee80211_assign_vif_chanctx(sdata, ctx); - if (old_ctx->refcount == 0) - ieee80211_free_chanctx(local, old_ctx); - if (ret) { - /* if assign fails refcount stays the same */ - if (ctx->refcount == 0) - ieee80211_free_chanctx(local, ctx); - goto out; - } + if (old_ctx == ctx) { + /* This is our own context, just change it */ + ret = __ieee80211_vif_change_channel(sdata, old_ctx, + &tmp_changed); + if (ret) + goto out; + } else { + ret = ieee80211_assign_vif_chanctx(sdata, ctx); + if (old_ctx->refcount == 0) + ieee80211_free_chanctx(local, old_ctx); + if (ret) { + /* if assign fails refcount stays the same */ + if (ctx->refcount == 0) + ieee80211_free_chanctx(local, ctx); + goto out; + } - if (sdata->vif.type == NL80211_IFTYPE_AP) - __ieee80211_vif_copy_chanctx_to_vlans(sdata, false); + if (sdata->vif.type == NL80211_IFTYPE_AP) + __ieee80211_vif_copy_chanctx_to_vlans(sdata, false); + } *changed = tmp_changed; -- cgit v1.2.3-71-gd317 From ce26151bc35d9d893ec1b441a261ea145511c89f Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Thu, 3 Apr 2014 10:03:45 +0300 Subject: cfg80211: update comment about WIPHY_FLAG_CUSTOM_REGULATORY Commit a2f73b6c5db3c ("cfg80211: move regulatory flags to their own variable") renamed WIPHY_FLAG_CUSTOM_REGULATORY to REGULATORY_CUSTOM_REG, but missed to update one comment. Signed-off-by: Kalle Valo Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fb8afcee62b4..9496fe5ea6b4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3609,7 +3609,7 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2); * default channel settings will be disregarded. If no rule is found for a * channel on the regulatory domain the channel will be disabled. * Drivers using this for a wiphy should also set the wiphy flag - * WIPHY_FLAG_CUSTOM_REGULATORY or cfg80211 will set it for the wiphy + * REGULATORY_CUSTOM_REG or cfg80211 will set it for the wiphy * that called this helper. */ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, -- cgit v1.2.3-71-gd317 From 041f607de17666bed0407370e3f4a56e697354a8 Mon Sep 17 00:00:00 2001 From: Rostislav Lisovy Date: Wed, 2 Apr 2014 15:31:55 +0200 Subject: mac80211: Update conf_is_ht() to work properly with 5/10MHz channels The channels with 5/10MHz bandwidth are not HT. We have to reflect this in conf_is_ht() function which returns whether the particular channel is HT or not. Signed-off-by: Rostislav Lisovy Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 03ab3c08fb70..a3044e124229 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4585,7 +4585,9 @@ conf_is_ht40(struct ieee80211_conf *conf) static inline bool conf_is_ht(struct ieee80211_conf *conf) { - return conf->chandef.width != NL80211_CHAN_WIDTH_20_NOHT; + return (conf->chandef.width != NL80211_CHAN_WIDTH_5) && + (conf->chandef.width != NL80211_CHAN_WIDTH_10) && + (conf->chandef.width != NL80211_CHAN_WIDTH_20_NOHT); } static inline enum nl80211_iftype -- cgit v1.2.3-71-gd317 From 848ef58695d8c013f24633352586279cfb40e9d9 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 1 Apr 2014 17:02:53 +0300 Subject: net: rfkill: gpio: remove unused and obsolete platform parameters After upgrading to descriptor based gpios, the gpio numbers are not used anymore. The power_clk_name and the platform specific setup and close hooks are not used by anybody, and we should not encourage use of such things, so removing them. Acked-by: Alexandre Courbot Reviewed-by: Linus Walleij Signed-off-by: Heikki Krogerus Signed-off-by: Johannes Berg --- include/linux/rfkill-gpio.h | 10 ---------- net/rfkill/rfkill-gpio.c | 15 +-------------- 2 files changed, 1 insertion(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/rfkill-gpio.h b/include/linux/rfkill-gpio.h index 4d09f6eab359..20bcb55498cd 100644 --- a/include/linux/rfkill-gpio.h +++ b/include/linux/rfkill-gpio.h @@ -27,21 +27,11 @@ * struct rfkill_gpio_platform_data - platform data for rfkill gpio device. * for unused gpio's, the expected value is -1. * @name: name for the gpio rf kill instance - * @reset_gpio: GPIO which is used for reseting rfkill switch - * @shutdown_gpio: GPIO which is used for shutdown of rfkill switch - * @power_clk_name: [optional] name of clk to turn off while blocked - * @gpio_runtime_close: clean up platform specific gpio configuration - * @gpio_runtime_setup: set up platform specific gpio configuration */ struct rfkill_gpio_platform_data { char *name; - int reset_gpio; - int shutdown_gpio; - const char *power_clk_name; enum rfkill_type type; - void (*gpio_runtime_close)(struct platform_device *); - int (*gpio_runtime_setup)(struct platform_device *); }; #endif /* __RFKILL_GPIO_H */ diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index bd2a5b90400c..0adda445dfe7 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -87,7 +87,6 @@ static int rfkill_gpio_probe(struct platform_device *pdev) { struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data; struct rfkill_gpio_data *rfkill; - const char *clk_name = NULL; struct gpio_desc *gpio; int ret; int len; @@ -101,7 +100,6 @@ static int rfkill_gpio_probe(struct platform_device *pdev) if (ret) return ret; } else if (pdata) { - clk_name = pdata->power_clk_name; rfkill->name = pdata->name; rfkill->type = pdata->type; } else { @@ -120,7 +118,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev) snprintf(rfkill->reset_name, len + 6 , "%s_reset", rfkill->name); snprintf(rfkill->shutdown_name, len + 9, "%s_shutdown", rfkill->name); - rfkill->clk = devm_clk_get(&pdev->dev, clk_name); + rfkill->clk = devm_clk_get(&pdev->dev, NULL); gpio = devm_gpiod_get_index(&pdev->dev, rfkill->reset_name, 0); if (!IS_ERR(gpio)) { @@ -146,14 +144,6 @@ static int rfkill_gpio_probe(struct platform_device *pdev) return -EINVAL; } - if (pdata && pdata->gpio_runtime_setup) { - ret = pdata->gpio_runtime_setup(pdev); - if (ret) { - dev_err(&pdev->dev, "can't set up gpio\n"); - return ret; - } - } - rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev, rfkill->type, &rfkill_gpio_ops, rfkill); @@ -174,10 +164,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev) static int rfkill_gpio_remove(struct platform_device *pdev) { struct rfkill_gpio_data *rfkill = platform_get_drvdata(pdev); - struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data; - if (pdata && pdata->gpio_runtime_close) - pdata->gpio_runtime_close(pdev); rfkill_unregister(rfkill->rfkill_dev); rfkill_destroy(rfkill->rfkill_dev); -- cgit v1.2.3-71-gd317 From f39d2fa0122e6abd8505a3598f3aa535d0d5aade Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Mon, 24 Feb 2014 18:37:35 +0800 Subject: mtd: spi-nor: copy the SPI NOR commands to a new header file This patch adds a new header :spi-nor.h, and copies all the SPI NOR commands and relative macros into this new header. This hearder can be used by the m25p80.c and other spi-nor controller, such as Freescale's Quadspi. Signed-off-by: Huang Shijie Acked-by: Marek Vasut Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 55 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 include/linux/mtd/spi-nor.h (limited to 'include') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h new file mode 100644 index 000000000000..483fc2a086c4 --- /dev/null +++ b/include/linux/mtd/spi-nor.h @@ -0,0 +1,55 @@ +#ifndef __LINUX_MTD_SPI_NOR_H +#define __LINUX_MTD_SPI_NOR_H + +/* Flash opcodes. */ +#define OPCODE_WREN 0x06 /* Write enable */ +#define OPCODE_RDSR 0x05 /* Read status register */ +#define OPCODE_WRSR 0x01 /* Write status register 1 byte */ +#define OPCODE_NORM_READ 0x03 /* Read data bytes (low frequency) */ +#define OPCODE_FAST_READ 0x0b /* Read data bytes (high frequency) */ +#define OPCODE_DUAL_READ 0x3b /* Read data bytes (Dual SPI) */ +#define OPCODE_QUAD_READ 0x6b /* Read data bytes (Quad SPI) */ +#define OPCODE_PP 0x02 /* Page program (up to 256 bytes) */ +#define OPCODE_BE_4K 0x20 /* Erase 4KiB block */ +#define OPCODE_BE_4K_PMC 0xd7 /* Erase 4KiB block on PMC chips */ +#define OPCODE_BE_32K 0x52 /* Erase 32KiB block */ +#define OPCODE_CHIP_ERASE 0xc7 /* Erase whole flash chip */ +#define OPCODE_SE 0xd8 /* Sector erase (usually 64KiB) */ +#define OPCODE_RDID 0x9f /* Read JEDEC ID */ +#define OPCODE_RDCR 0x35 /* Read configuration register */ + +/* 4-byte address opcodes - used on Spansion and some Macronix flashes. */ +#define OPCODE_NORM_READ_4B 0x13 /* Read data bytes (low frequency) */ +#define OPCODE_FAST_READ_4B 0x0c /* Read data bytes (high frequency) */ +#define OPCODE_DUAL_READ_4B 0x3c /* Read data bytes (Dual SPI) */ +#define OPCODE_QUAD_READ_4B 0x6c /* Read data bytes (Quad SPI) */ +#define OPCODE_PP_4B 0x12 /* Page program (up to 256 bytes) */ +#define OPCODE_SE_4B 0xdc /* Sector erase (usually 64KiB) */ + +/* Used for SST flashes only. */ +#define OPCODE_BP 0x02 /* Byte program */ +#define OPCODE_WRDI 0x04 /* Write disable */ +#define OPCODE_AAI_WP 0xad /* Auto address increment word program */ + +/* Used for Macronix and Winbond flashes. */ +#define OPCODE_EN4B 0xb7 /* Enter 4-byte mode */ +#define OPCODE_EX4B 0xe9 /* Exit 4-byte mode */ + +/* Used for Spansion flashes only. */ +#define OPCODE_BRWR 0x17 /* Bank register write */ + +/* Status Register bits. */ +#define SR_WIP 1 /* Write in progress */ +#define SR_WEL 2 /* Write enable latch */ +/* meaning of other SR_* bits may differ between vendors */ +#define SR_BP0 4 /* Block protect 0 */ +#define SR_BP1 8 /* Block protect 1 */ +#define SR_BP2 0x10 /* Block protect 2 */ +#define SR_SRWD 0x80 /* SR write protect */ + +#define SR_QUAD_EN_MX 0x40 /* Macronix Quad I/O */ + +/* Configuration Register bits. */ +#define CR_QUAD_EN_SPAN 0x2 /* Spansion Quad I/O */ + +#endif -- cgit v1.2.3-71-gd317 From 6e602ef73334550bbbb8be1041a3ce6eecbd42f1 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Mon, 24 Feb 2014 18:37:36 +0800 Subject: mtd: spi-nor: add the basic data structures The spi_nor{} is cloned from the m25p{}. The spi_nor{} can be used by both the m25p80 and spi-nor controller. We also add the spi_nor_xfer_cfg{} which can be used by the two fundamental primitives: read_xfer/write_xfer. 1) the hooks for spi_nor{}: @prepare/unpreare: used to do some work before or after the read/write/erase/lock/unlock. @read_xfer/write_xfer: We can use these two hooks to code all the following hooks if the driver tries to implement them by itself. @read_reg: used to read the registers, such as read status register, read configure register. @write_reg: used to write the registers, such as write enable, erase sector. @read_id: read out the ID info. @wait_till_ready: wait till the NOR becomes ready. @read: read out the data from the NOR. @write: write data to the NOR. @erase: erase a sector of the NOR. 2) Add a new field sst_write_second for the SST NOR write. Signed-off-by: Huang Shijie Acked-by: Marek Vasut Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 110 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 483fc2a086c4..3a3c3872c8cd 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -52,4 +52,114 @@ /* Configuration Register bits. */ #define CR_QUAD_EN_SPAN 0x2 /* Spansion Quad I/O */ +enum read_mode { + SPI_NOR_NORMAL = 0, + SPI_NOR_FAST, + SPI_NOR_DUAL, + SPI_NOR_QUAD, +}; + +/** + * struct spi_nor_xfer_cfg - Structure for defining a Serial Flash transfer + * @wren: command for "Write Enable", or 0x00 for not required + * @cmd: command for operation + * @cmd_pins: number of pins to send @cmd (1, 2, 4) + * @addr: address for operation + * @addr_pins: number of pins to send @addr (1, 2, 4) + * @addr_width: number of address bytes + * (3,4, or 0 for address not required) + * @mode: mode data + * @mode_pins: number of pins to send @mode (1, 2, 4) + * @mode_cycles: number of mode cycles (0 for mode not required) + * @dummy_cycles: number of dummy cycles (0 for dummy not required) + */ +struct spi_nor_xfer_cfg { + u8 wren; + u8 cmd; + u8 cmd_pins; + u32 addr; + u8 addr_pins; + u8 addr_width; + u8 mode; + u8 mode_pins; + u8 mode_cycles; + u8 dummy_cycles; +}; + +#define SPI_NOR_MAX_CMD_SIZE 8 +enum spi_nor_ops { + SPI_NOR_OPS_READ = 0, + SPI_NOR_OPS_WRITE, + SPI_NOR_OPS_ERASE, + SPI_NOR_OPS_LOCK, + SPI_NOR_OPS_UNLOCK, +}; + +/** + * struct spi_nor - Structure for defining a the SPI NOR layer + * @mtd: point to a mtd_info structure + * @lock: the lock for the read/write/erase/lock/unlock operations + * @dev: point to a spi device, or a spi nor controller device. + * @page_size: the page size of the SPI NOR + * @addr_width: number of address bytes + * @erase_opcode: the opcode for erasing a sector + * @read_opcode: the read opcode + * @read_dummy: the dummy needed by the read operation + * @program_opcode: the program opcode + * @flash_read: the mode of the read + * @sst_write_second: used by the SST write operation + * @cfg: used by the read_xfer/write_xfer + * @cmd_buf: used by the write_reg + * @prepare: [OPTIONAL] do some preparations for the + * read/write/erase/lock/unlock operations + * @unprepare: [OPTIONAL] do some post work after the + * read/write/erase/lock/unlock operations + * @read_xfer: [OPTIONAL] the read fundamental primitive + * @write_xfer: [OPTIONAL] the writefundamental primitive + * @read_reg: [DRIVER-SPECIFIC] read out the register + * @write_reg: [DRIVER-SPECIFIC] write data to the register + * @read_id: [REPLACEABLE] read out the ID data, and find + * the proper spi_device_id + * @wait_till_ready: [REPLACEABLE] wait till the NOR becomes ready + * @read: [DRIVER-SPECIFIC] read data from the SPI NOR + * @write: [DRIVER-SPECIFIC] write data to the SPI NOR + * @erase: [DRIVER-SPECIFIC] erase a sector of the SPI NOR + * at the offset @offs + * @priv: the private data + */ +struct spi_nor { + struct mtd_info *mtd; + struct mutex lock; + struct device *dev; + u32 page_size; + u8 addr_width; + u8 erase_opcode; + u8 read_opcode; + u8 read_dummy; + u8 program_opcode; + enum read_mode flash_read; + bool sst_write_second; + struct spi_nor_xfer_cfg cfg; + u8 cmd_buf[SPI_NOR_MAX_CMD_SIZE]; + + int (*prepare)(struct spi_nor *nor, enum spi_nor_ops ops); + void (*unprepare)(struct spi_nor *nor, enum spi_nor_ops ops); + int (*read_xfer)(struct spi_nor *nor, struct spi_nor_xfer_cfg *cfg, + u8 *buf, size_t len); + int (*write_xfer)(struct spi_nor *nor, struct spi_nor_xfer_cfg *cfg, + u8 *buf, size_t len); + int (*read_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len); + int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len, + int write_enable); + const struct spi_device_id *(*read_id)(struct spi_nor *nor); + int (*wait_till_ready)(struct spi_nor *nor); + + int (*read)(struct spi_nor *nor, loff_t from, + size_t len, size_t *retlen, u_char *read_buf); + void (*write)(struct spi_nor *nor, loff_t to, + size_t len, size_t *retlen, const u_char *write_buf); + int (*erase)(struct spi_nor *nor, loff_t offs); + + void *priv; +}; #endif -- cgit v1.2.3-71-gd317 From b199489d37b21c5e294f95bf265acc5dde3fc3a2 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Mon, 24 Feb 2014 18:37:37 +0800 Subject: mtd: spi-nor: add the framework for SPI NOR This patch cloned most of the m25p80.c. In theory, it adds a new spi-nor layer. Before this patch, the layer is like: MTD ------------------------ m25p80 ------------------------ spi bus driver ------------------------ SPI NOR chip After this patch, the layer is like: MTD ------------------------ spi-nor ------------------------ m25p80 ------------------------ spi bus driver ------------------------ SPI NOR chip With the spi-nor controller driver(Freescale Quadspi), it looks like: MTD ------------------------ spi-nor ------------------------ fsl-quadspi ------------------------ SPI NOR chip New APIs: spi_nor_scan: used to scan a spi-nor flash. Signed-off-by: Huang Shijie Acked-by: Marek Vasut [Brian: rebased to include additional m25p_ids[] entry] Signed-off-by: Brian Norris --- drivers/mtd/Kconfig | 2 + drivers/mtd/Makefile | 1 + drivers/mtd/spi-nor/Kconfig | 6 + drivers/mtd/spi-nor/Makefile | 1 + drivers/mtd/spi-nor/spi-nor.c | 1088 +++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/spi-nor.h | 20 + 6 files changed, 1118 insertions(+) create mode 100644 drivers/mtd/spi-nor/Kconfig create mode 100644 drivers/mtd/spi-nor/Makefile create mode 100644 drivers/mtd/spi-nor/spi-nor.c (limited to 'include') diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index 5d49a2129618..94b821042d9d 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -321,6 +321,8 @@ source "drivers/mtd/onenand/Kconfig" source "drivers/mtd/lpddr/Kconfig" +source "drivers/mtd/spi-nor/Kconfig" + source "drivers/mtd/ubi/Kconfig" endif # MTD diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile index 4cfb31e6c966..40fd15344387 100644 --- a/drivers/mtd/Makefile +++ b/drivers/mtd/Makefile @@ -32,4 +32,5 @@ inftl-objs := inftlcore.o inftlmount.o obj-y += chips/ lpddr/ maps/ devices/ nand/ onenand/ tests/ +obj-$(CONFIG_MTD_SPI_NOR_BASE) += spi-nor/ obj-$(CONFIG_MTD_UBI) += ubi/ diff --git a/drivers/mtd/spi-nor/Kconfig b/drivers/mtd/spi-nor/Kconfig new file mode 100644 index 000000000000..41591af67aca --- /dev/null +++ b/drivers/mtd/spi-nor/Kconfig @@ -0,0 +1,6 @@ +config MTD_SPI_NOR_BASE + bool "the framework for SPI-NOR support" + depends on MTD + help + This is the framework for the SPI NOR which can be used by the SPI + device drivers and the SPI-NOR device driver. diff --git a/drivers/mtd/spi-nor/Makefile b/drivers/mtd/spi-nor/Makefile new file mode 100644 index 000000000000..7dfe1f9b6940 --- /dev/null +++ b/drivers/mtd/spi-nor/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_MTD_SPI_NOR_BASE) += spi-nor.o diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c new file mode 100644 index 000000000000..50b929095bdb --- /dev/null +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -0,0 +1,1088 @@ +/* + * Cloned most of the code from the m25p80.c + * + * This code is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* Define max times to check status register before we give up. */ +#define MAX_READY_WAIT_JIFFIES (40 * HZ) /* M25P16 specs 40s max chip erase */ + +#define JEDEC_MFR(_jedec_id) ((_jedec_id) >> 16) + +/* + * Read the status register, returning its value in the location + * Return the status register value. + * Returns negative if error occurred. + */ +static int read_sr(struct spi_nor *nor) +{ + int ret; + u8 val; + + ret = nor->read_reg(nor, OPCODE_RDSR, &val, 1); + if (ret < 0) { + pr_err("error %d reading SR\n", (int) ret); + return ret; + } + + return val; +} + +/* + * Read configuration register, returning its value in the + * location. Return the configuration register value. + * Returns negative if error occured. + */ +static int read_cr(struct spi_nor *nor) +{ + int ret; + u8 val; + + ret = nor->read_reg(nor, OPCODE_RDCR, &val, 1); + if (ret < 0) { + dev_err(nor->dev, "error %d reading CR\n", ret); + return ret; + } + + return val; +} + +/* + * Dummy Cycle calculation for different type of read. + * It can be used to support more commands with + * different dummy cycle requirements. + */ +static inline int spi_nor_read_dummy_cycles(struct spi_nor *nor) +{ + switch (nor->flash_read) { + case SPI_NOR_FAST: + case SPI_NOR_DUAL: + case SPI_NOR_QUAD: + return 1; + case SPI_NOR_NORMAL: + return 0; + } + return 0; +} + +/* + * Write status register 1 byte + * Returns negative if error occurred. + */ +static inline int write_sr(struct spi_nor *nor, u8 val) +{ + nor->cmd_buf[0] = val; + return nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 1, 0); +} + +/* + * Set write enable latch with Write Enable command. + * Returns negative if error occurred. + */ +static inline int write_enable(struct spi_nor *nor) +{ + return nor->write_reg(nor, OPCODE_WREN, NULL, 0, 0); +} + +/* + * Send write disble instruction to the chip. + */ +static inline int write_disable(struct spi_nor *nor) +{ + return nor->write_reg(nor, OPCODE_WRDI, NULL, 0, 0); +} + +static inline struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd) +{ + return mtd->priv; +} + +/* Enable/disable 4-byte addressing mode. */ +static inline int set_4byte(struct spi_nor *nor, u32 jedec_id, int enable) +{ + int status; + bool need_wren = false; + u8 cmd; + + switch (JEDEC_MFR(jedec_id)) { + case CFI_MFR_ST: /* Micron, actually */ + /* Some Micron need WREN command; all will accept it */ + need_wren = true; + case CFI_MFR_MACRONIX: + case 0xEF /* winbond */: + if (need_wren) + write_enable(nor); + + cmd = enable ? OPCODE_EN4B : OPCODE_EX4B; + status = nor->write_reg(nor, cmd, NULL, 0, 0); + if (need_wren) + write_disable(nor); + + return status; + default: + /* Spansion style */ + nor->cmd_buf[0] = enable << 7; + return nor->write_reg(nor, OPCODE_BRWR, nor->cmd_buf, 1, 0); + } +} + +static int spi_nor_wait_till_ready(struct spi_nor *nor) +{ + unsigned long deadline; + int sr; + + deadline = jiffies + MAX_READY_WAIT_JIFFIES; + + do { + cond_resched(); + + sr = read_sr(nor); + if (sr < 0) + break; + else if (!(sr & SR_WIP)) + return 0; + } while (!time_after_eq(jiffies, deadline)); + + return -ETIMEDOUT; +} + +/* + * Service routine to read status register until ready, or timeout occurs. + * Returns non-zero if error. + */ +static int wait_till_ready(struct spi_nor *nor) +{ + return nor->wait_till_ready(nor); +} + +/* + * Erase the whole flash memory + * + * Returns 0 if successful, non-zero otherwise. + */ +static int erase_chip(struct spi_nor *nor) +{ + int ret; + + dev_dbg(nor->dev, " %lldKiB\n", (long long)(nor->mtd->size >> 10)); + + /* Wait until finished previous write command. */ + ret = wait_till_ready(nor); + if (ret) + return ret; + + /* Send write enable, then erase commands. */ + write_enable(nor); + + return nor->write_reg(nor, OPCODE_CHIP_ERASE, NULL, 0, 0); +} + +static int spi_nor_lock_and_prep(struct spi_nor *nor, enum spi_nor_ops ops) +{ + int ret = 0; + + mutex_lock(&nor->lock); + + if (nor->prepare) { + ret = nor->prepare(nor, ops); + if (ret) { + dev_err(nor->dev, "failed in the preparation.\n"); + mutex_unlock(&nor->lock); + return ret; + } + } + return ret; +} + +static void spi_nor_unlock_and_unprep(struct spi_nor *nor, enum spi_nor_ops ops) +{ + if (nor->unprepare) + nor->unprepare(nor, ops); + mutex_unlock(&nor->lock); +} + +/* + * Erase an address range on the nor chip. The address range may extend + * one or more erase sectors. Return an error is there is a problem erasing. + */ +static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr) +{ + struct spi_nor *nor = mtd_to_spi_nor(mtd); + u32 addr, len; + uint32_t rem; + int ret; + + dev_dbg(nor->dev, "at 0x%llx, len %lld\n", (long long)instr->addr, + (long long)instr->len); + + div_u64_rem(instr->len, mtd->erasesize, &rem); + if (rem) + return -EINVAL; + + addr = instr->addr; + len = instr->len; + + ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_ERASE); + if (ret) + return ret; + + /* whole-chip erase? */ + if (len == mtd->size) { + if (erase_chip(nor)) { + ret = -EIO; + goto erase_err; + } + + /* REVISIT in some cases we could speed up erasing large regions + * by using OPCODE_SE instead of OPCODE_BE_4K. We may have set up + * to use "small sector erase", but that's not always optimal. + */ + + /* "sector"-at-a-time erase */ + } else { + while (len) { + if (nor->erase(nor, addr)) { + ret = -EIO; + goto erase_err; + } + + addr += mtd->erasesize; + len -= mtd->erasesize; + } + } + + spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_ERASE); + + instr->state = MTD_ERASE_DONE; + mtd_erase_callback(instr); + + return ret; + +erase_err: + spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_ERASE); + instr->state = MTD_ERASE_FAILED; + return ret; +} + +static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) +{ + struct spi_nor *nor = mtd_to_spi_nor(mtd); + uint32_t offset = ofs; + uint8_t status_old, status_new; + int ret = 0; + + ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_LOCK); + if (ret) + return ret; + + /* Wait until finished previous command */ + ret = wait_till_ready(nor); + if (ret) + goto err; + + status_old = read_sr(nor); + + if (offset < mtd->size - (mtd->size / 2)) + status_new = status_old | SR_BP2 | SR_BP1 | SR_BP0; + else if (offset < mtd->size - (mtd->size / 4)) + status_new = (status_old & ~SR_BP0) | SR_BP2 | SR_BP1; + else if (offset < mtd->size - (mtd->size / 8)) + status_new = (status_old & ~SR_BP1) | SR_BP2 | SR_BP0; + else if (offset < mtd->size - (mtd->size / 16)) + status_new = (status_old & ~(SR_BP0 | SR_BP1)) | SR_BP2; + else if (offset < mtd->size - (mtd->size / 32)) + status_new = (status_old & ~SR_BP2) | SR_BP1 | SR_BP0; + else if (offset < mtd->size - (mtd->size / 64)) + status_new = (status_old & ~(SR_BP2 | SR_BP0)) | SR_BP1; + else + status_new = (status_old & ~(SR_BP2 | SR_BP1)) | SR_BP0; + + /* Only modify protection if it will not unlock other areas */ + if ((status_new & (SR_BP2 | SR_BP1 | SR_BP0)) > + (status_old & (SR_BP2 | SR_BP1 | SR_BP0))) { + write_enable(nor); + ret = write_sr(nor, status_new); + if (ret) + goto err; + } + +err: + spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_LOCK); + return ret; +} + +static int spi_nor_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) +{ + struct spi_nor *nor = mtd_to_spi_nor(mtd); + uint32_t offset = ofs; + uint8_t status_old, status_new; + int ret = 0; + + ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_UNLOCK); + if (ret) + return ret; + + /* Wait until finished previous command */ + ret = wait_till_ready(nor); + if (ret) + goto err; + + status_old = read_sr(nor); + + if (offset+len > mtd->size - (mtd->size / 64)) + status_new = status_old & ~(SR_BP2 | SR_BP1 | SR_BP0); + else if (offset+len > mtd->size - (mtd->size / 32)) + status_new = (status_old & ~(SR_BP2 | SR_BP1)) | SR_BP0; + else if (offset+len > mtd->size - (mtd->size / 16)) + status_new = (status_old & ~(SR_BP2 | SR_BP0)) | SR_BP1; + else if (offset+len > mtd->size - (mtd->size / 8)) + status_new = (status_old & ~SR_BP2) | SR_BP1 | SR_BP0; + else if (offset+len > mtd->size - (mtd->size / 4)) + status_new = (status_old & ~(SR_BP0 | SR_BP1)) | SR_BP2; + else if (offset+len > mtd->size - (mtd->size / 2)) + status_new = (status_old & ~SR_BP1) | SR_BP2 | SR_BP0; + else + status_new = (status_old & ~SR_BP0) | SR_BP2 | SR_BP1; + + /* Only modify protection if it will not lock other areas */ + if ((status_new & (SR_BP2 | SR_BP1 | SR_BP0)) < + (status_old & (SR_BP2 | SR_BP1 | SR_BP0))) { + write_enable(nor); + ret = write_sr(nor, status_new); + if (ret) + goto err; + } + +err: + spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_UNLOCK); + return ret; +} + +struct flash_info { + /* JEDEC id zero means "no ID" (most older chips); otherwise it has + * a high byte of zero plus three data bytes: the manufacturer id, + * then a two byte device id. + */ + u32 jedec_id; + u16 ext_id; + + /* The size listed here is what works with OPCODE_SE, which isn't + * necessarily called a "sector" by the vendor. + */ + unsigned sector_size; + u16 n_sectors; + + u16 page_size; + u16 addr_width; + + u16 flags; +#define SECT_4K 0x01 /* OPCODE_BE_4K works uniformly */ +#define SPI_NOR_NO_ERASE 0x02 /* No erase command needed */ +#define SST_WRITE 0x04 /* use SST byte programming */ +#define SPI_NOR_NO_FR 0x08 /* Can't do fastread */ +#define SECT_4K_PMC 0x10 /* OPCODE_BE_4K_PMC works uniformly */ +#define SPI_NOR_DUAL_READ 0x20 /* Flash supports Dual Read */ +#define SPI_NOR_QUAD_READ 0x40 /* Flash supports Quad Read */ +}; + +#define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags) \ + ((kernel_ulong_t)&(struct flash_info) { \ + .jedec_id = (_jedec_id), \ + .ext_id = (_ext_id), \ + .sector_size = (_sector_size), \ + .n_sectors = (_n_sectors), \ + .page_size = 256, \ + .flags = (_flags), \ + }) + +#define CAT25_INFO(_sector_size, _n_sectors, _page_size, _addr_width, _flags) \ + ((kernel_ulong_t)&(struct flash_info) { \ + .sector_size = (_sector_size), \ + .n_sectors = (_n_sectors), \ + .page_size = (_page_size), \ + .addr_width = (_addr_width), \ + .flags = (_flags), \ + }) + +/* NOTE: double check command sets and memory organization when you add + * more nor chips. This current list focusses on newer chips, which + * have been converging on command sets which including JEDEC ID. + */ +const struct spi_device_id spi_nor_ids[] = { + /* Atmel -- some are (confusingly) marketed as "DataFlash" */ + { "at25fs010", INFO(0x1f6601, 0, 32 * 1024, 4, SECT_4K) }, + { "at25fs040", INFO(0x1f6604, 0, 64 * 1024, 8, SECT_4K) }, + + { "at25df041a", INFO(0x1f4401, 0, 64 * 1024, 8, SECT_4K) }, + { "at25df321a", INFO(0x1f4701, 0, 64 * 1024, 64, SECT_4K) }, + { "at25df641", INFO(0x1f4800, 0, 64 * 1024, 128, SECT_4K) }, + + { "at26f004", INFO(0x1f0400, 0, 64 * 1024, 8, SECT_4K) }, + { "at26df081a", INFO(0x1f4501, 0, 64 * 1024, 16, SECT_4K) }, + { "at26df161a", INFO(0x1f4601, 0, 64 * 1024, 32, SECT_4K) }, + { "at26df321", INFO(0x1f4700, 0, 64 * 1024, 64, SECT_4K) }, + + { "at45db081d", INFO(0x1f2500, 0, 64 * 1024, 16, SECT_4K) }, + + /* EON -- en25xxx */ + { "en25f32", INFO(0x1c3116, 0, 64 * 1024, 64, SECT_4K) }, + { "en25p32", INFO(0x1c2016, 0, 64 * 1024, 64, 0) }, + { "en25q32b", INFO(0x1c3016, 0, 64 * 1024, 64, 0) }, + { "en25p64", INFO(0x1c2017, 0, 64 * 1024, 128, 0) }, + { "en25q64", INFO(0x1c3017, 0, 64 * 1024, 128, SECT_4K) }, + { "en25qh256", INFO(0x1c7019, 0, 64 * 1024, 512, 0) }, + + /* ESMT */ + { "f25l32pa", INFO(0x8c2016, 0, 64 * 1024, 64, SECT_4K) }, + + /* Everspin */ + { "mr25h256", CAT25_INFO( 32 * 1024, 1, 256, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) }, + { "mr25h10", CAT25_INFO(128 * 1024, 1, 256, 3, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) }, + + /* GigaDevice */ + { "gd25q32", INFO(0xc84016, 0, 64 * 1024, 64, SECT_4K) }, + { "gd25q64", INFO(0xc84017, 0, 64 * 1024, 128, SECT_4K) }, + + /* Intel/Numonyx -- xxxs33b */ + { "160s33b", INFO(0x898911, 0, 64 * 1024, 32, 0) }, + { "320s33b", INFO(0x898912, 0, 64 * 1024, 64, 0) }, + { "640s33b", INFO(0x898913, 0, 64 * 1024, 128, 0) }, + + /* Macronix */ + { "mx25l2005a", INFO(0xc22012, 0, 64 * 1024, 4, SECT_4K) }, + { "mx25l4005a", INFO(0xc22013, 0, 64 * 1024, 8, SECT_4K) }, + { "mx25l8005", INFO(0xc22014, 0, 64 * 1024, 16, 0) }, + { "mx25l1606e", INFO(0xc22015, 0, 64 * 1024, 32, SECT_4K) }, + { "mx25l3205d", INFO(0xc22016, 0, 64 * 1024, 64, 0) }, + { "mx25l3255e", INFO(0xc29e16, 0, 64 * 1024, 64, SECT_4K) }, + { "mx25l6405d", INFO(0xc22017, 0, 64 * 1024, 128, 0) }, + { "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, 0) }, + { "mx25l12855e", INFO(0xc22618, 0, 64 * 1024, 256, 0) }, + { "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, 0) }, + { "mx25l25655e", INFO(0xc22619, 0, 64 * 1024, 512, 0) }, + { "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, SPI_NOR_QUAD_READ) }, + { "mx66l1g55g", INFO(0xc2261b, 0, 64 * 1024, 2048, SPI_NOR_QUAD_READ) }, + + /* Micron */ + { "n25q064", INFO(0x20ba17, 0, 64 * 1024, 128, 0) }, + { "n25q128a11", INFO(0x20bb18, 0, 64 * 1024, 256, 0) }, + { "n25q128a13", INFO(0x20ba18, 0, 64 * 1024, 256, 0) }, + { "n25q256a", INFO(0x20ba19, 0, 64 * 1024, 512, SECT_4K) }, + { "n25q512a", INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K) }, + + /* PMC */ + { "pm25lv512", INFO(0, 0, 32 * 1024, 2, SECT_4K_PMC) }, + { "pm25lv010", INFO(0, 0, 32 * 1024, 4, SECT_4K_PMC) }, + { "pm25lq032", INFO(0x7f9d46, 0, 64 * 1024, 64, SECT_4K) }, + + /* Spansion -- single (large) sector size only, at least + * for the chips listed here (without boot sectors). + */ + { "s25sl032p", INFO(0x010215, 0x4d00, 64 * 1024, 64, 0) }, + { "s25sl064p", INFO(0x010216, 0x4d00, 64 * 1024, 128, 0) }, + { "s25fl256s0", INFO(0x010219, 0x4d00, 256 * 1024, 128, 0) }, + { "s25fl256s1", INFO(0x010219, 0x4d01, 64 * 1024, 512, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, + { "s25fl512s", INFO(0x010220, 0x4d00, 256 * 1024, 256, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, + { "s70fl01gs", INFO(0x010221, 0x4d00, 256 * 1024, 256, 0) }, + { "s25sl12800", INFO(0x012018, 0x0300, 256 * 1024, 64, 0) }, + { "s25sl12801", INFO(0x012018, 0x0301, 64 * 1024, 256, 0) }, + { "s25fl129p0", INFO(0x012018, 0x4d00, 256 * 1024, 64, 0) }, + { "s25fl129p1", INFO(0x012018, 0x4d01, 64 * 1024, 256, 0) }, + { "s25sl004a", INFO(0x010212, 0, 64 * 1024, 8, 0) }, + { "s25sl008a", INFO(0x010213, 0, 64 * 1024, 16, 0) }, + { "s25sl016a", INFO(0x010214, 0, 64 * 1024, 32, 0) }, + { "s25sl032a", INFO(0x010215, 0, 64 * 1024, 64, 0) }, + { "s25sl064a", INFO(0x010216, 0, 64 * 1024, 128, 0) }, + { "s25fl008k", INFO(0xef4014, 0, 64 * 1024, 16, SECT_4K) }, + { "s25fl016k", INFO(0xef4015, 0, 64 * 1024, 32, SECT_4K) }, + { "s25fl064k", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) }, + + /* SST -- large erase sizes are "overlays", "sectors" are 4K */ + { "sst25vf040b", INFO(0xbf258d, 0, 64 * 1024, 8, SECT_4K | SST_WRITE) }, + { "sst25vf080b", INFO(0xbf258e, 0, 64 * 1024, 16, SECT_4K | SST_WRITE) }, + { "sst25vf016b", INFO(0xbf2541, 0, 64 * 1024, 32, SECT_4K | SST_WRITE) }, + { "sst25vf032b", INFO(0xbf254a, 0, 64 * 1024, 64, SECT_4K | SST_WRITE) }, + { "sst25vf064c", INFO(0xbf254b, 0, 64 * 1024, 128, SECT_4K) }, + { "sst25wf512", INFO(0xbf2501, 0, 64 * 1024, 1, SECT_4K | SST_WRITE) }, + { "sst25wf010", INFO(0xbf2502, 0, 64 * 1024, 2, SECT_4K | SST_WRITE) }, + { "sst25wf020", INFO(0xbf2503, 0, 64 * 1024, 4, SECT_4K | SST_WRITE) }, + { "sst25wf040", INFO(0xbf2504, 0, 64 * 1024, 8, SECT_4K | SST_WRITE) }, + + /* ST Microelectronics -- newer production may have feature updates */ + { "m25p05", INFO(0x202010, 0, 32 * 1024, 2, 0) }, + { "m25p10", INFO(0x202011, 0, 32 * 1024, 4, 0) }, + { "m25p20", INFO(0x202012, 0, 64 * 1024, 4, 0) }, + { "m25p40", INFO(0x202013, 0, 64 * 1024, 8, 0) }, + { "m25p80", INFO(0x202014, 0, 64 * 1024, 16, 0) }, + { "m25p16", INFO(0x202015, 0, 64 * 1024, 32, 0) }, + { "m25p32", INFO(0x202016, 0, 64 * 1024, 64, 0) }, + { "m25p64", INFO(0x202017, 0, 64 * 1024, 128, 0) }, + { "m25p128", INFO(0x202018, 0, 256 * 1024, 64, 0) }, + { "n25q032", INFO(0x20ba16, 0, 64 * 1024, 64, 0) }, + + { "m25p05-nonjedec", INFO(0, 0, 32 * 1024, 2, 0) }, + { "m25p10-nonjedec", INFO(0, 0, 32 * 1024, 4, 0) }, + { "m25p20-nonjedec", INFO(0, 0, 64 * 1024, 4, 0) }, + { "m25p40-nonjedec", INFO(0, 0, 64 * 1024, 8, 0) }, + { "m25p80-nonjedec", INFO(0, 0, 64 * 1024, 16, 0) }, + { "m25p16-nonjedec", INFO(0, 0, 64 * 1024, 32, 0) }, + { "m25p32-nonjedec", INFO(0, 0, 64 * 1024, 64, 0) }, + { "m25p64-nonjedec", INFO(0, 0, 64 * 1024, 128, 0) }, + { "m25p128-nonjedec", INFO(0, 0, 256 * 1024, 64, 0) }, + + { "m45pe10", INFO(0x204011, 0, 64 * 1024, 2, 0) }, + { "m45pe80", INFO(0x204014, 0, 64 * 1024, 16, 0) }, + { "m45pe16", INFO(0x204015, 0, 64 * 1024, 32, 0) }, + + { "m25pe20", INFO(0x208012, 0, 64 * 1024, 4, 0) }, + { "m25pe80", INFO(0x208014, 0, 64 * 1024, 16, 0) }, + { "m25pe16", INFO(0x208015, 0, 64 * 1024, 32, SECT_4K) }, + + { "m25px16", INFO(0x207115, 0, 64 * 1024, 32, SECT_4K) }, + { "m25px32", INFO(0x207116, 0, 64 * 1024, 64, SECT_4K) }, + { "m25px32-s0", INFO(0x207316, 0, 64 * 1024, 64, SECT_4K) }, + { "m25px32-s1", INFO(0x206316, 0, 64 * 1024, 64, SECT_4K) }, + { "m25px64", INFO(0x207117, 0, 64 * 1024, 128, 0) }, + + /* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */ + { "w25x10", INFO(0xef3011, 0, 64 * 1024, 2, SECT_4K) }, + { "w25x20", INFO(0xef3012, 0, 64 * 1024, 4, SECT_4K) }, + { "w25x40", INFO(0xef3013, 0, 64 * 1024, 8, SECT_4K) }, + { "w25x80", INFO(0xef3014, 0, 64 * 1024, 16, SECT_4K) }, + { "w25x16", INFO(0xef3015, 0, 64 * 1024, 32, SECT_4K) }, + { "w25x32", INFO(0xef3016, 0, 64 * 1024, 64, SECT_4K) }, + { "w25q32", INFO(0xef4016, 0, 64 * 1024, 64, SECT_4K) }, + { "w25q32dw", INFO(0xef6016, 0, 64 * 1024, 64, SECT_4K) }, + { "w25x64", INFO(0xef3017, 0, 64 * 1024, 128, SECT_4K) }, + { "w25q64", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) }, + { "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) }, + { "w25q80", INFO(0xef5014, 0, 64 * 1024, 16, SECT_4K) }, + { "w25q80bl", INFO(0xef4014, 0, 64 * 1024, 16, SECT_4K) }, + { "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) }, + { "w25q256", INFO(0xef4019, 0, 64 * 1024, 512, SECT_4K) }, + + /* Catalyst / On Semiconductor -- non-JEDEC */ + { "cat25c11", CAT25_INFO( 16, 8, 16, 1, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) }, + { "cat25c03", CAT25_INFO( 32, 8, 16, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) }, + { "cat25c09", CAT25_INFO( 128, 8, 32, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) }, + { "cat25c17", CAT25_INFO( 256, 8, 32, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) }, + { "cat25128", CAT25_INFO(2048, 8, 64, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) }, + { }, +}; + +static const struct spi_device_id *spi_nor_read_id(struct spi_nor *nor) +{ + int tmp; + u8 id[5]; + u32 jedec; + u16 ext_jedec; + struct flash_info *info; + + tmp = nor->read_reg(nor, OPCODE_RDID, id, 5); + if (tmp < 0) { + dev_dbg(nor->dev, " error %d reading JEDEC ID\n", tmp); + return ERR_PTR(tmp); + } + jedec = id[0]; + jedec = jedec << 8; + jedec |= id[1]; + jedec = jedec << 8; + jedec |= id[2]; + + ext_jedec = id[3] << 8 | id[4]; + + for (tmp = 0; tmp < ARRAY_SIZE(spi_nor_ids) - 1; tmp++) { + info = (void *)spi_nor_ids[tmp].driver_data; + if (info->jedec_id == jedec) { + if (info->ext_id == 0 || info->ext_id == ext_jedec) + return &spi_nor_ids[tmp]; + } + } + dev_err(nor->dev, "unrecognized JEDEC id %06x\n", jedec); + return ERR_PTR(-ENODEV); +} + +static const struct spi_device_id *jedec_probe(struct spi_nor *nor) +{ + return nor->read_id(nor); +} + +static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len, + size_t *retlen, u_char *buf) +{ + struct spi_nor *nor = mtd_to_spi_nor(mtd); + int ret; + + dev_dbg(nor->dev, "from 0x%08x, len %zd\n", (u32)from, len); + + ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_READ); + if (ret) + return ret; + + ret = nor->read(nor, from, len, retlen, buf); + + spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_READ); + return ret; +} + +static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, + size_t *retlen, const u_char *buf) +{ + struct spi_nor *nor = mtd_to_spi_nor(mtd); + size_t actual; + int ret; + + dev_dbg(nor->dev, "to 0x%08x, len %zd\n", (u32)to, len); + + ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_WRITE); + if (ret) + return ret; + + /* Wait until finished previous write command. */ + ret = wait_till_ready(nor); + if (ret) + goto time_out; + + write_enable(nor); + + nor->sst_write_second = false; + + actual = to % 2; + /* Start write from odd address. */ + if (actual) { + nor->program_opcode = OPCODE_BP; + + /* write one byte. */ + nor->write(nor, to, 1, retlen, buf); + ret = wait_till_ready(nor); + if (ret) + goto time_out; + } + to += actual; + + /* Write out most of the data here. */ + for (; actual < len - 1; actual += 2) { + nor->program_opcode = OPCODE_AAI_WP; + + /* write two bytes. */ + nor->write(nor, to, 2, retlen, buf + actual); + ret = wait_till_ready(nor); + if (ret) + goto time_out; + to += 2; + nor->sst_write_second = true; + } + nor->sst_write_second = false; + + write_disable(nor); + ret = wait_till_ready(nor); + if (ret) + goto time_out; + + /* Write out trailing byte if it exists. */ + if (actual != len) { + write_enable(nor); + + nor->program_opcode = OPCODE_BP; + nor->write(nor, to, 1, retlen, buf + actual); + + ret = wait_till_ready(nor); + if (ret) + goto time_out; + write_disable(nor); + } +time_out: + spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_WRITE); + return ret; +} + +/* + * Write an address range to the nor chip. Data must be written in + * FLASH_PAGESIZE chunks. The address range may be any size provided + * it is within the physical boundaries. + */ +static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len, + size_t *retlen, const u_char *buf) +{ + struct spi_nor *nor = mtd_to_spi_nor(mtd); + u32 page_offset, page_size, i; + int ret; + + dev_dbg(nor->dev, "to 0x%08x, len %zd\n", (u32)to, len); + + ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_WRITE); + if (ret) + return ret; + + /* Wait until finished previous write command. */ + ret = wait_till_ready(nor); + if (ret) + goto write_err; + + write_enable(nor); + + page_offset = to & (nor->page_size - 1); + + /* do all the bytes fit onto one page? */ + if (page_offset + len <= nor->page_size) { + nor->write(nor, to, len, retlen, buf); + } else { + /* the size of data remaining on the first page */ + page_size = nor->page_size - page_offset; + nor->write(nor, to, page_size, retlen, buf); + + /* write everything in nor->page_size chunks */ + for (i = page_size; i < len; i += page_size) { + page_size = len - i; + if (page_size > nor->page_size) + page_size = nor->page_size; + + wait_till_ready(nor); + write_enable(nor); + + nor->write(nor, to + i, page_size, retlen, buf + i); + } + } + +write_err: + spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_WRITE); + return 0; +} + +static int macronix_quad_enable(struct spi_nor *nor) +{ + int ret, val; + + val = read_sr(nor); + write_enable(nor); + + nor->cmd_buf[0] = val | SR_QUAD_EN_MX; + nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 1, 0); + + if (wait_till_ready(nor)) + return 1; + + ret = read_sr(nor); + if (!(ret > 0 && (ret & SR_QUAD_EN_MX))) { + dev_err(nor->dev, "Macronix Quad bit not set\n"); + return -EINVAL; + } + + return 0; +} + +/* + * Write status Register and configuration register with 2 bytes + * The first byte will be written to the status register, while the + * second byte will be written to the configuration register. + * Return negative if error occured. + */ +static int write_sr_cr(struct spi_nor *nor, u16 val) +{ + nor->cmd_buf[0] = val & 0xff; + nor->cmd_buf[1] = (val >> 8); + + return nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 2, 0); +} + +static int spansion_quad_enable(struct spi_nor *nor) +{ + int ret; + int quad_en = CR_QUAD_EN_SPAN << 8; + + write_enable(nor); + + ret = write_sr_cr(nor, quad_en); + if (ret < 0) { + dev_err(nor->dev, + "error while writing configuration register\n"); + return -EINVAL; + } + + /* read back and check it */ + ret = read_cr(nor); + if (!(ret > 0 && (ret & CR_QUAD_EN_SPAN))) { + dev_err(nor->dev, "Spansion Quad bit not set\n"); + return -EINVAL; + } + + return 0; +} + +static int set_quad_mode(struct spi_nor *nor, u32 jedec_id) +{ + int status; + + switch (JEDEC_MFR(jedec_id)) { + case CFI_MFR_MACRONIX: + status = macronix_quad_enable(nor); + if (status) { + dev_err(nor->dev, "Macronix quad-read not enabled\n"); + return -EINVAL; + } + return status; + default: + status = spansion_quad_enable(nor); + if (status) { + dev_err(nor->dev, "Spansion quad-read not enabled\n"); + return -EINVAL; + } + return status; + } +} + +static int spi_nor_check(struct spi_nor *nor) +{ + if (!nor->dev || !nor->read || !nor->write || + !nor->read_reg || !nor->write_reg || !nor->erase) { + pr_err("spi-nor: please fill all the necessary fields!\n"); + return -EINVAL; + } + + if (!nor->read_id) + nor->read_id = spi_nor_read_id; + if (!nor->wait_till_ready) + nor->wait_till_ready = spi_nor_wait_till_ready; + + return 0; +} + +int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, + enum read_mode mode) +{ + struct flash_info *info; + struct flash_platform_data *data; + struct device *dev = nor->dev; + struct mtd_info *mtd = nor->mtd; + struct device_node *np = dev->of_node; + int ret; + int i; + + ret = spi_nor_check(nor); + if (ret) + return ret; + + /* Platform data helps sort out which chip type we have, as + * well as how this board partitions it. If we don't have + * a chip ID, try the JEDEC id commands; they'll work for most + * newer chips, even if we don't recognize the particular chip. + */ + data = dev_get_platdata(dev); + if (data && data->type) { + const struct spi_device_id *plat_id; + + for (i = 0; i < ARRAY_SIZE(spi_nor_ids) - 1; i++) { + plat_id = &spi_nor_ids[i]; + if (strcmp(data->type, plat_id->name)) + continue; + break; + } + + if (i < ARRAY_SIZE(spi_nor_ids) - 1) + id = plat_id; + else + dev_warn(dev, "unrecognized id %s\n", data->type); + } + + info = (void *)id->driver_data; + + if (info->jedec_id) { + const struct spi_device_id *jid; + + jid = jedec_probe(nor); + if (IS_ERR(jid)) { + return PTR_ERR(jid); + } else if (jid != id) { + /* + * JEDEC knows better, so overwrite platform ID. We + * can't trust partitions any longer, but we'll let + * mtd apply them anyway, since some partitions may be + * marked read-only, and we don't want to lose that + * information, even if it's not 100% accurate. + */ + dev_warn(dev, "found %s, expected %s\n", + jid->name, id->name); + id = jid; + info = (void *)jid->driver_data; + } + } + + mutex_init(&nor->lock); + + /* + * Atmel, SST and Intel/Numonyx serial nor tend to power + * up with the software protection bits set + */ + + if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ATMEL || + JEDEC_MFR(info->jedec_id) == CFI_MFR_INTEL || + JEDEC_MFR(info->jedec_id) == CFI_MFR_SST) { + write_enable(nor); + write_sr(nor, 0); + } + + if (data && data->name) + mtd->name = data->name; + else + mtd->name = dev_name(dev); + + mtd->type = MTD_NORFLASH; + mtd->writesize = 1; + mtd->flags = MTD_CAP_NORFLASH; + mtd->size = info->sector_size * info->n_sectors; + mtd->_erase = spi_nor_erase; + mtd->_read = spi_nor_read; + + /* nor protection support for STmicro chips */ + if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ST) { + mtd->_lock = spi_nor_lock; + mtd->_unlock = spi_nor_unlock; + } + + /* sst nor chips use AAI word program */ + if (info->flags & SST_WRITE) + mtd->_write = sst_write; + else + mtd->_write = spi_nor_write; + + /* prefer "small sector" erase if possible */ + if (info->flags & SECT_4K) { + nor->erase_opcode = OPCODE_BE_4K; + mtd->erasesize = 4096; + } else if (info->flags & SECT_4K_PMC) { + nor->erase_opcode = OPCODE_BE_4K_PMC; + mtd->erasesize = 4096; + } else { + nor->erase_opcode = OPCODE_SE; + mtd->erasesize = info->sector_size; + } + + if (info->flags & SPI_NOR_NO_ERASE) + mtd->flags |= MTD_NO_ERASE; + + mtd->dev.parent = dev; + nor->page_size = info->page_size; + mtd->writebufsize = nor->page_size; + + if (np) { + /* If we were instantiated by DT, use it */ + if (of_property_read_bool(np, "m25p,fast-read")) + nor->flash_read = SPI_NOR_FAST; + else + nor->flash_read = SPI_NOR_NORMAL; + } else { + /* If we weren't instantiated by DT, default to fast-read */ + nor->flash_read = SPI_NOR_FAST; + } + + /* Some devices cannot do fast-read, no matter what DT tells us */ + if (info->flags & SPI_NOR_NO_FR) + nor->flash_read = SPI_NOR_NORMAL; + + /* Quad/Dual-read mode takes precedence over fast/normal */ + if (mode == SPI_NOR_QUAD && info->flags & SPI_NOR_QUAD_READ) { + ret = set_quad_mode(nor, info->jedec_id); + if (ret) { + dev_err(dev, "quad mode not supported\n"); + return ret; + } + nor->flash_read = SPI_NOR_QUAD; + } else if (mode == SPI_NOR_DUAL && info->flags & SPI_NOR_DUAL_READ) { + nor->flash_read = SPI_NOR_DUAL; + } + + /* Default commands */ + switch (nor->flash_read) { + case SPI_NOR_QUAD: + nor->read_opcode = OPCODE_QUAD_READ; + break; + case SPI_NOR_DUAL: + nor->read_opcode = OPCODE_DUAL_READ; + break; + case SPI_NOR_FAST: + nor->read_opcode = OPCODE_FAST_READ; + break; + case SPI_NOR_NORMAL: + nor->read_opcode = OPCODE_NORM_READ; + break; + default: + dev_err(dev, "No Read opcode defined\n"); + return -EINVAL; + } + + nor->program_opcode = OPCODE_PP; + + if (info->addr_width) + nor->addr_width = info->addr_width; + else if (mtd->size > 0x1000000) { + /* enable 4-byte addressing if the device exceeds 16MiB */ + nor->addr_width = 4; + if (JEDEC_MFR(info->jedec_id) == CFI_MFR_AMD) { + /* Dedicated 4-byte command set */ + switch (nor->flash_read) { + case SPI_NOR_QUAD: + nor->read_opcode = OPCODE_QUAD_READ_4B; + break; + case SPI_NOR_DUAL: + nor->read_opcode = OPCODE_DUAL_READ_4B; + break; + case SPI_NOR_FAST: + nor->read_opcode = OPCODE_FAST_READ_4B; + break; + case SPI_NOR_NORMAL: + nor->read_opcode = OPCODE_NORM_READ_4B; + break; + } + nor->program_opcode = OPCODE_PP_4B; + /* No small sector erase for 4-byte command set */ + nor->erase_opcode = OPCODE_SE_4B; + mtd->erasesize = info->sector_size; + } else + set_4byte(nor, info->jedec_id, 1); + } else { + nor->addr_width = 3; + } + + nor->read_dummy = spi_nor_read_dummy_cycles(nor); + + dev_info(dev, "%s (%lld Kbytes)\n", id->name, + (long long)mtd->size >> 10); + + dev_dbg(dev, + "mtd .name = %s, .size = 0x%llx (%lldMiB), " + ".erasesize = 0x%.8x (%uKiB) .numeraseregions = %d\n", + mtd->name, (long long)mtd->size, (long long)(mtd->size >> 20), + mtd->erasesize, mtd->erasesize / 1024, mtd->numeraseregions); + + if (mtd->numeraseregions) + for (i = 0; i < mtd->numeraseregions; i++) + dev_dbg(dev, + "mtd.eraseregions[%d] = { .offset = 0x%llx, " + ".erasesize = 0x%.8x (%uKiB), " + ".numblocks = %d }\n", + i, (long long)mtd->eraseregions[i].offset, + mtd->eraseregions[i].erasesize, + mtd->eraseregions[i].erasesize / 1024, + mtd->eraseregions[i].numblocks); + return 0; +} + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Huang Shijie "); +MODULE_AUTHOR("Mike Lavender"); +MODULE_DESCRIPTION("framework for SPI NOR"); diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 3a3c3872c8cd..16d8409abcdc 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -162,4 +162,24 @@ struct spi_nor { void *priv; }; + +/** + * spi_nor_scan() - scan the SPI NOR + * @nor: the spi_nor structure + * @id: the spi_device_id provided by the driver + * @mode: the read mode supported by the driver + * + * The drivers can use this fuction to scan the SPI NOR. + * In the scanning, it will try to get all the necessary information to + * fill the mtd_info{} and the spi_nor{}. + * + * The board may assigns a spi_device_id with @id which be used to compared with + * the spi_device_id detected by the scanning. + * + * Return: 0 for success, others for failure. + */ +int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, + enum read_mode mode); +extern const struct spi_device_id spi_nor_ids[]; + #endif -- cgit v1.2.3-71-gd317 From 0d8c11c01274bde227d368daa8954911dd324a9f Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Mon, 24 Feb 2014 18:37:40 +0800 Subject: mtd: spi-nor: add a helper to find the spi_device_id Add the spi_nor_match_id() to find the proper spi_device_id with the NOR flash's name in the spi_nor_ids table. Signed-off-by: Huang Shijie Acked-by: Marek Vasut Signed-off-by: Brian Norris --- drivers/mtd/spi-nor/spi-nor.c | 12 ++++++++++++ include/linux/mtd/spi-nor.h | 12 ++++++++++++ 2 files changed, 24 insertions(+) (limited to 'include') diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 50b929095bdb..f7c9e638623b 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1082,6 +1082,18 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, return 0; } +const struct spi_device_id *spi_nor_match_id(char *name) +{ + const struct spi_device_id *id = spi_nor_ids; + + while (id->name[0]) { + if (!strcmp(name, id->name)) + return id; + id++; + } + return NULL; +} + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Huang Shijie "); MODULE_AUTHOR("Mike Lavender"); diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 16d8409abcdc..41dae78fbd1d 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -182,4 +182,16 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, enum read_mode mode); extern const struct spi_device_id spi_nor_ids[]; +/** + * spi_nor_match_id() - find the spi_device_id by the name + * @name: the name of the spi_device_id + * + * The drivers use this function to find the spi_device_id + * specified by the @name. + * + * Return: returns the right spi_device_id pointer on success, + * and returns NULL on failure. + */ +const struct spi_device_id *spi_nor_match_id(char *name); + #endif -- cgit v1.2.3-71-gd317 From 8eabdd1ec122cf6b77cf73e798f134fbace1b8d1 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Thu, 10 Apr 2014 16:27:28 +0800 Subject: mtd: spi-nor: add the copyright information Add the copyright information for spi-nor.c and spi-nor.h. Signed-off-by: Huang Shijie Signed-off-by: Brian Norris --- drivers/mtd/spi-nor/spi-nor.c | 6 +++++- include/linux/mtd/spi-nor.h | 9 +++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 5cd86eb2a5f0..6c64ab95dee2 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1,5 +1,9 @@ /* - * Cloned most of the code from the m25p80.c + * Based on m25p80.c, by Mike Lavender (mike@steroidmicros.com), with + * influence from lart.c (Abraham Van Der Merwe) and mtd_dataflash.c + * + * Copyright (C) 2005, Intec Automation Inc. + * Copyright (C) 2014, Freescale Semiconductor, Inc. * * This code is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 41dae78fbd1d..9428d285489b 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -1,3 +1,12 @@ +/* + * Copyright (C) 2014 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + #ifndef __LINUX_MTD_SPI_NOR_H #define __LINUX_MTD_SPI_NOR_H -- cgit v1.2.3-71-gd317 From becd0cb8666de4bfaaf6eb3042f69066c8fb8677 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 8 Apr 2014 18:10:23 -0700 Subject: mtd: spi-nor: drop \t after #define Spacing is a little non-standard here. Fix up tabs vs. spaces. Signed-off-by: Brian Norris Acked-by: Huang Shijie Reviewed-by: Marek Vasut --- include/linux/mtd/spi-nor.h | 72 ++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 9428d285489b..a6e87190ead1 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -11,55 +11,55 @@ #define __LINUX_MTD_SPI_NOR_H /* Flash opcodes. */ -#define OPCODE_WREN 0x06 /* Write enable */ -#define OPCODE_RDSR 0x05 /* Read status register */ -#define OPCODE_WRSR 0x01 /* Write status register 1 byte */ -#define OPCODE_NORM_READ 0x03 /* Read data bytes (low frequency) */ -#define OPCODE_FAST_READ 0x0b /* Read data bytes (high frequency) */ -#define OPCODE_DUAL_READ 0x3b /* Read data bytes (Dual SPI) */ -#define OPCODE_QUAD_READ 0x6b /* Read data bytes (Quad SPI) */ -#define OPCODE_PP 0x02 /* Page program (up to 256 bytes) */ -#define OPCODE_BE_4K 0x20 /* Erase 4KiB block */ -#define OPCODE_BE_4K_PMC 0xd7 /* Erase 4KiB block on PMC chips */ -#define OPCODE_BE_32K 0x52 /* Erase 32KiB block */ -#define OPCODE_CHIP_ERASE 0xc7 /* Erase whole flash chip */ -#define OPCODE_SE 0xd8 /* Sector erase (usually 64KiB) */ -#define OPCODE_RDID 0x9f /* Read JEDEC ID */ -#define OPCODE_RDCR 0x35 /* Read configuration register */ +#define OPCODE_WREN 0x06 /* Write enable */ +#define OPCODE_RDSR 0x05 /* Read status register */ +#define OPCODE_WRSR 0x01 /* Write status register 1 byte */ +#define OPCODE_NORM_READ 0x03 /* Read data bytes (low frequency) */ +#define OPCODE_FAST_READ 0x0b /* Read data bytes (high frequency) */ +#define OPCODE_DUAL_READ 0x3b /* Read data bytes (Dual SPI) */ +#define OPCODE_QUAD_READ 0x6b /* Read data bytes (Quad SPI) */ +#define OPCODE_PP 0x02 /* Page program (up to 256 bytes) */ +#define OPCODE_BE_4K 0x20 /* Erase 4KiB block */ +#define OPCODE_BE_4K_PMC 0xd7 /* Erase 4KiB block on PMC chips */ +#define OPCODE_BE_32K 0x52 /* Erase 32KiB block */ +#define OPCODE_CHIP_ERASE 0xc7 /* Erase whole flash chip */ +#define OPCODE_SE 0xd8 /* Sector erase (usually 64KiB) */ +#define OPCODE_RDID 0x9f /* Read JEDEC ID */ +#define OPCODE_RDCR 0x35 /* Read configuration register */ /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */ -#define OPCODE_NORM_READ_4B 0x13 /* Read data bytes (low frequency) */ -#define OPCODE_FAST_READ_4B 0x0c /* Read data bytes (high frequency) */ -#define OPCODE_DUAL_READ_4B 0x3c /* Read data bytes (Dual SPI) */ -#define OPCODE_QUAD_READ_4B 0x6c /* Read data bytes (Quad SPI) */ -#define OPCODE_PP_4B 0x12 /* Page program (up to 256 bytes) */ -#define OPCODE_SE_4B 0xdc /* Sector erase (usually 64KiB) */ +#define OPCODE_NORM_READ_4B 0x13 /* Read data bytes (low frequency) */ +#define OPCODE_FAST_READ_4B 0x0c /* Read data bytes (high frequency) */ +#define OPCODE_DUAL_READ_4B 0x3c /* Read data bytes (Dual SPI) */ +#define OPCODE_QUAD_READ_4B 0x6c /* Read data bytes (Quad SPI) */ +#define OPCODE_PP_4B 0x12 /* Page program (up to 256 bytes) */ +#define OPCODE_SE_4B 0xdc /* Sector erase (usually 64KiB) */ /* Used for SST flashes only. */ -#define OPCODE_BP 0x02 /* Byte program */ -#define OPCODE_WRDI 0x04 /* Write disable */ -#define OPCODE_AAI_WP 0xad /* Auto address increment word program */ +#define OPCODE_BP 0x02 /* Byte program */ +#define OPCODE_WRDI 0x04 /* Write disable */ +#define OPCODE_AAI_WP 0xad /* Auto address increment word program */ /* Used for Macronix and Winbond flashes. */ -#define OPCODE_EN4B 0xb7 /* Enter 4-byte mode */ -#define OPCODE_EX4B 0xe9 /* Exit 4-byte mode */ +#define OPCODE_EN4B 0xb7 /* Enter 4-byte mode */ +#define OPCODE_EX4B 0xe9 /* Exit 4-byte mode */ /* Used for Spansion flashes only. */ -#define OPCODE_BRWR 0x17 /* Bank register write */ +#define OPCODE_BRWR 0x17 /* Bank register write */ /* Status Register bits. */ -#define SR_WIP 1 /* Write in progress */ -#define SR_WEL 2 /* Write enable latch */ +#define SR_WIP 1 /* Write in progress */ +#define SR_WEL 2 /* Write enable latch */ /* meaning of other SR_* bits may differ between vendors */ -#define SR_BP0 4 /* Block protect 0 */ -#define SR_BP1 8 /* Block protect 1 */ -#define SR_BP2 0x10 /* Block protect 2 */ -#define SR_SRWD 0x80 /* SR write protect */ +#define SR_BP0 4 /* Block protect 0 */ +#define SR_BP1 8 /* Block protect 1 */ +#define SR_BP2 0x10 /* Block protect 2 */ +#define SR_SRWD 0x80 /* SR write protect */ -#define SR_QUAD_EN_MX 0x40 /* Macronix Quad I/O */ +#define SR_QUAD_EN_MX 0x40 /* Macronix Quad I/O */ /* Configuration Register bits. */ -#define CR_QUAD_EN_SPAN 0x2 /* Spansion Quad I/O */ +#define CR_QUAD_EN_SPAN 0x2 /* Spansion Quad I/O */ enum read_mode { SPI_NOR_NORMAL = 0, @@ -95,7 +95,7 @@ struct spi_nor_xfer_cfg { u8 dummy_cycles; }; -#define SPI_NOR_MAX_CMD_SIZE 8 +#define SPI_NOR_MAX_CMD_SIZE 8 enum spi_nor_ops { SPI_NOR_OPS_READ = 0, SPI_NOR_OPS_WRITE, -- cgit v1.2.3-71-gd317 From b02e7f3ef0beb72da8fc64542f0ac977996ec56b Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 8 Apr 2014 18:15:31 -0700 Subject: mtd: spi-nor: re-name OPCODE_* to SPINOR_OP_* Qualify these with a better namespace, and prepare them for use in more drivers. Signed-off-by: Brian Norris Reviewed-by: Marek Vasut Acked-by: Huang Shijie --- drivers/mtd/devices/m25p80.c | 4 +-- drivers/mtd/spi-nor/fsl-quadspi.c | 58 +++++++++++++++++------------------ drivers/mtd/spi-nor/spi-nor.c | 64 +++++++++++++++++++-------------------- include/linux/mtd/spi-nor.h | 54 ++++++++++++++++----------------- 4 files changed, 90 insertions(+), 90 deletions(-) (limited to 'include') diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index 4af6400ccd95..1557d8f672c1 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -86,7 +86,7 @@ static void m25p80_write(struct spi_nor *nor, loff_t to, size_t len, spi_message_init(&m); - if (nor->program_opcode == OPCODE_AAI_WP && nor->sst_write_second) + if (nor->program_opcode == SPINOR_OP_AAI_WP && nor->sst_write_second) cmd_sz = 1; flash->command[0] = nor->program_opcode; @@ -171,7 +171,7 @@ static int m25p80_erase(struct spi_nor *nor, loff_t offset) return ret; /* Send write enable, then erase commands. */ - ret = nor->write_reg(nor, OPCODE_WREN, NULL, 0, 0); + ret = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0); if (ret) return ret; diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c index 6dc08ed950c8..2977f026f39d 100644 --- a/drivers/mtd/spi-nor/fsl-quadspi.c +++ b/drivers/mtd/spi-nor/fsl-quadspi.c @@ -294,12 +294,12 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q) lut_base = SEQID_QUAD_READ * 4; if (q->nor_size <= SZ_16M) { - cmd = OPCODE_QUAD_READ; + cmd = SPINOR_OP_QUAD_READ; addrlen = ADDR24BIT; dummy = 8; } else { /* use the 4-byte address */ - cmd = OPCODE_QUAD_READ; + cmd = SPINOR_OP_QUAD_READ; addrlen = ADDR32BIT; dummy = 8; } @@ -311,17 +311,17 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q) /* Write enable */ lut_base = SEQID_WREN * 4; - writel(LUT0(CMD, PAD1, OPCODE_WREN), base + QUADSPI_LUT(lut_base)); + writel(LUT0(CMD, PAD1, SPINOR_OP_WREN), base + QUADSPI_LUT(lut_base)); /* Page Program */ lut_base = SEQID_PP * 4; if (q->nor_size <= SZ_16M) { - cmd = OPCODE_PP; + cmd = SPINOR_OP_PP; addrlen = ADDR24BIT; } else { /* use the 4-byte address */ - cmd = OPCODE_PP; + cmd = SPINOR_OP_PP; addrlen = ADDR32BIT; } @@ -331,18 +331,18 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q) /* Read Status */ lut_base = SEQID_RDSR * 4; - writel(LUT0(CMD, PAD1, OPCODE_RDSR) | LUT1(READ, PAD1, 0x1), + writel(LUT0(CMD, PAD1, SPINOR_OP_RDSR) | LUT1(READ, PAD1, 0x1), base + QUADSPI_LUT(lut_base)); /* Erase a sector */ lut_base = SEQID_SE * 4; if (q->nor_size <= SZ_16M) { - cmd = OPCODE_SE; + cmd = SPINOR_OP_SE; addrlen = ADDR24BIT; } else { /* use the 4-byte address */ - cmd = OPCODE_SE; + cmd = SPINOR_OP_SE; addrlen = ADDR32BIT; } @@ -351,35 +351,35 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q) /* Erase the whole chip */ lut_base = SEQID_CHIP_ERASE * 4; - writel(LUT0(CMD, PAD1, OPCODE_CHIP_ERASE), + writel(LUT0(CMD, PAD1, SPINOR_OP_CHIP_ERASE), base + QUADSPI_LUT(lut_base)); /* READ ID */ lut_base = SEQID_RDID * 4; - writel(LUT0(CMD, PAD1, OPCODE_RDID) | LUT1(READ, PAD1, 0x8), + writel(LUT0(CMD, PAD1, SPINOR_OP_RDID) | LUT1(READ, PAD1, 0x8), base + QUADSPI_LUT(lut_base)); /* Write Register */ lut_base = SEQID_WRSR * 4; - writel(LUT0(CMD, PAD1, OPCODE_WRSR) | LUT1(WRITE, PAD1, 0x2), + writel(LUT0(CMD, PAD1, SPINOR_OP_WRSR) | LUT1(WRITE, PAD1, 0x2), base + QUADSPI_LUT(lut_base)); /* Read Configuration Register */ lut_base = SEQID_RDCR * 4; - writel(LUT0(CMD, PAD1, OPCODE_RDCR) | LUT1(READ, PAD1, 0x1), + writel(LUT0(CMD, PAD1, SPINOR_OP_RDCR) | LUT1(READ, PAD1, 0x1), base + QUADSPI_LUT(lut_base)); /* Write disable */ lut_base = SEQID_WRDI * 4; - writel(LUT0(CMD, PAD1, OPCODE_WRDI), base + QUADSPI_LUT(lut_base)); + writel(LUT0(CMD, PAD1, SPINOR_OP_WRDI), base + QUADSPI_LUT(lut_base)); /* Enter 4 Byte Mode (Micron) */ lut_base = SEQID_EN4B * 4; - writel(LUT0(CMD, PAD1, OPCODE_EN4B), base + QUADSPI_LUT(lut_base)); + writel(LUT0(CMD, PAD1, SPINOR_OP_EN4B), base + QUADSPI_LUT(lut_base)); /* Enter 4 Byte Mode (Spansion) */ lut_base = SEQID_BRWR * 4; - writel(LUT0(CMD, PAD1, OPCODE_BRWR), base + QUADSPI_LUT(lut_base)); + writel(LUT0(CMD, PAD1, SPINOR_OP_BRWR), base + QUADSPI_LUT(lut_base)); fsl_qspi_lock_lut(q); } @@ -388,29 +388,29 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q) static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd) { switch (cmd) { - case OPCODE_QUAD_READ: + case SPINOR_OP_QUAD_READ: return SEQID_QUAD_READ; - case OPCODE_WREN: + case SPINOR_OP_WREN: return SEQID_WREN; - case OPCODE_WRDI: + case SPINOR_OP_WRDI: return SEQID_WRDI; - case OPCODE_RDSR: + case SPINOR_OP_RDSR: return SEQID_RDSR; - case OPCODE_SE: + case SPINOR_OP_SE: return SEQID_SE; - case OPCODE_CHIP_ERASE: + case SPINOR_OP_CHIP_ERASE: return SEQID_CHIP_ERASE; - case OPCODE_PP: + case SPINOR_OP_PP: return SEQID_PP; - case OPCODE_RDID: + case SPINOR_OP_RDID: return SEQID_RDID; - case OPCODE_WRSR: + case SPINOR_OP_WRSR: return SEQID_WRSR; - case OPCODE_RDCR: + case SPINOR_OP_RDCR: return SEQID_RDCR; - case OPCODE_EN4B: + case SPINOR_OP_EN4B: return SEQID_EN4B; - case OPCODE_BRWR: + case SPINOR_OP_BRWR: return SEQID_BRWR; default: dev_err(q->dev, "Unsupported cmd 0x%.2x\n", cmd); @@ -688,7 +688,7 @@ static int fsl_qspi_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len, if (ret) return ret; - if (opcode == OPCODE_CHIP_ERASE) + if (opcode == SPINOR_OP_CHIP_ERASE) fsl_qspi_invalid(q); } else if (len > 0) { @@ -750,7 +750,7 @@ static int fsl_qspi_erase(struct spi_nor *nor, loff_t offs) return ret; /* Send write enable, then erase commands. */ - ret = nor->write_reg(nor, OPCODE_WREN, NULL, 0, 0); + ret = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0); if (ret) return ret; diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 6c64ab95dee2..1716f3ce9949 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -38,7 +38,7 @@ static int read_sr(struct spi_nor *nor) int ret; u8 val; - ret = nor->read_reg(nor, OPCODE_RDSR, &val, 1); + ret = nor->read_reg(nor, SPINOR_OP_RDSR, &val, 1); if (ret < 0) { pr_err("error %d reading SR\n", (int) ret); return ret; @@ -57,7 +57,7 @@ static int read_cr(struct spi_nor *nor) int ret; u8 val; - ret = nor->read_reg(nor, OPCODE_RDCR, &val, 1); + ret = nor->read_reg(nor, SPINOR_OP_RDCR, &val, 1); if (ret < 0) { dev_err(nor->dev, "error %d reading CR\n", ret); return ret; @@ -91,7 +91,7 @@ static inline int spi_nor_read_dummy_cycles(struct spi_nor *nor) static inline int write_sr(struct spi_nor *nor, u8 val) { nor->cmd_buf[0] = val; - return nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 1, 0); + return nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 1, 0); } /* @@ -100,7 +100,7 @@ static inline int write_sr(struct spi_nor *nor, u8 val) */ static inline int write_enable(struct spi_nor *nor) { - return nor->write_reg(nor, OPCODE_WREN, NULL, 0, 0); + return nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0); } /* @@ -108,7 +108,7 @@ static inline int write_enable(struct spi_nor *nor) */ static inline int write_disable(struct spi_nor *nor) { - return nor->write_reg(nor, OPCODE_WRDI, NULL, 0, 0); + return nor->write_reg(nor, SPINOR_OP_WRDI, NULL, 0, 0); } static inline struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd) @@ -132,7 +132,7 @@ static inline int set_4byte(struct spi_nor *nor, u32 jedec_id, int enable) if (need_wren) write_enable(nor); - cmd = enable ? OPCODE_EN4B : OPCODE_EX4B; + cmd = enable ? SPINOR_OP_EN4B : SPINOR_OP_EX4B; status = nor->write_reg(nor, cmd, NULL, 0, 0); if (need_wren) write_disable(nor); @@ -141,7 +141,7 @@ static inline int set_4byte(struct spi_nor *nor, u32 jedec_id, int enable) default: /* Spansion style */ nor->cmd_buf[0] = enable << 7; - return nor->write_reg(nor, OPCODE_BRWR, nor->cmd_buf, 1, 0); + return nor->write_reg(nor, SPINOR_OP_BRWR, nor->cmd_buf, 1, 0); } } @@ -193,7 +193,7 @@ static int erase_chip(struct spi_nor *nor) /* Send write enable, then erase commands. */ write_enable(nor); - return nor->write_reg(nor, OPCODE_CHIP_ERASE, NULL, 0, 0); + return nor->write_reg(nor, SPINOR_OP_CHIP_ERASE, NULL, 0, 0); } static int spi_nor_lock_and_prep(struct spi_nor *nor, enum spi_nor_ops ops) @@ -253,7 +253,7 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr) } /* REVISIT in some cases we could speed up erasing large regions - * by using OPCODE_SE instead of OPCODE_BE_4K. We may have set up + * by using SPINOR_OP_SE instead of SPINOR_OP_BE_4K. We may have set up * to use "small sector erase", but that's not always optimal. */ @@ -385,7 +385,7 @@ struct flash_info { u32 jedec_id; u16 ext_id; - /* The size listed here is what works with OPCODE_SE, which isn't + /* The size listed here is what works with SPINOR_OP_SE, which isn't * necessarily called a "sector" by the vendor. */ unsigned sector_size; @@ -395,11 +395,11 @@ struct flash_info { u16 addr_width; u16 flags; -#define SECT_4K 0x01 /* OPCODE_BE_4K works uniformly */ +#define SECT_4K 0x01 /* SPINOR_OP_BE_4K works uniformly */ #define SPI_NOR_NO_ERASE 0x02 /* No erase command needed */ #define SST_WRITE 0x04 /* use SST byte programming */ #define SPI_NOR_NO_FR 0x08 /* Can't do fastread */ -#define SECT_4K_PMC 0x10 /* OPCODE_BE_4K_PMC works uniformly */ +#define SECT_4K_PMC 0x10 /* SPINOR_OP_BE_4K_PMC works uniformly */ #define SPI_NOR_DUAL_READ 0x20 /* Flash supports Dual Read */ #define SPI_NOR_QUAD_READ 0x40 /* Flash supports Quad Read */ }; @@ -598,7 +598,7 @@ static const struct spi_device_id *spi_nor_read_id(struct spi_nor *nor) u16 ext_jedec; struct flash_info *info; - tmp = nor->read_reg(nor, OPCODE_RDID, id, 5); + tmp = nor->read_reg(nor, SPINOR_OP_RDID, id, 5); if (tmp < 0) { dev_dbg(nor->dev, " error %d reading JEDEC ID\n", tmp); return ERR_PTR(tmp); @@ -670,7 +670,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, actual = to % 2; /* Start write from odd address. */ if (actual) { - nor->program_opcode = OPCODE_BP; + nor->program_opcode = SPINOR_OP_BP; /* write one byte. */ nor->write(nor, to, 1, retlen, buf); @@ -682,7 +682,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, /* Write out most of the data here. */ for (; actual < len - 1; actual += 2) { - nor->program_opcode = OPCODE_AAI_WP; + nor->program_opcode = SPINOR_OP_AAI_WP; /* write two bytes. */ nor->write(nor, to, 2, retlen, buf + actual); @@ -703,7 +703,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, if (actual != len) { write_enable(nor); - nor->program_opcode = OPCODE_BP; + nor->program_opcode = SPINOR_OP_BP; nor->write(nor, to, 1, retlen, buf + actual); ret = wait_till_ready(nor); @@ -777,7 +777,7 @@ static int macronix_quad_enable(struct spi_nor *nor) write_enable(nor); nor->cmd_buf[0] = val | SR_QUAD_EN_MX; - nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 1, 0); + nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 1, 0); if (wait_till_ready(nor)) return 1; @@ -802,7 +802,7 @@ static int write_sr_cr(struct spi_nor *nor, u16 val) nor->cmd_buf[0] = val & 0xff; nor->cmd_buf[1] = (val >> 8); - return nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 2, 0); + return nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 2, 0); } static int spansion_quad_enable(struct spi_nor *nor) @@ -967,13 +967,13 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, /* prefer "small sector" erase if possible */ if (info->flags & SECT_4K) { - nor->erase_opcode = OPCODE_BE_4K; + nor->erase_opcode = SPINOR_OP_BE_4K; mtd->erasesize = 4096; } else if (info->flags & SECT_4K_PMC) { - nor->erase_opcode = OPCODE_BE_4K_PMC; + nor->erase_opcode = SPINOR_OP_BE_4K_PMC; mtd->erasesize = 4096; } else { - nor->erase_opcode = OPCODE_SE; + nor->erase_opcode = SPINOR_OP_SE; mtd->erasesize = info->sector_size; } @@ -1014,23 +1014,23 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, /* Default commands */ switch (nor->flash_read) { case SPI_NOR_QUAD: - nor->read_opcode = OPCODE_QUAD_READ; + nor->read_opcode = SPINOR_OP_QUAD_READ; break; case SPI_NOR_DUAL: - nor->read_opcode = OPCODE_DUAL_READ; + nor->read_opcode = SPINOR_OP_DUAL_READ; break; case SPI_NOR_FAST: - nor->read_opcode = OPCODE_FAST_READ; + nor->read_opcode = SPINOR_OP_FAST_READ; break; case SPI_NOR_NORMAL: - nor->read_opcode = OPCODE_NORM_READ; + nor->read_opcode = SPINOR_OP_NORM_READ; break; default: dev_err(dev, "No Read opcode defined\n"); return -EINVAL; } - nor->program_opcode = OPCODE_PP; + nor->program_opcode = SPINOR_OP_PP; if (info->addr_width) nor->addr_width = info->addr_width; @@ -1041,21 +1041,21 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, /* Dedicated 4-byte command set */ switch (nor->flash_read) { case SPI_NOR_QUAD: - nor->read_opcode = OPCODE_QUAD_READ_4B; + nor->read_opcode = SPINOR_OP_QUAD_READ_4B; break; case SPI_NOR_DUAL: - nor->read_opcode = OPCODE_DUAL_READ_4B; + nor->read_opcode = SPINOR_OP_DUAL_READ_4B; break; case SPI_NOR_FAST: - nor->read_opcode = OPCODE_FAST_READ_4B; + nor->read_opcode = SPINOR_OP_FAST_READ_4B; break; case SPI_NOR_NORMAL: - nor->read_opcode = OPCODE_NORM_READ_4B; + nor->read_opcode = SPINOR_OP_NORM_READ_4B; break; } - nor->program_opcode = OPCODE_PP_4B; + nor->program_opcode = SPINOR_OP_PP_4B; /* No small sector erase for 4-byte command set */ - nor->erase_opcode = OPCODE_SE_4B; + nor->erase_opcode = SPINOR_OP_SE_4B; mtd->erasesize = info->sector_size; } else set_4byte(nor, info->jedec_id, 1); diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index a6e87190ead1..f1fe1a6659a3 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -11,41 +11,41 @@ #define __LINUX_MTD_SPI_NOR_H /* Flash opcodes. */ -#define OPCODE_WREN 0x06 /* Write enable */ -#define OPCODE_RDSR 0x05 /* Read status register */ -#define OPCODE_WRSR 0x01 /* Write status register 1 byte */ -#define OPCODE_NORM_READ 0x03 /* Read data bytes (low frequency) */ -#define OPCODE_FAST_READ 0x0b /* Read data bytes (high frequency) */ -#define OPCODE_DUAL_READ 0x3b /* Read data bytes (Dual SPI) */ -#define OPCODE_QUAD_READ 0x6b /* Read data bytes (Quad SPI) */ -#define OPCODE_PP 0x02 /* Page program (up to 256 bytes) */ -#define OPCODE_BE_4K 0x20 /* Erase 4KiB block */ -#define OPCODE_BE_4K_PMC 0xd7 /* Erase 4KiB block on PMC chips */ -#define OPCODE_BE_32K 0x52 /* Erase 32KiB block */ -#define OPCODE_CHIP_ERASE 0xc7 /* Erase whole flash chip */ -#define OPCODE_SE 0xd8 /* Sector erase (usually 64KiB) */ -#define OPCODE_RDID 0x9f /* Read JEDEC ID */ -#define OPCODE_RDCR 0x35 /* Read configuration register */ +#define SPINOR_OP_WREN 0x06 /* Write enable */ +#define SPINOR_OP_RDSR 0x05 /* Read status register */ +#define SPINOR_OP_WRSR 0x01 /* Write status register 1 byte */ +#define SPINOR_OP_NORM_READ 0x03 /* Read data bytes (low frequency) */ +#define SPINOR_OP_FAST_READ 0x0b /* Read data bytes (high frequency) */ +#define SPINOR_OP_DUAL_READ 0x3b /* Read data bytes (Dual SPI) */ +#define SPINOR_OP_QUAD_READ 0x6b /* Read data bytes (Quad SPI) */ +#define SPINOR_OP_PP 0x02 /* Page program (up to 256 bytes) */ +#define SPINOR_OP_BE_4K 0x20 /* Erase 4KiB block */ +#define SPINOR_OP_BE_4K_PMC 0xd7 /* Erase 4KiB block on PMC chips */ +#define SPINOR_OP_BE_32K 0x52 /* Erase 32KiB block */ +#define SPINOR_OP_CHIP_ERASE 0xc7 /* Erase whole flash chip */ +#define SPINOR_OP_SE 0xd8 /* Sector erase (usually 64KiB) */ +#define SPINOR_OP_RDID 0x9f /* Read JEDEC ID */ +#define SPINOR_OP_RDCR 0x35 /* Read configuration register */ /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */ -#define OPCODE_NORM_READ_4B 0x13 /* Read data bytes (low frequency) */ -#define OPCODE_FAST_READ_4B 0x0c /* Read data bytes (high frequency) */ -#define OPCODE_DUAL_READ_4B 0x3c /* Read data bytes (Dual SPI) */ -#define OPCODE_QUAD_READ_4B 0x6c /* Read data bytes (Quad SPI) */ -#define OPCODE_PP_4B 0x12 /* Page program (up to 256 bytes) */ -#define OPCODE_SE_4B 0xdc /* Sector erase (usually 64KiB) */ +#define SPINOR_OP_NORM_READ_4B 0x13 /* Read data bytes (low frequency) */ +#define SPINOR_OP_FAST_READ_4B 0x0c /* Read data bytes (high frequency) */ +#define SPINOR_OP_DUAL_READ_4B 0x3c /* Read data bytes (Dual SPI) */ +#define SPINOR_OP_QUAD_READ_4B 0x6c /* Read data bytes (Quad SPI) */ +#define SPINOR_OP_PP_4B 0x12 /* Page program (up to 256 bytes) */ +#define SPINOR_OP_SE_4B 0xdc /* Sector erase (usually 64KiB) */ /* Used for SST flashes only. */ -#define OPCODE_BP 0x02 /* Byte program */ -#define OPCODE_WRDI 0x04 /* Write disable */ -#define OPCODE_AAI_WP 0xad /* Auto address increment word program */ +#define SPINOR_OP_BP 0x02 /* Byte program */ +#define SPINOR_OP_WRDI 0x04 /* Write disable */ +#define SPINOR_OP_AAI_WP 0xad /* Auto address increment word program */ /* Used for Macronix and Winbond flashes. */ -#define OPCODE_EN4B 0xb7 /* Enter 4-byte mode */ -#define OPCODE_EX4B 0xe9 /* Exit 4-byte mode */ +#define SPINOR_OP_EN4B 0xb7 /* Enter 4-byte mode */ +#define SPINOR_OP_EX4B 0xe9 /* Exit 4-byte mode */ /* Used for Spansion flashes only. */ -#define OPCODE_BRWR 0x17 /* Bank register write */ +#define SPINOR_OP_BRWR 0x17 /* Bank register write */ /* Status Register bits. */ #define SR_WIP 1 /* Write in progress */ -- cgit v1.2.3-71-gd317 From 58b89a1f4c2a65b10b8f7b90b6ff2161b19bb0d1 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 8 Apr 2014 19:16:49 -0700 Subject: mtd: spi-nor: unify read opcode variants with ST SPI FSM serial_flash_cmds.h defines our opcodes a little differently. Let's borrow its naming, since it's borrowed from the SFDP standard, and it's more extensible. This prepares us for merging serial_flash_cmds.h and spi-nor.h opcode listing. Signed-off-by: Brian Norris Reviewed-by: Marek Vasut Acked-by: Huang Shijie --- drivers/mtd/spi-nor/fsl-quadspi.c | 6 +++--- drivers/mtd/spi-nor/spi-nor.c | 16 ++++++++-------- include/linux/mtd/spi-nor.h | 24 ++++++++++++++++-------- 3 files changed, 27 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c index 2977f026f39d..b41bbbc531ff 100644 --- a/drivers/mtd/spi-nor/fsl-quadspi.c +++ b/drivers/mtd/spi-nor/fsl-quadspi.c @@ -294,12 +294,12 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q) lut_base = SEQID_QUAD_READ * 4; if (q->nor_size <= SZ_16M) { - cmd = SPINOR_OP_QUAD_READ; + cmd = SPINOR_OP_READ_1_1_4; addrlen = ADDR24BIT; dummy = 8; } else { /* use the 4-byte address */ - cmd = SPINOR_OP_QUAD_READ; + cmd = SPINOR_OP_READ_1_1_4; addrlen = ADDR32BIT; dummy = 8; } @@ -388,7 +388,7 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q) static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd) { switch (cmd) { - case SPINOR_OP_QUAD_READ: + case SPINOR_OP_READ_1_1_4: return SEQID_QUAD_READ; case SPINOR_OP_WREN: return SEQID_WREN; diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 1716f3ce9949..d6f44d527701 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1014,16 +1014,16 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, /* Default commands */ switch (nor->flash_read) { case SPI_NOR_QUAD: - nor->read_opcode = SPINOR_OP_QUAD_READ; + nor->read_opcode = SPINOR_OP_READ_1_1_4; break; case SPI_NOR_DUAL: - nor->read_opcode = SPINOR_OP_DUAL_READ; + nor->read_opcode = SPINOR_OP_READ_1_1_2; break; case SPI_NOR_FAST: - nor->read_opcode = SPINOR_OP_FAST_READ; + nor->read_opcode = SPINOR_OP_READ_FAST; break; case SPI_NOR_NORMAL: - nor->read_opcode = SPINOR_OP_NORM_READ; + nor->read_opcode = SPINOR_OP_READ; break; default: dev_err(dev, "No Read opcode defined\n"); @@ -1041,16 +1041,16 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, /* Dedicated 4-byte command set */ switch (nor->flash_read) { case SPI_NOR_QUAD: - nor->read_opcode = SPINOR_OP_QUAD_READ_4B; + nor->read_opcode = SPINOR_OP_READ4_1_1_4; break; case SPI_NOR_DUAL: - nor->read_opcode = SPINOR_OP_DUAL_READ_4B; + nor->read_opcode = SPINOR_OP_READ4_1_1_2; break; case SPI_NOR_FAST: - nor->read_opcode = SPINOR_OP_FAST_READ_4B; + nor->read_opcode = SPINOR_OP_READ4_FAST; break; case SPI_NOR_NORMAL: - nor->read_opcode = SPINOR_OP_NORM_READ_4B; + nor->read_opcode = SPINOR_OP_READ4; break; } nor->program_opcode = SPINOR_OP_PP_4B; diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index f1fe1a6659a3..53241842a7ab 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -10,14 +10,22 @@ #ifndef __LINUX_MTD_SPI_NOR_H #define __LINUX_MTD_SPI_NOR_H +/* + * Note on opcode nomenclature: some opcodes have a format like + * SPINOR_OP_FUNCTION{4,}_x_y_z. The numbers x, y, and z stand for the number + * of I/O lines used for the opcode, address, and data (respectively). The + * FUNCTION has an optional suffix of '4', to represent an opcode which + * requires a 4-byte (32-bit) address. + */ + /* Flash opcodes. */ #define SPINOR_OP_WREN 0x06 /* Write enable */ #define SPINOR_OP_RDSR 0x05 /* Read status register */ #define SPINOR_OP_WRSR 0x01 /* Write status register 1 byte */ -#define SPINOR_OP_NORM_READ 0x03 /* Read data bytes (low frequency) */ -#define SPINOR_OP_FAST_READ 0x0b /* Read data bytes (high frequency) */ -#define SPINOR_OP_DUAL_READ 0x3b /* Read data bytes (Dual SPI) */ -#define SPINOR_OP_QUAD_READ 0x6b /* Read data bytes (Quad SPI) */ +#define SPINOR_OP_READ 0x03 /* Read data bytes (low frequency) */ +#define SPINOR_OP_READ_FAST 0x0b /* Read data bytes (high frequency) */ +#define SPINOR_OP_READ_1_1_2 0x3b /* Read data bytes (Dual SPI) */ +#define SPINOR_OP_READ_1_1_4 0x6b /* Read data bytes (Quad SPI) */ #define SPINOR_OP_PP 0x02 /* Page program (up to 256 bytes) */ #define SPINOR_OP_BE_4K 0x20 /* Erase 4KiB block */ #define SPINOR_OP_BE_4K_PMC 0xd7 /* Erase 4KiB block on PMC chips */ @@ -28,10 +36,10 @@ #define SPINOR_OP_RDCR 0x35 /* Read configuration register */ /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */ -#define SPINOR_OP_NORM_READ_4B 0x13 /* Read data bytes (low frequency) */ -#define SPINOR_OP_FAST_READ_4B 0x0c /* Read data bytes (high frequency) */ -#define SPINOR_OP_DUAL_READ_4B 0x3c /* Read data bytes (Dual SPI) */ -#define SPINOR_OP_QUAD_READ_4B 0x6c /* Read data bytes (Quad SPI) */ +#define SPINOR_OP_READ4 0x13 /* Read data bytes (low frequency) */ +#define SPINOR_OP_READ4_FAST 0x0c /* Read data bytes (high frequency) */ +#define SPINOR_OP_READ4_1_1_2 0x3c /* Read data bytes (Dual SPI) */ +#define SPINOR_OP_READ4_1_1_4 0x6c /* Read data bytes (Quad SPI) */ #define SPINOR_OP_PP_4B 0x12 /* Page program (up to 256 bytes) */ #define SPINOR_OP_SE_4B 0xdc /* Sector erase (usually 64KiB) */ -- cgit v1.2.3-71-gd317 From fdb9c293decf7e06795f7d9ae409df907c7ae1b6 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 15 Apr 2014 12:39:14 -0500 Subject: percpu: Replace __get_cpu_var with this_cpu_ptr __this_cpu_ptr is being phased out. Use raw_cpu_ptr instead which was introduced in 3.15-rc1. One case of using __get_cpu_var in the get_cpu_var macro for address calculation was remaining in include/linux/percpu.h. tj: Updated patch description. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e7a0b95ed527..539b3caa5748 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -29,7 +29,7 @@ */ #define get_cpu_var(var) (*({ \ preempt_disable(); \ - &__get_cpu_var(var); })) + this_cpu_ptr(&var); })) /* * The weird & is necessary because sparse considers (void)(var) to be -- cgit v1.2.3-71-gd317 From 79f7ae7c45a6ccf04e2908337461dee615f6afb0 Mon Sep 17 00:00:00 2001 From: Seungwon Jeon Date: Fri, 14 Mar 2014 21:11:56 +0900 Subject: mmc: clarify DDR timing mode between SD-UHS and eMMC This change distinguishes DDR timing mode of current mixed usage to clarify device type. Signed-off-by: Seungwon Jeon Acked-by: Jaehoon Chung Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/core/debugfs.c | 3 +++ drivers/mmc/core/mmc.c | 2 +- include/linux/mmc/host.h | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c index 54829c0ed000..509229b48b55 100644 --- a/drivers/mmc/core/debugfs.c +++ b/drivers/mmc/core/debugfs.c @@ -135,6 +135,9 @@ static int mmc_ios_show(struct seq_file *s, void *data) case MMC_TIMING_UHS_DDR50: str = "sd uhs DDR50"; break; + case MMC_TIMING_MMC_DDR52: + str = "mmc DDR52"; + break; case MMC_TIMING_MMC_HS200: str = "mmc high-speed SDR200"; break; diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 1ab5f3a0af5b..e22d8515ff97 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1264,7 +1264,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, goto err; } mmc_card_set_ddr_mode(card); - mmc_set_timing(card->host, MMC_TIMING_UHS_DDR50); + mmc_set_timing(card->host, MMC_TIMING_MMC_DDR52); mmc_set_bus_width(card->host, bus_width); } } diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index cb61ea4d6945..35354207e71f 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -58,7 +58,8 @@ struct mmc_ios { #define MMC_TIMING_UHS_SDR50 5 #define MMC_TIMING_UHS_SDR104 6 #define MMC_TIMING_UHS_DDR50 7 -#define MMC_TIMING_MMC_HS200 8 +#define MMC_TIMING_MMC_DDR52 8 +#define MMC_TIMING_MMC_HS200 9 #define MMC_SDR_MODE 0 #define MMC_1_2V_DDR_MODE 1 -- cgit v1.2.3-71-gd317 From af6b6967d6e17fe070c0fd1be364c34cbd31a523 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 16 Apr 2014 17:19:12 +0200 Subject: net: phy: export genphy_config_init() This enables other drivers to call this generic implementation, and then only do specific details on top of it. Signed-off-by: Daniel Mack Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 3 ++- include/linux/phy.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0ce606624296..466ae3e06322 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1067,7 +1067,7 @@ int genphy_soft_reset(struct phy_device *phydev) } EXPORT_SYMBOL(genphy_soft_reset); -static int genphy_config_init(struct phy_device *phydev) +int genphy_config_init(struct phy_device *phydev) { int val; u32 features; @@ -1118,6 +1118,7 @@ static int gen10g_soft_reset(struct phy_device *phydev) /* Do nothing for now */ return 0; } +EXPORT_SYMBOL(genphy_config_init); static int gen10g_config_init(struct phy_device *phydev) { diff --git a/include/linux/phy.h b/include/linux/phy.h index 4d0221fd0688..51d15f684e7e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -666,6 +666,7 @@ static inline int phy_read_status(struct phy_device *phydev) return phydev->drv->read_status(phydev); } +int genphy_config_init(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); -- cgit v1.2.3-71-gd317 From a0265d28b3a5877b5b8edd14eb12a2ccb60ab1f3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 17 Apr 2014 13:45:03 +0800 Subject: net: Add __dev_forward_skb This patch adds the helper __dev_forward_skb which is identical to dev_forward_skb except that it doesn't actually inject the skb into the stack. This is useful where we wish to have finer control over how the packet is injected, e.g., via netif_rx_ni or netif_receive_skb. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 42 ++++++++++++++++++++++++------------------ 2 files changed, 25 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ed3a3aa6604..a803d792df1e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2633,6 +2633,7 @@ int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_port_id *ppid); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq); +int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index d2c8a06b3a98..11d70e3afefa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1661,6 +1661,29 @@ bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(is_skb_forwardable); +int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) +{ + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, GFP_ATOMIC)) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + } + + if (unlikely(!is_skb_forwardable(dev, skb))) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + + skb_scrub_packet(skb, true); + skb->protocol = eth_type_trans(skb, dev); + + return 0; +} +EXPORT_SYMBOL_GPL(__dev_forward_skb); + /** * dev_forward_skb - loopback an skb to another netif * @@ -1681,24 +1704,7 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable); */ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { - if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { - if (skb_copy_ubufs(skb, GFP_ATOMIC)) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; - } - } - - if (unlikely(!is_skb_forwardable(dev, skb))) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; - } - - skb_scrub_packet(skb, true); - skb->protocol = eth_type_trans(skb, dev); - - return netif_rx_internal(skb); + return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); -- cgit v1.2.3-71-gd317 From 599018a71013386119c057a64183e49240c8b4e6 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 17 Apr 2014 18:22:54 -0700 Subject: 6lowpan: add helper to get 6lowpan namespace This will simplify the new reassembly backport with no code changes being required. CC: Alexander Smirnov Cc: Dmitry Eremin-Solenikov Cc: linux-zigbee-devel@lists.sourceforge.net Cc: David S. Miller" Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Luis R. Rodriguez Signed-off-by: David S. Miller --- include/net/net_namespace.h | 15 +++++++++++++++ net/ieee802154/reassembly.c | 46 +++++++++++++++++++++++++++++---------------- 2 files changed, 45 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 5f9eb260990f..bc4118ede5b5 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -373,6 +373,21 @@ static inline void rt_genid_bump_ipv6(struct net *net) } #endif +#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN) +static inline struct netns_ieee802154_lowpan * +net_ieee802154_lowpan(struct net *net) +{ + return &net->ieee802154_lowpan; +} +#else +static inline struct netns_ieee802154_lowpan * +net_ieee802154_lowpan(struct net *net) +{ + return NULL; +} +#endif + + /* For callers who don't really care about whether it's IPv4 or IPv6 */ static inline void rt_genid_bump_all(struct net *net) { diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c index ef2d54372b13..132b65b6d957 100644 --- a/net/ieee802154/reassembly.c +++ b/net/ieee802154/reassembly.c @@ -120,6 +120,8 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info, struct inet_frag_queue *q; struct lowpan_create_arg arg; unsigned int hash; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); arg.tag = frag_info->d_tag; arg.d_size = frag_info->d_size; @@ -129,7 +131,7 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info, read_lock(&lowpan_frags.lock); hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst); - q = inet_frag_find(&net->ieee802154_lowpan.frags, + q = inet_frag_find(&ieee802154_lowpan->frags, &lowpan_frags, &arg, hash); if (IS_ERR_OR_NULL(q)) { inet_frag_maybe_warn_overflow(q, pr_fmt()); @@ -357,6 +359,8 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) struct net *net = dev_net(skb->dev); struct lowpan_frag_info *frag_info = lowpan_cb(skb); struct ieee802154_addr source, dest; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); int err; source = mac_cb(skb)->source; @@ -366,10 +370,10 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) if (err < 0) goto err; - if (frag_info->d_size > net->ieee802154_lowpan.max_dsize) + if (frag_info->d_size > ieee802154_lowpan->max_dsize) goto err; - inet_frag_evictor(&net->ieee802154_lowpan.frags, &lowpan_frags, false); + inet_frag_evictor(&ieee802154_lowpan->frags, &lowpan_frags, false); fq = fq_find(net, frag_info, &source, &dest); if (fq != NULL) { @@ -436,6 +440,8 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); table = lowpan_frags_ns_ctl_table; if (!net_eq(net, &init_net)) { @@ -444,10 +450,10 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) if (table == NULL) goto err_alloc; - table[0].data = &net->ieee802154_lowpan.frags.high_thresh; - table[1].data = &net->ieee802154_lowpan.frags.low_thresh; - table[2].data = &net->ieee802154_lowpan.frags.timeout; - table[3].data = &net->ieee802154_lowpan.max_dsize; + table[0].data = &ieee802154_lowpan->frags.high_thresh; + table[1].data = &ieee802154_lowpan->frags.low_thresh; + table[2].data = &ieee802154_lowpan->frags.timeout; + table[3].data = &ieee802154_lowpan->max_dsize; /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) @@ -458,7 +464,7 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) if (hdr == NULL) goto err_reg; - net->ieee802154_lowpan.sysctl.frags_hdr = hdr; + ieee802154_lowpan->sysctl.frags_hdr = hdr; return 0; err_reg: @@ -471,9 +477,11 @@ err_alloc: static void __net_exit lowpan_frags_ns_sysctl_unregister(struct net *net) { struct ctl_table *table; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); - table = net->ieee802154_lowpan.sysctl.frags_hdr->ctl_table_arg; - unregister_net_sysctl_table(net->ieee802154_lowpan.sysctl.frags_hdr); + table = ieee802154_lowpan->sysctl.frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(ieee802154_lowpan->sysctl.frags_hdr); if (!net_eq(net, &init_net)) kfree(table); } @@ -514,20 +522,26 @@ static inline void lowpan_frags_sysctl_unregister(void) static int __net_init lowpan_frags_init_net(struct net *net) { - net->ieee802154_lowpan.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; - net->ieee802154_lowpan.frags.low_thresh = IPV6_FRAG_LOW_THRESH; - net->ieee802154_lowpan.frags.timeout = IPV6_FRAG_TIMEOUT; - net->ieee802154_lowpan.max_dsize = 0xFFFF; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); - inet_frags_init_net(&net->ieee802154_lowpan.frags); + ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; + ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; + ieee802154_lowpan->max_dsize = 0xFFFF; + + inet_frags_init_net(&ieee802154_lowpan->frags); return lowpan_frags_ns_sysctl_register(net); } static void __net_exit lowpan_frags_exit_net(struct net *net) { + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); + lowpan_frags_ns_sysctl_unregister(net); - inet_frags_exit_net(&net->ieee802154_lowpan.frags, &lowpan_frags); + inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags); } static struct pernet_operations lowpan_frags_ops = { -- cgit v1.2.3-71-gd317 From 17d8ecb8ff791359c9d9a44bc766c3d4b87f37f7 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 17 Apr 2014 18:22:56 -0700 Subject: 6lowpan: include net/net_namespace.h on 6lowpan namepsace header Don't rely on driver files or other headers having this file included. CC: Alexander Smirnov Cc: Dmitry Eremin-Solenikov Cc: linux-zigbee-devel@lists.sourceforge.net Signed-off-by: Luis R. Rodriguez Signed-off-by: David S. Miller --- include/net/6lowpan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index f7d372b7d4ff..79b530fb2c4d 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -54,6 +54,7 @@ #define __6LOWPAN_H__ #include +#include #define UIP_802154_SHORTADDR_LEN 2 /* compressed ipv6 address length */ #define UIP_IPH_LEN 40 /* ipv6 fixed header size */ -- cgit v1.2.3-71-gd317 From 86fd14ad1e8c4b8f5e9a7a27b26bdade91dd4bd0 Mon Sep 17 00:00:00 2001 From: Weiping Pan Date: Fri, 18 Apr 2014 12:27:46 +0800 Subject: tcp: make tcp_cwnd_application_limited() static Make tcp_cwnd_application_limited() static and move it from tcp_input.c to tcp_output.c Signed-off-by: Weiping Pan Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - net/ipv4/tcp_input.c | 22 ---------------------- net/ipv4/tcp_output.c | 22 ++++++++++++++++++++++ 3 files changed, 22 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 87d877408188..163d2b467d78 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -558,7 +558,6 @@ void tcp_send_loss_probe(struct sock *sk); bool tcp_schedule_loss_probe(struct sock *sk); /* tcp_input.c */ -void tcp_cwnd_application_limited(struct sock *sk); void tcp_resume_early_retransmit(struct sock *sk); void tcp_rearm_rto(struct sock *sk); void tcp_reset(struct sock *sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d6b46eb2f94c..6efed134ab63 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4703,28 +4703,6 @@ static int tcp_prune_queue(struct sock *sk) return -1; } -/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. - * As additional protections, we do not touch cwnd in retransmission phases, - * and if application hit its sndbuf limit recently. - */ -void tcp_cwnd_application_limited(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && - sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - /* Limited by application or receiver window. */ - u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); - u32 win_used = max(tp->snd_cwnd_used, init_win); - if (win_used < tp->snd_cwnd) { - tp->snd_ssthresh = tcp_current_ssthresh(sk); - tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; - } - tp->snd_cwnd_used = 0; - } - tp->snd_cwnd_stamp = tcp_time_stamp; -} - static bool tcp_should_expand_sndbuf(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 025e25093984..29dde97c3c41 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1387,6 +1387,28 @@ unsigned int tcp_current_mss(struct sock *sk) return mss_now; } +/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. + * As additional protections, we do not touch cwnd in retransmission phases, + * and if application hit its sndbuf limit recently. + */ +static void tcp_cwnd_application_limited(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && + sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { + /* Limited by application or receiver window. */ + u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); + u32 win_used = max(tp->snd_cwnd_used, init_win); + if (win_used < tp->snd_cwnd) { + tp->snd_ssthresh = tcp_current_ssthresh(sk); + tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; + } + tp->snd_cwnd_used = 0; + } + tp->snd_cwnd_stamp = tcp_time_stamp; +} + /* Congestion window validation. (RFC2861) */ static void tcp_cwnd_validate(struct sock *sk) { -- cgit v1.2.3-71-gd317 From 4104d326b670c2b66f575d2004daa28b2d1b4c8d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 10 Jan 2014 17:01:58 -0500 Subject: ftrace: Remove global function list and call function directly Instead of having a list of global functions that are called, as only one global function is allow to be enabled at a time, there's no reason to have a list. Instead, simply have all the users of the global ops, use the global ops directly, instead of registering their own ftrace_ops. Just switch what function is used before enabling the function tracer. This removes a lot of code as well as the complexity involved with it. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 20 ++--- kernel/trace/ftrace.c | 152 +++++++++++--------------------------- kernel/trace/trace.c | 2 + kernel/trace/trace.h | 19 +++-- kernel/trace/trace_functions.c | 55 +++++--------- kernel/trace/trace_irqsoff.c | 33 ++++----- kernel/trace/trace_sched_wakeup.c | 40 +++++----- kernel/trace/trace_selftest.c | 33 +++++---- 8 files changed, 133 insertions(+), 221 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9212b017bc72..f0ff2c2453e7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -62,9 +62,6 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, * set in the flags member. * * ENABLED - set/unset when ftrace_ops is registered/unregistered - * GLOBAL - set manualy by ftrace_ops user to denote the ftrace_ops - * is part of the global tracers sharing the same filter - * via set_ftrace_* debugfs files. * DYNAMIC - set when ftrace_ops is registered to denote dynamically * allocated ftrace_ops which need special care * CONTROL - set manualy by ftrace_ops user to denote the ftrace_ops @@ -96,15 +93,14 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, - FTRACE_OPS_FL_GLOBAL = 1 << 1, - FTRACE_OPS_FL_DYNAMIC = 1 << 2, - FTRACE_OPS_FL_CONTROL = 1 << 3, - FTRACE_OPS_FL_SAVE_REGS = 1 << 4, - FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 5, - FTRACE_OPS_FL_RECURSION_SAFE = 1 << 6, - FTRACE_OPS_FL_STUB = 1 << 7, - FTRACE_OPS_FL_INITIALIZED = 1 << 8, - FTRACE_OPS_FL_DELETED = 1 << 9, + FTRACE_OPS_FL_DYNAMIC = 1 << 1, + FTRACE_OPS_FL_CONTROL = 1 << 2, + FTRACE_OPS_FL_SAVE_REGS = 1 << 3, + FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 4, + FTRACE_OPS_FL_RECURSION_SAFE = 1 << 5, + FTRACE_OPS_FL_STUB = 1 << 6, + FTRACE_OPS_FL_INITIALIZED = 1 << 7, + FTRACE_OPS_FL_DELETED = 1 << 8, }; /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1fd4b9479210..8f61ef70a297 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -62,7 +62,7 @@ #define FTRACE_HASH_DEFAULT_BITS 10 #define FTRACE_HASH_MAX_BITS 12 -#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL) +#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL) #ifdef CONFIG_DYNAMIC_FTRACE #define INIT_REGEX_LOCK(opsname) \ @@ -103,7 +103,6 @@ static int ftrace_disabled __read_mostly; static DEFINE_MUTEX(ftrace_lock); -static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; @@ -171,23 +170,6 @@ int ftrace_nr_registered_ops(void) return cnt; } -static void -ftrace_global_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) -{ - int bit; - - bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX); - if (bit < 0) - return; - - do_for_each_ftrace_op(op, ftrace_global_list) { - op->func(ip, parent_ip, op, regs); - } while_for_each_ftrace_op(op); - - trace_clear_recursion(bit); -} - static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs) { @@ -237,43 +219,6 @@ static int control_ops_alloc(struct ftrace_ops *ops) return 0; } -static void update_global_ops(void) -{ - ftrace_func_t func = ftrace_global_list_func; - void *private = NULL; - - /* The list has its own recursion protection. */ - global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE; - - /* - * If there's only one function registered, then call that - * function directly. Otherwise, we need to iterate over the - * registered callers. - */ - if (ftrace_global_list == &ftrace_list_end || - ftrace_global_list->next == &ftrace_list_end) { - func = ftrace_global_list->func; - private = ftrace_global_list->private; - /* - * As we are calling the function directly. - * If it does not have recursion protection, - * the function_trace_op needs to be updated - * accordingly. - */ - if (!(ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE)) - global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE; - } - - /* If we filter on pids, update to use the pid function */ - if (!list_empty(&ftrace_pids)) { - set_ftrace_pid_function(func); - func = ftrace_pid_func; - } - - global_ops.func = func; - global_ops.private = private; -} - static void ftrace_sync(struct work_struct *work) { /* @@ -301,8 +246,6 @@ static void update_ftrace_function(void) { ftrace_func_t func; - update_global_ops(); - /* * If we are at the end of the list and this ops is * recursion safe and not dynamic and the arch supports passing ops, @@ -314,10 +257,7 @@ static void update_ftrace_function(void) (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) && !FTRACE_FORCE_LIST_FUNC)) { /* Set the ftrace_ops that the arch callback uses */ - if (ftrace_ops_list == &global_ops) - set_function_trace_op = ftrace_global_list; - else - set_function_trace_op = ftrace_ops_list; + set_function_trace_op = ftrace_ops_list; func = ftrace_ops_list->func; } else { /* Just use the default ftrace_ops */ @@ -434,16 +374,9 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if (ops->flags & FTRACE_OPS_FL_DELETED) return -EINVAL; - if (FTRACE_WARN_ON(ops == &global_ops)) - return -EINVAL; - if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) return -EBUSY; - /* We don't support both control and global flags set. */ - if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK) - return -EINVAL; - #ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS /* * If the ftrace_ops specifies SAVE_REGS, then it only can be used @@ -461,10 +394,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; - if (ops->flags & FTRACE_OPS_FL_GLOBAL) { - add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops); - ops->flags |= FTRACE_OPS_FL_ENABLED; - } else if (ops->flags & FTRACE_OPS_FL_CONTROL) { + if (ops->flags & FTRACE_OPS_FL_CONTROL) { if (control_ops_alloc(ops)) return -ENOMEM; add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); @@ -484,15 +414,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED))) return -EBUSY; - if (FTRACE_WARN_ON(ops == &global_ops)) - return -EINVAL; - - if (ops->flags & FTRACE_OPS_FL_GLOBAL) { - ret = remove_ftrace_list_ops(&ftrace_global_list, - &global_ops, ops); - if (!ret) - ops->flags &= ~FTRACE_OPS_FL_ENABLED; - } else if (ops->flags & FTRACE_OPS_FL_CONTROL) { + if (ops->flags & FTRACE_OPS_FL_CONTROL) { ret = remove_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); } else @@ -2128,15 +2050,6 @@ static int ftrace_startup(struct ftrace_ops *ops, int command) ftrace_start_up++; command |= FTRACE_UPDATE_CALLS; - /* ops marked global share the filter hashes */ - if (ops->flags & FTRACE_OPS_FL_GLOBAL) { - ops = &global_ops; - /* Don't update hash if global is already set */ - if (global_start_up) - hash_enable = false; - global_start_up++; - } - ops->flags |= FTRACE_OPS_FL_ENABLED; if (hash_enable) ftrace_hash_rec_enable(ops, 1); @@ -2166,21 +2079,10 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) */ WARN_ON_ONCE(ftrace_start_up < 0); - if (ops->flags & FTRACE_OPS_FL_GLOBAL) { - ops = &global_ops; - global_start_up--; - WARN_ON_ONCE(global_start_up < 0); - /* Don't update hash if global still has users */ - if (global_start_up) { - WARN_ON_ONCE(!ftrace_start_up); - hash_disable = false; - } - } - if (hash_disable) ftrace_hash_rec_disable(ops, 1); - if (ops != &global_ops || !global_start_up) + if (!global_start_up) ops->flags &= ~FTRACE_OPS_FL_ENABLED; command |= FTRACE_UPDATE_CALLS; @@ -3524,10 +3426,6 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, struct ftrace_hash *hash; int ret; - /* All global ops uses the global ops filters */ - if (ops->flags & FTRACE_OPS_FL_GLOBAL) - ops = &global_ops; - if (unlikely(ftrace_disabled)) return -ENODEV; @@ -4462,6 +4360,34 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) #endif /* CONFIG_DYNAMIC_FTRACE */ +__init void ftrace_init_global_array_ops(struct trace_array *tr) +{ + tr->ops = &global_ops; + tr->ops->private = tr; +} + +void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) +{ + /* If we filter on pids, update to use the pid function */ + if (tr->flags & TRACE_ARRAY_FL_GLOBAL) { + if (WARN_ON(tr->ops->func != ftrace_stub)) + printk("ftrace ops had %pS for function\n", + tr->ops->func); + /* Only the top level instance does pid tracing */ + if (!list_empty(&ftrace_pids)) { + set_ftrace_pid_function(func); + func = ftrace_pid_func; + } + } + tr->ops->func = func; + tr->ops->private = tr; +} + +void ftrace_reset_array_ops(struct trace_array *tr) +{ + tr->ops->func = ftrace_stub; +} + static void ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs) @@ -4520,9 +4446,16 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, */ preempt_disable_notrace(); do_for_each_ftrace_op(op, ftrace_ops_list) { - if (ftrace_ops_test(op, ip, regs)) + if (ftrace_ops_test(op, ip, regs)) { + if (WARN_ON(!op->func)) { + function_trace_stop = 1; + printk("op=%p %pS\n", op, op); + goto out; + } op->func(ip, parent_ip, op, regs); + } } while_for_each_ftrace_op(op); +out: preempt_enable_notrace(); trace_clear_recursion(bit); } @@ -5076,8 +5009,7 @@ ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state, /* Just a place holder for function graph */ static struct ftrace_ops fgraph_ops __read_mostly = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_STUB | FTRACE_OPS_FL_GLOBAL | - FTRACE_OPS_FL_RECURSION_SAFE, + .flags = FTRACE_OPS_FL_STUB | FTRACE_OPS_FL_RECURSION_SAFE, }; static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 737b0efa1a62..fdd33aacdf05 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6629,6 +6629,8 @@ __init static int tracer_alloc_buffers(void) */ global_trace.current_trace = &nop_trace; + ftrace_init_global_array_ops(&global_trace); + register_tracer(&nop_trace); /* All seems OK, enable tracing */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2e29d7ba5a52..df5256be64cd 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -416,13 +416,7 @@ enum { TRACE_FTRACE_IRQ_BIT, TRACE_FTRACE_SIRQ_BIT, - /* GLOBAL_BITs must be greater than FTRACE_BITs */ - TRACE_GLOBAL_BIT, - TRACE_GLOBAL_NMI_BIT, - TRACE_GLOBAL_IRQ_BIT, - TRACE_GLOBAL_SIRQ_BIT, - - /* INTERNAL_BITs must be greater than GLOBAL_BITs */ + /* INTERNAL_BITs must be greater than FTRACE_BITs */ TRACE_INTERNAL_BIT, TRACE_INTERNAL_NMI_BIT, TRACE_INTERNAL_IRQ_BIT, @@ -449,9 +443,6 @@ enum { #define TRACE_FTRACE_START TRACE_FTRACE_BIT #define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1) -#define TRACE_GLOBAL_START TRACE_GLOBAL_BIT -#define TRACE_GLOBAL_MAX ((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1) - #define TRACE_LIST_START TRACE_INTERNAL_BIT #define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) @@ -823,6 +814,9 @@ extern int ftrace_is_dead(void); int ftrace_create_function_files(struct trace_array *tr, struct dentry *parent); void ftrace_destroy_function_files(struct trace_array *tr); +void ftrace_init_global_array_ops(struct trace_array *tr); +void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func); +void ftrace_reset_array_ops(struct trace_array *tr); #else static inline int ftrace_trace_task(struct task_struct *task) { @@ -836,6 +830,11 @@ ftrace_create_function_files(struct trace_array *tr, return 0; } static inline void ftrace_destroy_function_files(struct trace_array *tr) { } +static inline __init void +ftrace_init_global_array_ops(struct trace_array *tr) { } +static inline void ftrace_reset_array_ops(struct trace_array *tr) { } +/* ftace_func_t type is not defined, use macro instead of static inline */ +#define ftrace_init_array_ops(tr, func) do { } while (0) #endif /* CONFIG_FUNCTION_TRACER */ #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index ffd56351b521..2d9482b8f26a 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -26,8 +26,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, static void function_stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs); -static struct ftrace_ops trace_ops; -static struct ftrace_ops trace_stack_ops; static struct tracer_flags func_flags; /* Our option */ @@ -83,28 +81,24 @@ void ftrace_destroy_function_files(struct trace_array *tr) static int function_trace_init(struct trace_array *tr) { - struct ftrace_ops *ops; - - if (tr->flags & TRACE_ARRAY_FL_GLOBAL) { - /* There's only one global tr */ - if (!trace_ops.private) { - trace_ops.private = tr; - trace_stack_ops.private = tr; - } + ftrace_func_t func; - if (func_flags.val & TRACE_FUNC_OPT_STACK) - ops = &trace_stack_ops; - else - ops = &trace_ops; - tr->ops = ops; - } else if (!tr->ops) { - /* - * Instance trace_arrays get their ops allocated - * at instance creation. Unless it failed - * the allocation. - */ + /* + * Instance trace_arrays get their ops allocated + * at instance creation. Unless it failed + * the allocation. + */ + if (!tr->ops) return -ENOMEM; - } + + /* Currently only the global instance can do stack tracing */ + if (tr->flags & TRACE_ARRAY_FL_GLOBAL && + func_flags.val & TRACE_FUNC_OPT_STACK) + func = function_stack_trace_call; + else + func = function_trace_call; + + ftrace_init_array_ops(tr, func); tr->trace_buffer.cpu = get_cpu(); put_cpu(); @@ -118,6 +112,7 @@ static void function_trace_reset(struct trace_array *tr) { tracing_stop_function_trace(tr); tracing_stop_cmdline_record(); + ftrace_reset_array_ops(tr); } static void function_trace_start(struct trace_array *tr) @@ -199,18 +194,6 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, local_irq_restore(flags); } -static struct ftrace_ops trace_ops __read_mostly = -{ - .func = function_trace_call, - .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, -}; - -static struct ftrace_ops trace_stack_ops __read_mostly = -{ - .func = function_stack_trace_call, - .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, -}; - static struct tracer_opt func_opts[] = { #ifdef CONFIG_STACKTRACE { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) }, @@ -248,10 +231,10 @@ func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) unregister_ftrace_function(tr->ops); if (set) { - tr->ops = &trace_stack_ops; + tr->ops->func = function_stack_trace_call; register_ftrace_function(tr->ops); } else { - tr->ops = &trace_ops; + tr->ops->func = function_trace_call; register_ftrace_function(tr->ops); } diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 8ff02cbb892f..b5cb047df3e9 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -151,12 +151,6 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, atomic_dec(&data->disabled); } - -static struct ftrace_ops trace_ops __read_mostly = -{ - .func = irqsoff_tracer_call, - .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, -}; #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -531,7 +525,7 @@ void trace_preempt_off(unsigned long a0, unsigned long a1) } #endif /* CONFIG_PREEMPT_TRACER */ -static int register_irqsoff_function(int graph, int set) +static int register_irqsoff_function(struct trace_array *tr, int graph, int set) { int ret; @@ -543,7 +537,7 @@ static int register_irqsoff_function(int graph, int set) ret = register_ftrace_graph(&irqsoff_graph_return, &irqsoff_graph_entry); else - ret = register_ftrace_function(&trace_ops); + ret = register_ftrace_function(tr->ops); if (!ret) function_enabled = true; @@ -551,7 +545,7 @@ static int register_irqsoff_function(int graph, int set) return ret; } -static void unregister_irqsoff_function(int graph) +static void unregister_irqsoff_function(struct trace_array *tr, int graph) { if (!function_enabled) return; @@ -559,17 +553,17 @@ static void unregister_irqsoff_function(int graph) if (graph) unregister_ftrace_graph(); else - unregister_ftrace_function(&trace_ops); + unregister_ftrace_function(tr->ops); function_enabled = false; } -static void irqsoff_function_set(int set) +static void irqsoff_function_set(struct trace_array *tr, int set) { if (set) - register_irqsoff_function(is_graph(), 1); + register_irqsoff_function(tr, is_graph(), 1); else - unregister_irqsoff_function(is_graph()); + unregister_irqsoff_function(tr, is_graph()); } static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set) @@ -577,7 +571,7 @@ static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set) struct tracer *tracer = tr->current_trace; if (mask & TRACE_ITER_FUNCTION) - irqsoff_function_set(set); + irqsoff_function_set(tr, set); return trace_keep_overwrite(tracer, mask, set); } @@ -586,7 +580,7 @@ static int start_irqsoff_tracer(struct trace_array *tr, int graph) { int ret; - ret = register_irqsoff_function(graph, 0); + ret = register_irqsoff_function(tr, graph, 0); if (!ret && tracing_is_enabled()) tracer_enabled = 1; @@ -600,7 +594,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr, int graph) { tracer_enabled = 0; - unregister_irqsoff_function(graph); + unregister_irqsoff_function(tr, graph); } static void __irqsoff_tracer_init(struct trace_array *tr) @@ -617,7 +611,11 @@ static void __irqsoff_tracer_init(struct trace_array *tr) smp_wmb(); tracing_reset_online_cpus(&tr->trace_buffer); - if (start_irqsoff_tracer(tr, is_graph())) + ftrace_init_array_ops(tr, irqsoff_tracer_call); + + /* Only toplevel instance supports graph tracing */ + if (start_irqsoff_tracer(tr, (tr->flags & TRACE_ARRAY_FL_GLOBAL && + is_graph()))) printk(KERN_ERR "failed to start irqsoff tracer\n"); } @@ -630,6 +628,7 @@ static void irqsoff_tracer_reset(struct trace_array *tr) set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag); set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag); + ftrace_reset_array_ops(tr); } static void irqsoff_tracer_start(struct trace_array *tr) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index e14da5e97a69..4dd986defa60 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -130,15 +130,9 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, atomic_dec(&data->disabled); preempt_enable_notrace(); } - -static struct ftrace_ops trace_ops __read_mostly = -{ - .func = wakeup_tracer_call, - .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, -}; #endif /* CONFIG_FUNCTION_TRACER */ -static int register_wakeup_function(int graph, int set) +static int register_wakeup_function(struct trace_array *tr, int graph, int set) { int ret; @@ -150,7 +144,7 @@ static int register_wakeup_function(int graph, int set) ret = register_ftrace_graph(&wakeup_graph_return, &wakeup_graph_entry); else - ret = register_ftrace_function(&trace_ops); + ret = register_ftrace_function(tr->ops); if (!ret) function_enabled = true; @@ -158,7 +152,7 @@ static int register_wakeup_function(int graph, int set) return ret; } -static void unregister_wakeup_function(int graph) +static void unregister_wakeup_function(struct trace_array *tr, int graph) { if (!function_enabled) return; @@ -166,17 +160,17 @@ static void unregister_wakeup_function(int graph) if (graph) unregister_ftrace_graph(); else - unregister_ftrace_function(&trace_ops); + unregister_ftrace_function(tr->ops); function_enabled = false; } -static void wakeup_function_set(int set) +static void wakeup_function_set(struct trace_array *tr, int set) { if (set) - register_wakeup_function(is_graph(), 1); + register_wakeup_function(tr, is_graph(), 1); else - unregister_wakeup_function(is_graph()); + unregister_wakeup_function(tr, is_graph()); } static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set) @@ -184,16 +178,16 @@ static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set) struct tracer *tracer = tr->current_trace; if (mask & TRACE_ITER_FUNCTION) - wakeup_function_set(set); + wakeup_function_set(tr, set); return trace_keep_overwrite(tracer, mask, set); } -static int start_func_tracer(int graph) +static int start_func_tracer(struct trace_array *tr, int graph) { int ret; - ret = register_wakeup_function(graph, 0); + ret = register_wakeup_function(tr, graph, 0); if (!ret && tracing_is_enabled()) tracer_enabled = 1; @@ -203,11 +197,11 @@ static int start_func_tracer(int graph) return ret; } -static void stop_func_tracer(int graph) +static void stop_func_tracer(struct trace_array *tr, int graph) { tracer_enabled = 0; - unregister_wakeup_function(graph); + unregister_wakeup_function(tr, graph); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -221,12 +215,12 @@ wakeup_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) if (!(is_graph() ^ set)) return 0; - stop_func_tracer(!set); + stop_func_tracer(tr, !set); wakeup_reset(wakeup_trace); tracing_max_latency = 0; - return start_func_tracer(set); + return start_func_tracer(tr, set); } static int wakeup_graph_entry(struct ftrace_graph_ent *trace) @@ -587,7 +581,7 @@ static void start_wakeup_tracer(struct trace_array *tr) */ smp_wmb(); - if (start_func_tracer(is_graph())) + if (start_func_tracer(tr, is_graph())) printk(KERN_ERR "failed to start wakeup tracer\n"); return; @@ -600,7 +594,7 @@ fail_deprobe: static void stop_wakeup_tracer(struct trace_array *tr) { tracer_enabled = 0; - stop_func_tracer(is_graph()); + stop_func_tracer(tr, is_graph()); unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); unregister_trace_sched_wakeup_new(probe_wakeup, NULL); unregister_trace_sched_wakeup(probe_wakeup, NULL); @@ -617,6 +611,7 @@ static int __wakeup_tracer_init(struct trace_array *tr) tracing_max_latency = 0; wakeup_trace = tr; + ftrace_init_array_ops(tr, wakeup_tracer_call); start_wakeup_tracer(tr); return 0; } @@ -653,6 +648,7 @@ static void wakeup_tracer_reset(struct trace_array *tr) set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag); set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag); + ftrace_reset_array_ops(tr); } static void wakeup_tracer_start(struct trace_array *tr) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index e98fca60974f..519d04affe38 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -161,11 +161,6 @@ static struct ftrace_ops test_probe3 = { .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; -static struct ftrace_ops test_global = { - .func = trace_selftest_test_global_func, - .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, -}; - static void print_counts(void) { printk("(%d %d %d %d %d) ", @@ -185,7 +180,7 @@ static void reset_counts(void) trace_selftest_test_dyn_cnt = 0; } -static int trace_selftest_ops(int cnt) +static int trace_selftest_ops(struct trace_array *tr, int cnt) { int save_ftrace_enabled = ftrace_enabled; struct ftrace_ops *dyn_ops; @@ -220,7 +215,11 @@ static int trace_selftest_ops(int cnt) register_ftrace_function(&test_probe1); register_ftrace_function(&test_probe2); register_ftrace_function(&test_probe3); - register_ftrace_function(&test_global); + /* First time we are running with main function */ + if (cnt > 1) { + ftrace_init_array_ops(tr, trace_selftest_test_global_func); + register_ftrace_function(tr->ops); + } DYN_FTRACE_TEST_NAME(); @@ -232,8 +231,10 @@ static int trace_selftest_ops(int cnt) goto out; if (trace_selftest_test_probe3_cnt != 1) goto out; - if (trace_selftest_test_global_cnt == 0) - goto out; + if (cnt > 1) { + if (trace_selftest_test_global_cnt == 0) + goto out; + } DYN_FTRACE_TEST_NAME2(); @@ -269,8 +270,10 @@ static int trace_selftest_ops(int cnt) goto out_free; if (trace_selftest_test_probe3_cnt != 3) goto out_free; - if (trace_selftest_test_global_cnt == 0) - goto out; + if (cnt > 1) { + if (trace_selftest_test_global_cnt == 0) + goto out; + } if (trace_selftest_test_dyn_cnt == 0) goto out_free; @@ -295,7 +298,9 @@ static int trace_selftest_ops(int cnt) unregister_ftrace_function(&test_probe1); unregister_ftrace_function(&test_probe2); unregister_ftrace_function(&test_probe3); - unregister_ftrace_function(&test_global); + if (cnt > 1) + unregister_ftrace_function(tr->ops); + ftrace_reset_array_ops(tr); /* Make sure everything is off */ reset_counts(); @@ -388,7 +393,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, } /* Test the ops with global tracing running */ - ret = trace_selftest_ops(1); + ret = trace_selftest_ops(tr, 1); trace->reset(tr); out: @@ -399,7 +404,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, /* Test the ops with global tracing off */ if (!ret) - ret = trace_selftest_ops(2); + ret = trace_selftest_ops(tr, 2); return ret; } -- cgit v1.2.3-71-gd317 From 68957303f44a501af5cf37913208a2acaa6bcdf1 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 25 Mar 2014 06:51:47 +0100 Subject: NFC: ST21NFCA: Add driver for STMicroelectronics ST21NFCA NFC Chip Add driver for STMicroelectronics ST21NFCA NFC controller. ST21NFCA is using HCI protocol, shdlc as LLC layer & I2C as communication protocol. Adding support for Reader/Writer mode with Tag type 1/2/3/4 A & B. It is using proprietary gate 15 for ISO14443-3 such as type 1 & type 2 tags. It is using proprietary gate 14 for type F tags. ST21NFCA_DEVICE_MGNT_GATE gives access to proprietary CLF configuration. Standard gate for ISO14443-4 A (13) & B (11) are also used. ST21NFCA specific mecanism: One particular point to notice for the data handling is that frame does not contain any length value. Therefore the i2c part of this driver is managing the reception with a read length sequence until the end of frame (0x7e) is reached. In order to avoid conflict between sof & eof a mecanism called byte stuffing concist of an escape byte (0x7d) insertion before special byte (0x7e, 0x7d). The special byte is then xored with 0x20. In this driver, When data are available in the CLF, the interrupt gpio is driven to active state and triggered an interrupt. Once the i2c_master_recv start, the interrupt gpio is driven to idle state until its complete. If the frame is incomplete or data are still available, interrupts will be triggered again. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- drivers/nfc/Kconfig | 1 + drivers/nfc/Makefile | 1 + drivers/nfc/st21nfca/Kconfig | 23 ++ drivers/nfc/st21nfca/Makefile | 8 + drivers/nfc/st21nfca/i2c.c | 595 +++++++++++++++++++++++++++++++++ drivers/nfc/st21nfca/st21nfca.c | 506 ++++++++++++++++++++++++++++ drivers/nfc/st21nfca/st21nfca.h | 87 +++++ include/linux/platform_data/st21nfca.h | 32 ++ 8 files changed, 1253 insertions(+) create mode 100644 drivers/nfc/st21nfca/Kconfig create mode 100644 drivers/nfc/st21nfca/Makefile create mode 100644 drivers/nfc/st21nfca/i2c.c create mode 100644 drivers/nfc/st21nfca/st21nfca.c create mode 100644 drivers/nfc/st21nfca/st21nfca.h create mode 100644 include/linux/platform_data/st21nfca.h (limited to 'include') diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig index 65d4ca19d132..26c66a126551 100644 --- a/drivers/nfc/Kconfig +++ b/drivers/nfc/Kconfig @@ -71,5 +71,6 @@ config NFC_PORT100 source "drivers/nfc/pn544/Kconfig" source "drivers/nfc/microread/Kconfig" source "drivers/nfc/nfcmrvl/Kconfig" +source "drivers/nfc/st21nfca/Kconfig" endmenu diff --git a/drivers/nfc/Makefile b/drivers/nfc/Makefile index ae42a3fa60c9..23225b0287fd 100644 --- a/drivers/nfc/Makefile +++ b/drivers/nfc/Makefile @@ -11,5 +11,6 @@ obj-$(CONFIG_NFC_SIM) += nfcsim.o obj-$(CONFIG_NFC_PORT100) += port100.o obj-$(CONFIG_NFC_MRVL) += nfcmrvl/ obj-$(CONFIG_NFC_TRF7970A) += trf7970a.o +obj-$(CONFIG_NFC_ST21NFCA) += st21nfca/ ccflags-$(CONFIG_NFC_DEBUG) := -DDEBUG diff --git a/drivers/nfc/st21nfca/Kconfig b/drivers/nfc/st21nfca/Kconfig new file mode 100644 index 000000000000..ee459f066ade --- /dev/null +++ b/drivers/nfc/st21nfca/Kconfig @@ -0,0 +1,23 @@ +config NFC_ST21NFCA + tristate "STMicroelectronics ST21NFCA NFC driver" + depends on NFC_HCI + select CRC_CCITT + default n + ---help--- + STMicroelectronics ST21NFCA core driver. It implements the chipset + HCI logic and hooks into the NFC kernel APIs. Physical layers will + register against it. + + To compile this driver as a module, choose m here. The module will + be called st21nfca. + Say N if unsure. + +config NFC_ST21NFCA_I2C + tristate "NFC ST21NFCA i2c support" + depends on NFC_ST21NFCA && I2C && NFC_SHDLC + ---help--- + This module adds support for the STMicroelectronics st21nfca i2c interface. + Select this if your platform is using the i2c bus. + + If you choose to build a module, it'll be called st21nfca_i2c. + Say N if unsure. diff --git a/drivers/nfc/st21nfca/Makefile b/drivers/nfc/st21nfca/Makefile new file mode 100644 index 000000000000..038ed093a119 --- /dev/null +++ b/drivers/nfc/st21nfca/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for ST21NFCA HCI based NFC driver +# + +st21nfca_i2c-objs = i2c.o + +obj-$(CONFIG_NFC_ST21NFCA) += st21nfca.o +obj-$(CONFIG_NFC_ST21NFCA_I2C) += st21nfca_i2c.o diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c new file mode 100644 index 000000000000..3b0fd0f76d1c --- /dev/null +++ b/drivers/nfc/st21nfca/i2c.c @@ -0,0 +1,595 @@ +/* + * I2C Link Layer for ST21NFCA HCI based Driver + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "st21nfca.h" + +/* + * Every frame starts with ST21NFCA_SOF_EOF and ends with ST21NFCA_SOF_EOF. + * Because ST21NFCA_SOF_EOF is a possible data value, there is a mecanism + * called byte stuffing has been introduced. + * + * if byte == ST21NFCA_SOF_EOF or ST21NFCA_ESCAPE_BYTE_STUFFING + * - insert ST21NFCA_ESCAPE_BYTE_STUFFING (escape byte) + * - xor byte with ST21NFCA_BYTE_STUFFING_MASK + */ +#define ST21NFCA_SOF_EOF 0x7e +#define ST21NFCA_BYTE_STUFFING_MASK 0x20 +#define ST21NFCA_ESCAPE_BYTE_STUFFING 0x7d + +/* SOF + 00 fill size */ +#define ST21NFCA_FRAME_HEADROOM 2 + +/* 4 bytes crc (worst case byte stuffing) + EOF */ +#define ST21NFCA_FRAME_TAILROOM 5 + +#define ST21NFCA_HCI_I2C_DRIVER_NAME "st21nfca_hci_i2c" + +static struct i2c_device_id st21nfca_hci_i2c_id_table[] = { + {ST21NFCA_HCI_DRIVER_NAME, 0}, + {} +}; + +MODULE_DEVICE_TABLE(i2c, st21nfca_hci_i2c_id_table); + +struct st21nfca_i2c_phy { + struct i2c_client *i2c_dev; + struct nfc_hci_dev *hdev; + + unsigned int gpio_ena; + unsigned int gpio_irq; + unsigned int irq_polarity; + + struct sk_buff *pending_skb; + int current_read_len; + /* + * crc might have fail because i2c macro + * is disable due to other interface activity + */ + int crc_trials; + + int powered; + int run_mode; + + /* + * < 0 if hardware error occured (e.g. i2c err) + * and prevents normal operation. + */ + int hard_fault; +}; +static u8 len_seq[] = { 13, 24, 15, 29 }; +static u16 wait_tab[] = { 2, 3, 5, 15, 20, 40}; + +#define I2C_DUMP_SKB(info, skb) \ +do { \ + pr_debug("%s:\n", info); \ + print_hex_dump(KERN_DEBUG, "i2c: ", DUMP_PREFIX_OFFSET, \ + 16, 1, (skb)->data, (skb)->len, 0); \ +} while (0) + +static void st21nfca_hci_platform_init(struct st21nfca_i2c_phy *phy) +{ + u16 wait_tab[] = { 50, 300, 1000 }; + char reboot_cmd[] = { 0x7E, 0x66, 0x48, 0xF6, 0x7E }; + u8 tmp[ST21NFCA_HCI_LLC_MAX_SIZE]; + int i, r = -1; + + for (i = 0; i < ARRAY_SIZE(wait_tab) && r < 0; i++) + r = i2c_master_recv(phy->i2c_dev, tmp, + ST21NFCA_HCI_LLC_MAX_SIZE); + + r = -1; + for (i = 0; i < ARRAY_SIZE(wait_tab) && r < 0; i++) + r = i2c_master_send(phy->i2c_dev, reboot_cmd, + sizeof(reboot_cmd)); + usleep_range(1000, 1500); + +} + +static int st21nfca_hci_i2c_enable(void *phy_id) +{ + struct st21nfca_i2c_phy *phy = phy_id; + + gpio_set_value(phy->gpio_ena, 1); + phy->powered = 1; + phy->run_mode = ST21NFCA_HCI_MODE; + + usleep_range(10000, 15000); + + return 0; +} + +static void st21nfca_hci_i2c_disable(void *phy_id) +{ + struct st21nfca_i2c_phy *phy = phy_id; + + pr_info("\n"); + gpio_set_value(phy->gpio_ena, 0); + + phy->powered = 0; +} + +static int st21nfca_hci_add_len_crc(struct sk_buff *skb) +{ + int ret = 2; + u16 crc; + u8 tmp; + + *skb_push(skb, 1) = 0; + + crc = crc_ccitt(0xffff, skb->data, skb->len); + crc = ~crc; + + tmp = crc & 0x00ff; + *skb_put(skb, 1) = tmp; + + tmp = (crc >> 8) & 0x00ff; + *skb_put(skb, 1) = tmp; + + return ret; +} + +static void st21nfca_hci_remove_len_crc(struct sk_buff *skb, int crc_len) +{ + skb_pull(skb, ST21NFCA_FRAME_HEADROOM); + skb_trim(skb, crc_len); +} + +/* + * Writing a frame must not return the number of written bytes. + * It must return either zero for success, or <0 for error. + * In addition, it must not alter the skb + */ +static int st21nfca_hci_i2c_write(void *phy_id, struct sk_buff *skb) +{ + int r = -1, i, j, len; + struct st21nfca_i2c_phy *phy = phy_id; + struct i2c_client *client = phy->i2c_dev; + u16 wait_tab[] = { 2, 3, 5, 15, 20, 40}; + u8 tmp[ST21NFCA_HCI_LLC_MAX_SIZE * 2]; + + I2C_DUMP_SKB("st21nfca_hci_i2c_write", skb); + + + if (phy->hard_fault != 0) + return phy->hard_fault; + + /* + * Compute CRC before byte stuffing computation on frame + * Note st21nfca_hci_add_len_crc is doing a byte stuffing + * on its own value + */ + len = st21nfca_hci_add_len_crc(skb); + + /* add ST21NFCA_SOF_EOF on tail */ + *skb_put(skb, 1) = ST21NFCA_SOF_EOF; + /* add ST21NFCA_SOF_EOF on head */ + *skb_push(skb, 1) = ST21NFCA_SOF_EOF; + + /* + * Compute byte stuffing + * if byte == ST21NFCA_SOF_EOF or ST21NFCA_ESCAPE_BYTE_STUFFING + * insert ST21NFCA_ESCAPE_BYTE_STUFFING (escape byte) + * xor byte with ST21NFCA_BYTE_STUFFING_MASK + */ + tmp[0] = skb->data[0]; + for (i = 1, j = 1; i < skb->len - 1; i++, j++) { + if (skb->data[i] == ST21NFCA_SOF_EOF + || skb->data[i] == ST21NFCA_ESCAPE_BYTE_STUFFING) { + tmp[j] = ST21NFCA_ESCAPE_BYTE_STUFFING; + j++; + tmp[j] = skb->data[i] ^ ST21NFCA_BYTE_STUFFING_MASK; + } else { + tmp[j] = skb->data[i]; + } + } + tmp[j] = skb->data[i]; + j++; + + /* + * Manage sleep mode + * Try 3 times to send data with delay between each + */ + for (i = 0; i < ARRAY_SIZE(wait_tab) && r < 0; i++) { + r = i2c_master_send(client, tmp, j); + if (r < 0) + msleep(wait_tab[i]); + } + + if (r >= 0) { + if (r != j) + r = -EREMOTEIO; + else + r = 0; + } + + st21nfca_hci_remove_len_crc(skb, len); + + return r; +} + +static int get_frame_size(u8 *buf, int buflen) +{ + int len = 0; + if (buf[len + 1] == ST21NFCA_SOF_EOF) + return 0; + + for (len = 1; len < buflen && buf[len] != ST21NFCA_SOF_EOF; len++) + ; + + return len; +} + +static int check_crc(u8 *buf, int buflen) +{ + u16 crc; + + crc = crc_ccitt(0xffff, buf, buflen - 2); + crc = ~crc; + + if (buf[buflen - 2] != (crc & 0xff) || buf[buflen - 1] != (crc >> 8)) { + pr_err(ST21NFCA_HCI_DRIVER_NAME + ": CRC error 0x%x != 0x%x 0x%x\n", crc, buf[buflen - 1], + buf[buflen - 2]); + + pr_info(DRIVER_DESC ": %s : BAD CRC\n", __func__); + print_hex_dump(KERN_DEBUG, "crc: ", DUMP_PREFIX_NONE, + 16, 2, buf, buflen, false); + return -EPERM; + } + return 0; +} + +/* + * Prepare received data for upper layer. + * Received data include byte stuffing, crc and sof/eof + * which is not usable by hci part. + * returns: + * frame size without sof/eof, header and byte stuffing + * -EBADMSG : frame was incorrect and discarded + */ +static int st21nfca_hci_i2c_repack(struct sk_buff *skb) +{ + int i, j, r, size; + if (skb->len < 1 || (skb->len > 1 && skb->data[1] != 0)) + return -EBADMSG; + + size = get_frame_size(skb->data, skb->len); + if (size > 0) { + skb_trim(skb, size); + /* remove ST21NFCA byte stuffing for upper layer */ + for (i = 1, j = 0; i < skb->len; i++) { + if (skb->data[i] == + (u8) ST21NFCA_ESCAPE_BYTE_STUFFING) { + skb->data[i] = + skb->data[i + + 1] | ST21NFCA_BYTE_STUFFING_MASK; + i++; + j++; + } + skb->data[i] = skb->data[i + j]; + } + /* remove byte stuffing useless byte */ + skb_trim(skb, i - j); + /* remove ST21NFCA_SOF_EOF from head */ + skb_pull(skb, 1); + + r = check_crc(skb->data, skb->len); + if (r != 0) { + i = 0; + return -EBADMSG; + } + + /* remove headbyte */ + skb_pull(skb, 1); + /* remove crc. Byte Stuffing is already removed here */ + skb_trim(skb, skb->len - 2); + return skb->len; + } + return 0; +} + +/* + * Reads an shdlc frame and returns it in a newly allocated sk_buff. Guarantees + * that i2c bus will be flushed and that next read will start on a new frame. + * returned skb contains only LLC header and payload. + * returns: + * frame size : if received frame is complete (find ST21NFCA_SOF_EOF at + * end of read) + * -EAGAIN : if received frame is incomplete (not find ST21NFCA_SOF_EOF + * at end of read) + * -EREMOTEIO : i2c read error (fatal) + * -EBADMSG : frame was incorrect and discarded + * (value returned from st21nfca_hci_i2c_repack) + * -EIO : if no ST21NFCA_SOF_EOF is found after reaching + * the read length end sequence + */ +static int st21nfca_hci_i2c_read(struct st21nfca_i2c_phy *phy, + struct sk_buff *skb) +{ + int r, i; + u8 len; + struct i2c_client *client = phy->i2c_dev; + + if (phy->current_read_len < ARRAY_SIZE(len_seq)) { + len = len_seq[phy->current_read_len]; + + /* + * Add retry mecanism + * Operation on I2C interface may fail in case of operation on + * RF or SWP interface + */ + r = 0; + for (i = 0; i < ARRAY_SIZE(wait_tab) && r <= 0; i++) { + r = i2c_master_recv(client, skb_put(skb, len), len); + if (r < 0) + msleep(wait_tab[i]); + } + + if (r != len) { + phy->current_read_len = 0; + return -EREMOTEIO; + } + + if (memchr(skb->data + 2, ST21NFCA_SOF_EOF, + skb->len - 2) != NULL) { + phy->current_read_len = 0; + return st21nfca_hci_i2c_repack(skb); + } + phy->current_read_len++; + return -EAGAIN; + } + return -EIO; +} + +/* + * Reads an shdlc frame from the chip. This is not as straightforward as it + * seems. The frame format is data-crc, and corruption can occur anywhere + * while transiting on i2c bus, such that we could read an invalid data. + * The tricky case is when we read a corrupted data or crc. We must detect + * this here in order to determine that data can be transmitted to the hci + * core. This is the reason why we check the crc here. + * The CLF will repeat a frame until we send a RR on that frame. + * + * On ST21NFCA, IRQ goes in idle when read starts. As no size information are + * available in the incoming data, other IRQ might come. Every IRQ will trigger + * a read sequence with different length and will fill the current frame. + * The reception is complete once we reach a ST21NFCA_SOF_EOF. + */ +static irqreturn_t st21nfca_hci_irq_thread_fn(int irq, void *phy_id) +{ + struct st21nfca_i2c_phy *phy = phy_id; + struct i2c_client *client; + + int r; + + if (!phy || irq != phy->i2c_dev->irq) { + WARN_ON_ONCE(1); + return IRQ_NONE; + } + + client = phy->i2c_dev; + dev_dbg(&client->dev, "IRQ\n"); + + if (phy->hard_fault != 0) + return IRQ_HANDLED; + + r = st21nfca_hci_i2c_read(phy, phy->pending_skb); + if (r == -EREMOTEIO) { + phy->hard_fault = r; + + nfc_hci_recv_frame(phy->hdev, NULL); + + return IRQ_HANDLED; + } else if (r == -EAGAIN || r == -EIO) { + return IRQ_HANDLED; + } else if (r == -EBADMSG && phy->crc_trials < ARRAY_SIZE(wait_tab)) { + /* + * With ST21NFCA, only one interface (I2C, RF or SWP) + * may be active at a time. + * Having incorrect crc is usually due to i2c macrocell + * deactivation in the middle of a transmission. + * It may generate corrupted data on i2c. + * We give sometime to get i2c back. + * The complete frame will be repeated. + */ + msleep(wait_tab[phy->crc_trials]); + phy->crc_trials++; + phy->current_read_len = 0; + } else if (r > 0) { + /* + * We succeeded to read data from the CLF and + * data is valid. + * Reset counter. + */ + nfc_hci_recv_frame(phy->hdev, phy->pending_skb); + phy->crc_trials = 0; + } + + phy->pending_skb = alloc_skb(ST21NFCA_HCI_LLC_MAX_SIZE * 2, GFP_KERNEL); + if (phy->pending_skb == NULL) { + phy->hard_fault = -ENOMEM; + nfc_hci_recv_frame(phy->hdev, NULL); + } + + return IRQ_HANDLED; +} + +static struct nfc_phy_ops i2c_phy_ops = { + .write = st21nfca_hci_i2c_write, + .enable = st21nfca_hci_i2c_enable, + .disable = st21nfca_hci_i2c_disable, +}; + +static int st21nfca_request_resources(struct st21nfca_i2c_phy *phy, + struct i2c_client *client) +{ + struct st21nfca_nfc_platform_data *pdata; + int r; + + pdata = client->dev.platform_data; + if (pdata == NULL) { + nfc_err(&client->dev, "No platform data\n"); + return -EINVAL; + } + + /* store for later use */ + phy->gpio_irq = pdata->gpio_irq; + phy->gpio_ena = pdata->gpio_ena; + phy->irq_polarity = pdata->irq_polarity; + phy->i2c_dev = client; + + r = devm_gpio_request(&client->dev, phy->gpio_irq, "wake_up"); + if (r) { + pr_err("%s : gpio_request failed\n", __FILE__); + return -ENODEV; + } + + r = gpio_direction_input(phy->gpio_irq); + if (r) { + pr_err("%s : gpio_direction_input failed\n", __FILE__); + return -ENODEV; + } + + if (phy->gpio_ena != 0) { + r = devm_gpio_request(&client->dev, + phy->gpio_ena, "clf_enable"); + if (r) { + pr_err("%s : ena gpio_request failed\n", __FILE__); + return -ENODEV; + } + r = gpio_direction_output(phy->gpio_ena, 1); + + if (r) { + pr_err("%s : ena gpio_direction_output failed\n", + __FILE__); + return -ENODEV; + } + } + + phy->pending_skb = alloc_skb(ST21NFCA_HCI_LLC_MAX_SIZE * 2, GFP_KERNEL); + if (phy->pending_skb == NULL) + return -ENOMEM; + + phy->current_read_len = 0; + phy->crc_trials = 0; + return r; +} + +static int st21nfca_hci_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct st21nfca_i2c_phy *phy; + struct st21nfca_nfc_platform_data *pdata; + int r = 0; + + dev_dbg(&client->dev, "%s\n", __func__); + dev_dbg(&client->dev, "IRQ: %d\n", client->irq); + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + nfc_err(&client->dev, "Need I2C_FUNC_I2C\n"); + return -ENODEV; + } + + phy = devm_kzalloc(&client->dev, sizeof(struct st21nfca_i2c_phy), + GFP_KERNEL); + if (!phy) { + nfc_err(&client->dev, + "Cannot allocate memory for st21nfca i2c phy.\n"); + return -ENOMEM; + } + + phy->i2c_dev = client; + + i2c_set_clientdata(client, phy); + + pdata = client->dev.platform_data; + if (pdata == NULL) { + nfc_err(&client->dev, "No platform data\n"); + return -EINVAL; + } + + r = st21nfca_request_resources(phy, client); + if (r) { + nfc_err(&client->dev, "Cannot get platform resources\n"); + return r; + } + + st21nfca_hci_platform_init(phy); + r = devm_request_threaded_irq(&client->dev, client->irq, NULL, + st21nfca_hci_irq_thread_fn, + phy->irq_polarity | IRQF_ONESHOT, + ST21NFCA_HCI_DRIVER_NAME, phy); + if (r < 0) { + nfc_err(&client->dev, "Unable to register IRQ handler\n"); + return r; + } + + r = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, + ST21NFCA_FRAME_HEADROOM, ST21NFCA_FRAME_TAILROOM, + ST21NFCA_HCI_LLC_MAX_PAYLOAD, &phy->hdev); + + if (r < 0) + return r; + + return 0; +} + +static int st21nfca_hci_i2c_remove(struct i2c_client *client) +{ + struct st21nfca_i2c_phy *phy = i2c_get_clientdata(client); + + dev_dbg(&client->dev, "%s\n", __func__); + + st21nfca_hci_remove(phy->hdev); + + if (phy->powered) + st21nfca_hci_i2c_disable(phy); + + return 0; +} + +static struct i2c_driver st21nfca_hci_i2c_driver = { + .driver = { + .name = ST21NFCA_HCI_I2C_DRIVER_NAME, + }, + .probe = st21nfca_hci_i2c_probe, + .id_table = st21nfca_hci_i2c_id_table, + .remove = st21nfca_hci_i2c_remove, +}; + +module_i2c_driver(st21nfca_hci_i2c_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/nfc/st21nfca/st21nfca.c b/drivers/nfc/st21nfca/st21nfca.c new file mode 100644 index 000000000000..69213f37b7ba --- /dev/null +++ b/drivers/nfc/st21nfca/st21nfca.c @@ -0,0 +1,506 @@ +/* + * HCI based Driver for STMicroelectronics NFC Chip + * + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include "st21nfca.h" +#include + +#define DRIVER_DESC "HCI NFC driver for ST21NFCA" + +#define FULL_VERSION_LEN 3 + +/* Proprietary gates, events, commands and registers */ + +/* Commands that apply to all RF readers */ +#define ST21NFCA_RF_READER_CMD_PRESENCE_CHECK 0x30 + +#define ST21NFCA_RF_READER_ISO15693_GATE 0x12 + +/* + * Reader gate for communication with contact-less cards using Type A + * protocol ISO14443-3 but not compliant with ISO14443-4 + */ +#define ST21NFCA_RF_READER_14443_3_A_GATE 0x15 +#define ST21NFCA_RF_READER_14443_3_A_UID 0x02 +#define ST21NFCA_RF_READER_14443_3_A_ATQA 0x03 +#define ST21NFCA_RF_READER_14443_3_A_SAK 0x04 + +#define ST21NFCA_DEVICE_MGNT_GATE 0x01 +#define ST21NFCA_DEVICE_MGNT_PIPE 0x02 +#define ST21NFCA_NFC_MODE 0x03 /* NFC_MODE parameter*/ + + +static DECLARE_BITMAP(dev_mask, ST21NFCA_NUM_DEVICES); + +static struct nfc_hci_gate st21nfca_gates[] = { + {NFC_HCI_ADMIN_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_LOOPBACK_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_ID_MGMT_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_LINK_MGMT_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_RF_READER_B_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_RF_READER_A_GATE, NFC_HCI_INVALID_PIPE}, + {ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_DEVICE_MGNT_PIPE}, + {ST21NFCA_RF_READER_F_GATE, NFC_HCI_INVALID_PIPE}, + {ST21NFCA_RF_READER_14443_3_A_GATE, NFC_HCI_INVALID_PIPE}, +}; +/* Largest headroom needed for outgoing custom commands */ +#define ST21NFCA_CMDS_HEADROOM 7 + +static int st21nfca_hci_open(struct nfc_hci_dev *hdev) +{ + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + int r; + + mutex_lock(&info->info_lock); + + if (info->state != ST21NFCA_ST_COLD) { + r = -EBUSY; + goto out; + } + + r = info->phy_ops->enable(info->phy_id); + + if (r == 0) + info->state = ST21NFCA_ST_READY; + +out: + mutex_unlock(&info->info_lock); + return r; +} + +static void st21nfca_hci_close(struct nfc_hci_dev *hdev) +{ + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + + mutex_lock(&info->info_lock); + + if (info->state == ST21NFCA_ST_COLD) + goto out; + + info->phy_ops->disable(info->phy_id); + info->state = ST21NFCA_ST_COLD; + +out: + mutex_unlock(&info->info_lock); +} + +static int st21nfca_hci_ready(struct nfc_hci_dev *hdev) +{ + struct sk_buff *skb; + + u8 param; + int r; + + param = NFC_HCI_UICC_HOST_ID; + r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE, + NFC_HCI_ADMIN_WHITELIST, ¶m, 1); + if (r < 0) + return r; + + /* Set NFC_MODE in device management gate to enable */ + r = nfc_hci_get_param(hdev, ST21NFCA_DEVICE_MGNT_GATE, + ST21NFCA_NFC_MODE, &skb); + if (r < 0) + return r; + + if (skb->data[0] == 0) { + kfree_skb(skb); + param = 1; + + r = nfc_hci_set_param(hdev, ST21NFCA_DEVICE_MGNT_GATE, + ST21NFCA_NFC_MODE, ¶m, 1); + if (r < 0) + return r; + } + + r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE, + NFC_HCI_EVT_END_OPERATION, NULL, 0); + if (r < 0) + return r; + + r = nfc_hci_get_param(hdev, NFC_HCI_ID_MGMT_GATE, + NFC_HCI_ID_MGMT_VERSION_SW, &skb); + if (r < 0) + return r; + + if (skb->len != FULL_VERSION_LEN) { + kfree_skb(skb); + return -EINVAL; + } + + print_hex_dump(KERN_DEBUG, "FULL VERSION SOFTWARE INFO: ", + DUMP_PREFIX_NONE, 16, 1, + skb->data, FULL_VERSION_LEN, false); + + kfree_skb(skb); + + return 0; +} + +static int st21nfca_hci_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb) +{ + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + + return info->phy_ops->write(info->phy_id, skb); +} + +static int st21nfca_hci_start_poll(struct nfc_hci_dev *hdev, + u32 im_protocols, u32 tm_protocols) +{ + int r; + + pr_info(DRIVER_DESC ": %s protocols 0x%x 0x%x\n", + __func__, im_protocols, tm_protocols); + + r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE, + NFC_HCI_EVT_END_OPERATION, NULL, 0); + if (r < 0) + return r; + if (im_protocols) { + /* + * enable polling according to im_protocols & tm_protocols + * - CLOSE pipe according to im_protocols & tm_protocols + */ + if ((NFC_HCI_RF_READER_B_GATE & im_protocols) == 0) { + r = nfc_hci_disconnect_gate(hdev, + NFC_HCI_RF_READER_B_GATE); + if (r < 0) + return r; + } + + if ((NFC_HCI_RF_READER_A_GATE & im_protocols) == 0) { + r = nfc_hci_disconnect_gate(hdev, + NFC_HCI_RF_READER_A_GATE); + if (r < 0) + return r; + } + + if ((ST21NFCA_RF_READER_F_GATE & im_protocols) == 0) { + r = nfc_hci_disconnect_gate(hdev, + ST21NFCA_RF_READER_F_GATE); + if (r < 0) + return r; + } + + if ((ST21NFCA_RF_READER_14443_3_A_GATE & im_protocols) == 0) { + r = nfc_hci_disconnect_gate(hdev, + ST21NFCA_RF_READER_14443_3_A_GATE); + if (r < 0) + return r; + } + + if ((ST21NFCA_RF_READER_ISO15693_GATE & im_protocols) == 0) { + r = nfc_hci_disconnect_gate(hdev, + ST21NFCA_RF_READER_ISO15693_GATE); + if (r < 0) + return r; + } + + r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE, + NFC_HCI_EVT_READER_REQUESTED, NULL, 0); + if (r < 0) + nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE, + NFC_HCI_EVT_END_OPERATION, NULL, 0); + } + return r; +} + +static int st21nfca_get_iso14443_3_atqa(struct nfc_hci_dev *hdev, u16 *atqa) +{ + int r; + struct sk_buff *atqa_skb = NULL; + + r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_14443_3_A_GATE, + ST21NFCA_RF_READER_14443_3_A_ATQA, &atqa_skb); + if (r < 0) + goto exit; + + if (atqa_skb->len != 2) { + r = -EPROTO; + goto exit; + } + + *atqa = be16_to_cpu(*(u16 *) atqa_skb->data); + +exit: + kfree_skb(atqa_skb); + return r; +} + +static int st21nfca_get_iso14443_3_sak(struct nfc_hci_dev *hdev, u8 *sak) +{ + int r; + struct sk_buff *sak_skb = NULL; + + r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_14443_3_A_GATE, + ST21NFCA_RF_READER_14443_3_A_SAK, &sak_skb); + if (r < 0) + goto exit; + + if (sak_skb->len != 1) { + r = -EPROTO; + goto exit; + } + + *sak = sak_skb->data[0]; + +exit: + kfree_skb(sak_skb); + return r; +} + +static int st21nfca_get_iso14443_3_uid(struct nfc_hci_dev *hdev, u8 *gate, + int *len) +{ + int r; + struct sk_buff *uid_skb = NULL; + + r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_14443_3_A_GATE, + ST21NFCA_RF_READER_14443_3_A_UID, &uid_skb); + if (r < 0) + goto exit; + + if (uid_skb->len == 0 || uid_skb->len > NFC_NFCID1_MAXSIZE) { + r = -EPROTO; + goto exit; + } + + gate = uid_skb->data; + *len = uid_skb->len; +exit: + kfree_skb(uid_skb); + return r; +} + +static int st21nfca_hci_target_from_gate(struct nfc_hci_dev *hdev, u8 gate, + struct nfc_target *target) +{ + int r, len; + u16 atqa; + u8 sak; + u8 uid[NFC_NFCID1_MAXSIZE]; + + switch (gate) { + case ST21NFCA_RF_READER_F_GATE: + target->supported_protocols = NFC_PROTO_FELICA_MASK; + break; + case ST21NFCA_RF_READER_14443_3_A_GATE: + /* ISO14443-3 type 1 or 2 tags */ + r = st21nfca_get_iso14443_3_atqa(hdev, &atqa); + if (r < 0) + return r; + if (atqa == 0x000c) { + target->supported_protocols = NFC_PROTO_JEWEL_MASK; + target->sens_res = 0x0c00; + } else { + r = st21nfca_get_iso14443_3_sak(hdev, &sak); + if (r < 0) + return r; + + r = st21nfca_get_iso14443_3_uid(hdev, uid, &len); + if (r < 0) + return r; + + target->supported_protocols = + nfc_hci_sak_to_protocol(sak); + if (target->supported_protocols == 0xffffffff) + return -EPROTO; + + target->sens_res = atqa; + target->sel_res = sak; + memcpy(target->nfcid1, uid, len); + target->nfcid1_len = len; + } + + break; + default: + return -EPROTO; + } + + return 0; +} + +/* + * Returns: + * <= 0: driver handled the data exchange + * 1: driver doesn't especially handle, please do standard processing + */ +static int st21nfca_hci_im_transceive(struct nfc_hci_dev *hdev, + struct nfc_target *target, + struct sk_buff *skb, + data_exchange_cb_t cb, void *cb_context) +{ + pr_info(DRIVER_DESC ": %s for gate=%d len=%d\n", __func__, + target->hci_reader_gate, skb->len); + + switch (target->hci_reader_gate) { + case ST21NFCA_RF_READER_F_GATE: + *skb_push(skb, 1) = 0x1a; + return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate, + ST21NFCA_WR_XCHG_DATA, skb->data, + skb->len, cb, cb_context); + case ST21NFCA_RF_READER_14443_3_A_GATE: + *skb_push(skb, 1) = 0x1a; /* CTR, see spec:10.2.2.1 */ + + return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate, + ST21NFCA_WR_XCHG_DATA, skb->data, + skb->len, cb, cb_context); + default: + return 1; + } +} + +static int st21nfca_hci_check_presence(struct nfc_hci_dev *hdev, + struct nfc_target *target) +{ + u8 fwi = 0x11; + switch (target->hci_reader_gate) { + case NFC_HCI_RF_READER_A_GATE: + case NFC_HCI_RF_READER_B_GATE: + /* + * PRESENCE_CHECK on those gates is available + * However, the answer to this command is taking 3 * fwi + * if the card is no present. + * Instead, we send an empty I-Frame with a very short + * configurable fwi ~604µs. + */ + return nfc_hci_send_cmd(hdev, target->hci_reader_gate, + ST21NFCA_WR_XCHG_DATA, &fwi, 1, NULL); + case ST21NFCA_RF_READER_14443_3_A_GATE: + return nfc_hci_send_cmd(hdev, target->hci_reader_gate, + ST21NFCA_RF_READER_CMD_PRESENCE_CHECK, + NULL, 0, NULL); + default: + return -EOPNOTSUPP; + } +} + +static struct nfc_hci_ops st21nfca_hci_ops = { + .open = st21nfca_hci_open, + .close = st21nfca_hci_close, + .hci_ready = st21nfca_hci_ready, + .xmit = st21nfca_hci_xmit, + .start_poll = st21nfca_hci_start_poll, + .target_from_gate = st21nfca_hci_target_from_gate, + .im_transceive = st21nfca_hci_im_transceive, + .check_presence = st21nfca_hci_check_presence, +}; + +int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, + char *llc_name, int phy_headroom, int phy_tailroom, + int phy_payload, struct nfc_hci_dev **hdev) +{ + struct st21nfca_hci_info *info; + int r = 0; + int dev_num; + u32 protocols; + struct nfc_hci_init_data init_data; + unsigned long quirks = 0; + + info = kzalloc(sizeof(struct st21nfca_hci_info), GFP_KERNEL); + if (!info) { + r = -ENOMEM; + goto err_alloc_hdev; + } + + info->phy_ops = phy_ops; + info->phy_id = phy_id; + info->state = ST21NFCA_ST_COLD; + mutex_init(&info->info_lock); + + init_data.gate_count = ARRAY_SIZE(st21nfca_gates); + + memcpy(init_data.gates, st21nfca_gates, sizeof(st21nfca_gates)); + + /* + * Session id must include the driver name + i2c bus addr + * persistent info to discriminate 2 identical chips + */ + dev_num = find_first_zero_bit(dev_mask, ST21NFCA_NUM_DEVICES); + if (dev_num >= ST21NFCA_NUM_DEVICES) + goto err_alloc_hdev; + + scnprintf(init_data.session_id, sizeof(init_data.session_id), "%s%2x", + "ST21AH", dev_num); + + protocols = NFC_PROTO_JEWEL_MASK | + NFC_PROTO_MIFARE_MASK | + NFC_PROTO_FELICA_MASK | + NFC_PROTO_ISO14443_MASK | + NFC_PROTO_ISO14443_B_MASK; + + set_bit(NFC_HCI_QUIRK_SHORT_CLEAR, &quirks); + + info->hdev = + nfc_hci_allocate_device(&st21nfca_hci_ops, &init_data, quirks, + protocols, llc_name, + phy_headroom + ST21NFCA_CMDS_HEADROOM, + phy_tailroom, phy_payload); + + if (!info->hdev) { + pr_err("Cannot allocate nfc hdev.\n"); + r = -ENOMEM; + goto err_alloc_hdev; + } + + nfc_hci_set_clientdata(info->hdev, info); + + r = nfc_hci_register_device(info->hdev); + if (r) + goto err_regdev; + + *hdev = info->hdev; + + return 0; + +err_regdev: + nfc_hci_free_device(info->hdev); + +err_alloc_hdev: + kfree(info); + + return r; +} +EXPORT_SYMBOL(st21nfca_hci_probe); + +void st21nfca_hci_remove(struct nfc_hci_dev *hdev) +{ + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + + nfc_hci_unregister_device(hdev); + nfc_hci_free_device(hdev); + kfree(info); +} +EXPORT_SYMBOL(st21nfca_hci_remove); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/nfc/st21nfca/st21nfca.h b/drivers/nfc/st21nfca/st21nfca.h new file mode 100644 index 000000000000..334cd90bcc8c --- /dev/null +++ b/drivers/nfc/st21nfca/st21nfca.h @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef __LOCAL_ST21NFCA_H_ +#define __LOCAL_ST21NFCA_H_ + +#include + +#define HCI_MODE 0 + +/* framing in HCI mode */ +#define ST21NFCA_SOF_EOF_LEN 2 + +/* Almost every time value is 0 */ +#define ST21NFCA_HCI_LLC_LEN 1 + +/* Size in worst case : + * In normal case CRC len = 2 but byte stuffing + * may appear in case one CRC byte = ST21NFCA_SOF_EOF + */ +#define ST21NFCA_HCI_LLC_CRC 4 + +#define ST21NFCA_HCI_LLC_LEN_CRC (ST21NFCA_SOF_EOF_LEN + \ + ST21NFCA_HCI_LLC_LEN + \ + ST21NFCA_HCI_LLC_CRC) +#define ST21NFCA_HCI_LLC_MIN_SIZE (1 + ST21NFCA_HCI_LLC_LEN_CRC) + +/* Worst case when adding byte stuffing between each byte */ +#define ST21NFCA_HCI_LLC_MAX_PAYLOAD 29 +#define ST21NFCA_HCI_LLC_MAX_SIZE (ST21NFCA_HCI_LLC_LEN_CRC + 1 + \ + ST21NFCA_HCI_LLC_MAX_PAYLOAD) + +#define DRIVER_DESC "HCI NFC driver for ST21NFCA" + +#define ST21NFCA_HCI_MODE 0 + +#define ST21NFCA_NUM_DEVICES 256 + +int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, + char *llc_name, int phy_headroom, int phy_tailroom, + int phy_payload, struct nfc_hci_dev **hdev); +void st21nfca_hci_remove(struct nfc_hci_dev *hdev); + +enum st21nfca_state { + ST21NFCA_ST_COLD, + ST21NFCA_ST_READY, +}; + +struct st21nfca_hci_info { + struct nfc_phy_ops *phy_ops; + void *phy_id; + + struct nfc_hci_dev *hdev; + + enum st21nfca_state state; + + struct mutex info_lock; + + int async_cb_type; + data_exchange_cb_t async_cb; + void *async_cb_context; + +} __packed; + +/* Reader RF commands */ +#define ST21NFCA_WR_XCHG_DATA 0x10 + +#define ST21NFCA_RF_READER_F_GATE 0x14 +#define ST21NFCA_RF_READER_F_DATARATE 0x01 +#define ST21NFCA_RF_READER_F_DATARATE_106 0x01 +#define ST21NFCA_RF_READER_F_DATARATE_212 0x02 +#define ST21NFCA_RF_READER_F_DATARATE_424 0x04 + +#endif /* __LOCAL_ST21NFCA_H_ */ diff --git a/include/linux/platform_data/st21nfca.h b/include/linux/platform_data/st21nfca.h new file mode 100644 index 000000000000..1730312398ff --- /dev/null +++ b/include/linux/platform_data/st21nfca.h @@ -0,0 +1,32 @@ +/* + * Driver include for the ST21NFCA NFC chip. + * + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _ST21NFCA_HCI_H_ +#define _ST21NFCA_HCI_H_ + +#include + +#define ST21NFCA_HCI_DRIVER_NAME "st21nfca_hci" + +struct st21nfca_nfc_platform_data { + unsigned int gpio_irq; + unsigned int gpio_ena; + unsigned int irq_polarity; +}; + +#endif /* _ST21NFCA_HCI_H_ */ -- cgit v1.2.3-71-gd317 From e240bc36125691b0e18e70407c2d18ca6117c2f5 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 25 Mar 2014 06:51:49 +0100 Subject: NFC: hci: Add load_session HCI operand load_session allows a CLF to restore the gate <-> pipe table from some proprietary location. The main advantage to add this function is to reduce the memory wear by running pipe creation (and storing) only once. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/hci.h | 1 + net/nfc/hci/core.c | 45 +++++++++++++++++++++------------------------ 2 files changed, 22 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 03c4650b548c..61286db54388 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -27,6 +27,7 @@ struct nfc_hci_dev; struct nfc_hci_ops { int (*open) (struct nfc_hci_dev *hdev); void (*close) (struct nfc_hci_dev *hdev); + int (*load_session) (struct nfc_hci_dev *hdev); int (*hci_ready) (struct nfc_hci_dev *hdev); /* * xmit must always send the complete buffer before diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index d45b638e77c7..c4d251a6fd67 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -380,34 +380,31 @@ static int hci_dev_session_init(struct nfc_hci_dev *hdev) if (r < 0) goto disconnect_all; - if (skb->len && skb->len == strlen(hdev->init_data.session_id)) - if (memcmp(hdev->init_data.session_id, skb->data, - skb->len) == 0) { - /* TODO ELa: restore gate<->pipe table from - * some TBD location. - * note: it doesn't seem possible to get the chip - * currently open gate/pipe table. - * It is only possible to obtain the supported - * gate list. - */ + if (skb->len && skb->len == strlen(hdev->init_data.session_id) && + (memcmp(hdev->init_data.session_id, skb->data, + skb->len) == 0) && hdev->ops->load_session) { + /* Restore gate<->pipe table from some proprietary location. */ - /* goto exit - * For now, always do a full initialization */ - } + r = hdev->ops->load_session(hdev); - r = nfc_hci_disconnect_all_gates(hdev); - if (r < 0) - goto exit; + if (r < 0) + goto disconnect_all; + } else { - r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count, - hdev->init_data.gates); - if (r < 0) - goto disconnect_all; + r = nfc_hci_disconnect_all_gates(hdev); + if (r < 0) + goto exit; - r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE, - NFC_HCI_ADMIN_SESSION_IDENTITY, - hdev->init_data.session_id, - strlen(hdev->init_data.session_id)); + r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count, + hdev->init_data.gates); + if (r < 0) + goto disconnect_all; + + r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE, + NFC_HCI_ADMIN_SESSION_IDENTITY, + hdev->init_data.session_id, + strlen(hdev->init_data.session_id)); + } if (r == 0) goto exit; -- cgit v1.2.3-71-gd317 From 51d98fa47c9c3f5d34cd4097ce08e8e8669a89b4 Mon Sep 17 00:00:00 2001 From: "Mark A. Greer" Date: Mon, 31 Mar 2014 17:36:37 -0700 Subject: NFC: digital: Add macros for the ISO/IEC 14443-B Protocol Add RF tech and framing macros for the ISO/IEC 14443-B Protocol. Cc: Thierry Escande Signed-off-by: Mark A. Greer Signed-off-by: Samuel Ortiz --- include/net/nfc/digital.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h index 7655cfe27c34..bdf55c3b7a19 100644 --- a/include/net/nfc/digital.h +++ b/include/net/nfc/digital.h @@ -36,6 +36,7 @@ enum { NFC_DIGITAL_RF_TECH_212F, NFC_DIGITAL_RF_TECH_424F, NFC_DIGITAL_RF_TECH_ISO15693, + NFC_DIGITAL_RF_TECH_106B, NFC_DIGITAL_RF_TECH_LAST, }; @@ -62,6 +63,9 @@ enum { NFC_DIGITAL_FRAMING_ISO15693_INVENTORY, NFC_DIGITAL_FRAMING_ISO15693_T5T, + NFC_DIGITAL_FRAMING_NFCB, + NFC_DIGITAL_FRAMING_NFCB_T4T, + NFC_DIGITAL_FRAMING_LAST, }; -- cgit v1.2.3-71-gd317 From f1370cc4a01e61007ab3020c761cef6b88ae3729 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 18 Apr 2014 16:23:46 +0900 Subject: xfrm: Remove useless secid field from xfrm_audit. It seems to me that commit ab5f5e8b "[XFRM]: xfrm audit calls" is doing something strange at xfrm_audit_helper_usrinfo(). If secid != 0 && security_secid_to_secctx(secid) != 0, the caller calls audit_log_task_context() which basically does secid != 0 && security_secid_to_secctx(secid) == 0 case except that secid is obtained from current thread's context. Oh, what happens if secid passed to xfrm_audit_helper_usrinfo() was obtained from other thread's context? It might audit current thread's context rather than other thread's context if security_secid_to_secctx() in xfrm_audit_helper_usrinfo() failed for some reason. Then, are all the caller of xfrm_audit_helper_usrinfo() passing either secid obtained from current thread's context or secid == 0? It seems to me that they are. If I didn't miss something, we don't need to pass secid to xfrm_audit_helper_usrinfo() because audit_log_task_context() will obtain secid from current thread's context. Signed-off-by: Tetsuo Handa Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 29 ++++++++++------------------- net/key/af_key.c | 12 +++++------- net/xfrm/xfrm_policy.c | 22 ++++++++-------------- net/xfrm/xfrm_state.c | 17 +++++++---------- net/xfrm/xfrm_user.c | 27 ++++++--------------------- 5 files changed, 36 insertions(+), 71 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 116e9c7e19cb..882889eb156b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -693,7 +693,6 @@ struct xfrm_spi_skb_cb { /* Audit Information */ struct xfrm_audit { - u32 secid; kuid_t loginuid; unsigned int sessionid; }; @@ -713,30 +712,22 @@ static inline struct audit_buffer *xfrm_audit_start(const char *op) return audit_buf; } -static inline void xfrm_audit_helper_usrinfo(kuid_t auid, unsigned int ses, u32 secid, +static inline void xfrm_audit_helper_usrinfo(kuid_t auid, unsigned int ses, struct audit_buffer *audit_buf) { - char *secctx; - u32 secctx_len; - audit_log_format(audit_buf, " auid=%u ses=%u", from_kuid(&init_user_ns, auid), ses); - if (secid != 0 && - security_secid_to_secctx(secid, &secctx, &secctx_len) == 0) { - audit_log_format(audit_buf, " subj=%s", secctx); - security_release_secctx(secctx, secctx_len); - } else - audit_log_task_context(audit_buf); + audit_log_task_context(audit_buf); } void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, kuid_t auid, - unsigned int ses, u32 secid); + unsigned int ses); void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, kuid_t auid, - unsigned int ses, u32 secid); + unsigned int ses); void xfrm_audit_state_add(struct xfrm_state *x, int result, kuid_t auid, - unsigned int ses, u32 secid); + unsigned int ses); void xfrm_audit_state_delete(struct xfrm_state *x, int result, kuid_t auid, - unsigned int ses, u32 secid); + unsigned int ses); void xfrm_audit_state_replay_overflow(struct xfrm_state *x, struct sk_buff *skb); void xfrm_audit_state_replay(struct xfrm_state *x, struct sk_buff *skb, @@ -749,22 +740,22 @@ void xfrm_audit_state_icvfail(struct xfrm_state *x, struct sk_buff *skb, #else static inline void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - kuid_t auid, unsigned int ses, u32 secid) + kuid_t auid, unsigned int ses) { } static inline void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - kuid_t auid, unsigned int ses, u32 secid) + kuid_t auid, unsigned int ses) { } static inline void xfrm_audit_state_add(struct xfrm_state *x, int result, - kuid_t auid, unsigned int ses, u32 secid) + kuid_t auid, unsigned int ses) { } static inline void xfrm_audit_state_delete(struct xfrm_state *x, int result, - kuid_t auid, unsigned int ses, u32 secid) + kuid_t auid, unsigned int ses) { } diff --git a/net/key/af_key.c b/net/key/af_key.c index f3c83073afc4..d66ff72adefb 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1478,7 +1478,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg xfrm_audit_state_add(x, err ? 0 : 1, audit_get_loginuid(current), - audit_get_sessionid(current), 0); + audit_get_sessionid(current)); if (err < 0) { x->km.state = XFRM_STATE_DEAD; @@ -1534,7 +1534,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_ out: xfrm_audit_state_delete(x, err ? 0 : 1, audit_get_loginuid(current), - audit_get_sessionid(current), 0); + audit_get_sessionid(current)); xfrm_state_put(x); return err; @@ -1735,7 +1735,6 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = 0; err = xfrm_state_flush(net, proto, &audit_info); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { @@ -2290,7 +2289,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ xfrm_audit_policy_add(xp, err ? 0 : 1, audit_get_loginuid(current), - audit_get_sessionid(current), 0); + audit_get_sessionid(current)); if (err) goto out; @@ -2374,7 +2373,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa xfrm_audit_policy_delete(xp, err ? 0 : 1, audit_get_loginuid(current), - audit_get_sessionid(current), 0); + audit_get_sessionid(current)); if (err) goto out; @@ -2624,7 +2623,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ if (delete) { xfrm_audit_policy_delete(xp, err ? 0 : 1, audit_get_loginuid(current), - audit_get_sessionid(current), 0); + audit_get_sessionid(current)); if (err) goto out; @@ -2738,7 +2737,6 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = 0; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c08fbd11ceff..bd001b7062c0 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -785,8 +785,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi if (err) { xfrm_audit_policy_delete(pol, 0, audit_info->loginuid, - audit_info->sessionid, - audit_info->secid); + audit_info->sessionid); return err; } } @@ -801,8 +800,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi if (err) { xfrm_audit_policy_delete(pol, 0, audit_info->loginuid, - audit_info->sessionid, - audit_info->secid); + audit_info->sessionid); return err; } } @@ -842,8 +840,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, - audit_info->sessionid, - audit_info->secid); + audit_info->sessionid); xfrm_policy_kill(pol); @@ -864,8 +861,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, - audit_info->sessionid, - audit_info->secid); + audit_info->sessionid); xfrm_policy_kill(pol); write_lock_bh(&net->xfrm.xfrm_policy_lock); @@ -2870,12 +2866,10 @@ static void xfrm_policy_fini(struct net *net) #ifdef CONFIG_XFRM_SUB_POLICY audit_info.loginuid = INVALID_UID; audit_info.sessionid = (unsigned int)-1; - audit_info.secid = 0; xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info); #endif audit_info.loginuid = INVALID_UID; audit_info.sessionid = (unsigned int)-1; - audit_info.secid = 0; xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); WARN_ON(!list_empty(&net->xfrm.policy_all)); @@ -2992,14 +2986,14 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, } void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - kuid_t auid, unsigned int sessionid, u32 secid) + kuid_t auid, unsigned int sessionid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-add"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); + xfrm_audit_helper_usrinfo(auid, sessionid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); @@ -3007,14 +3001,14 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - kuid_t auid, unsigned int sessionid, u32 secid) + kuid_t auid, unsigned int sessionid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-delete"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); + xfrm_audit_helper_usrinfo(auid, sessionid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 8e9c781a6bba..d91312b5ceb0 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -465,7 +465,7 @@ expired: xfrm_audit_state_delete(x, err ? 0 : 1, audit_get_loginuid(current), - audit_get_sessionid(current), 0); + audit_get_sessionid(current)); out: spin_unlock(&x->lock); @@ -574,8 +574,7 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi (err = security_xfrm_state_delete(x)) != 0) { xfrm_audit_state_delete(x, 0, audit_info->loginuid, - audit_info->sessionid, - audit_info->secid); + audit_info->sessionid); return err; } } @@ -613,8 +612,7 @@ restart: err = xfrm_state_delete(x); xfrm_audit_state_delete(x, err ? 0 : 1, audit_info->loginuid, - audit_info->sessionid, - audit_info->secid); + audit_info->sessionid); xfrm_state_put(x); if (!err) cnt++; @@ -2134,7 +2132,6 @@ void xfrm_state_fini(struct net *net) flush_work(&net->xfrm.state_hash_work); audit_info.loginuid = INVALID_UID; audit_info.sessionid = (unsigned int)-1; - audit_info.secid = 0; xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info); flush_work(&net->xfrm.state_gc_work); @@ -2199,14 +2196,14 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family, } void xfrm_audit_state_add(struct xfrm_state *x, int result, - kuid_t auid, unsigned int sessionid, u32 secid) + kuid_t auid, unsigned int sessionid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SAD-add"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); + xfrm_audit_helper_usrinfo(auid, sessionid, audit_buf); xfrm_audit_helper_sainfo(x, audit_buf); audit_log_format(audit_buf, " res=%u", result); audit_log_end(audit_buf); @@ -2214,14 +2211,14 @@ void xfrm_audit_state_add(struct xfrm_state *x, int result, EXPORT_SYMBOL_GPL(xfrm_audit_state_add); void xfrm_audit_state_delete(struct xfrm_state *x, int result, - kuid_t auid, unsigned int sessionid, u32 secid) + kuid_t auid, unsigned int sessionid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SAD-delete"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); + xfrm_audit_helper_usrinfo(auid, sessionid, audit_buf); xfrm_audit_helper_sainfo(x, audit_buf); audit_log_format(audit_buf, " res=%u", result); audit_log_end(audit_buf); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8f131c10a6f3..d6409d927b82 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -599,7 +599,6 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; kuid_t loginuid = audit_get_loginuid(current); unsigned int sessionid = audit_get_sessionid(current); - u32 sid; err = verify_newsa_info(p, attrs); if (err) @@ -615,8 +614,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, else err = xfrm_state_update(x); - security_task_getsecid(current, &sid); - xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid, sid); + xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid); if (err < 0) { x->km.state = XFRM_STATE_DEAD; @@ -678,7 +676,6 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_usersa_id *p = nlmsg_data(nlh); kuid_t loginuid = audit_get_loginuid(current); unsigned int sessionid = audit_get_sessionid(current); - u32 sid; x = xfrm_user_state_lookup(net, p, attrs, &err); if (x == NULL) @@ -703,8 +700,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, km_state_notify(x, &c); out: - security_task_getsecid(current, &sid); - xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid, sid); + xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid); xfrm_state_put(x); return err; } @@ -1416,7 +1412,6 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, int excl; kuid_t loginuid = audit_get_loginuid(current); unsigned int sessionid = audit_get_sessionid(current); - u32 sid; err = verify_newpolicy_info(p); if (err) @@ -1435,8 +1430,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, * a type XFRM_MSG_UPDPOLICY - JHS */ excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); - security_task_getsecid(current, &sid); - xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid, sid); + xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid); if (err) { security_xfrm_policy_free(xp->security); @@ -1675,11 +1669,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, } else { kuid_t loginuid = audit_get_loginuid(current); unsigned int sessionid = audit_get_sessionid(current); - u32 sid; - security_task_getsecid(current, &sid); - xfrm_audit_policy_delete(xp, err ? 0 : 1, loginuid, sessionid, - sid); + xfrm_audit_policy_delete(xp, err ? 0 : 1, loginuid, sessionid); if (err != 0) goto out; @@ -1709,7 +1700,6 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - security_task_getsecid(current, &audit_info.secid); err = xfrm_state_flush(net, p->proto, &audit_info); if (err) { if (err == -ESRCH) /* empty table */ @@ -1902,7 +1892,6 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - security_task_getsecid(current, &audit_info.secid); err = xfrm_policy_flush(net, type, &audit_info); if (err) { if (err == -ESRCH) /* empty table */ @@ -1971,11 +1960,9 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (up->hard) { kuid_t loginuid = audit_get_loginuid(current); unsigned int sessionid = audit_get_sessionid(current); - u32 sid; - security_task_getsecid(current, &sid); xfrm_policy_delete(xp, p->dir); - xfrm_audit_policy_delete(xp, 1, loginuid, sessionid, sid); + xfrm_audit_policy_delete(xp, 1, loginuid, sessionid); } else { // reset the timers here? @@ -2014,11 +2001,9 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (ue->hard) { kuid_t loginuid = audit_get_loginuid(current); unsigned int sessionid = audit_get_sessionid(current); - u32 sid; - security_task_getsecid(current, &sid); __xfrm_state_delete(x); - xfrm_audit_state_delete(x, 1, loginuid, sessionid, sid); + xfrm_audit_state_delete(x, 1, loginuid, sessionid); } err = 0; out: -- cgit v1.2.3-71-gd317 From 0967e6a5070e336507ce52f09f7297413b966981 Mon Sep 17 00:00:00 2001 From: Vandana Kannan Date: Tue, 1 Apr 2014 16:26:59 +0530 Subject: drm/edid: Fill PAR in AVI infoframe based on CEA mode list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Populate PAR in infoframe structure. If there is a user setting for PAR, then that value is set. Else, value is taken from CEA mode list if VIC is found. Else, PAR is calculated from resolution. If none of these conditions are satisfied, PAR is NONE as per initialization. v2: Removed the part which sets PAR according to user input, based on Daniel's review comments. A separate patch will be submitted to create a property that would enable a user space app to set aspect ratio for AVI infoframe. v2: Removed the part which sets PAR according to user input, based on Daniel's review comments. v3: Removed calculation of PAR for non-CEA modes as per discussion with Ville. A separate patch will be submitted to create a property that would enable a user space app to set aspect ratio for AVI infoframe. Signed-off-by: Vandana Kannan Cc: Jesse Barnes Cc: Vijay Purushothaman Cc: Ville Syrjälä Cc: intel-gfx@lists.freedesktop.org Reviewed-by: Jesse Barnes [danvet: Squash in fixup for htmldocs.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_edid.c | 22 ++++++++++++++++++++++ include/drm/drm_crtc.h | 1 + 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index d4e3f9d9370f..b8d6c5163575 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -2452,6 +2452,22 @@ u8 drm_match_cea_mode(const struct drm_display_mode *to_match) } EXPORT_SYMBOL(drm_match_cea_mode); +/** + * drm_get_cea_aspect_ratio - get the picture aspect ratio corresponding to + * the input VIC from the CEA mode list + * @video_code: ID given to each of the CEA modes + * + * Returns picture aspect ratio + */ +enum hdmi_picture_aspect drm_get_cea_aspect_ratio(const u8 video_code) +{ + /* return picture aspect ratio for video_code - 1 to access the + * right array element + */ + return edid_cea_modes[video_code-1].picture_aspect_ratio; +} +EXPORT_SYMBOL(drm_get_cea_aspect_ratio); + /* * Calculate the alternate clock for HDMI modes (those from the HDMI vendor * specific block). @@ -3613,6 +3629,12 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame, frame->video_code = drm_match_cea_mode(mode); frame->picture_aspect = HDMI_PICTURE_ASPECT_NONE; + + /* Populate picture aspect ratio from CEA mode list */ + if (frame->video_code > 0) + frame->picture_aspect = drm_get_cea_aspect_ratio( + frame->video_code); + frame->active_aspect = HDMI_ACTIVE_ASPECT_PICTURE; frame->scan_mode = HDMI_SCAN_MODE_UNDERSCAN; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index e55fccbe7c42..c061bb372199 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -1020,6 +1020,7 @@ extern int drm_mode_gamma_get_ioctl(struct drm_device *dev, extern int drm_mode_gamma_set_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern u8 drm_match_cea_mode(const struct drm_display_mode *to_match); +extern enum hdmi_picture_aspect drm_get_cea_aspect_ratio(const u8 video_code); extern bool drm_detect_hdmi_monitor(struct edid *edid); extern bool drm_detect_monitor_audio(struct edid *edid); extern bool drm_rgb_quant_range_selectable(struct edid *edid); -- cgit v1.2.3-71-gd317 From f9b0e251dfbf2c4da642ec9210db29a7ac63b81a Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Wed, 2 Apr 2014 12:29:46 +0200 Subject: drm: make mode_valid callback optional Many drm connectors do not need mode validation. The patch makes this callback optional and removes dumb implementations. v2: Rebase: - imx move to a shared (but still dummy) ->mode_valid implementation. - probe helpers have been extracted to drm_probe_helper.c Signed-off-by: Andrzej Hajda (v1) Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 6 +++--- drivers/gpu/drm/ast/ast_mode.c | 7 ------- drivers/gpu/drm/bridge/ptn3460.c | 7 ------- drivers/gpu/drm/cirrus/cirrus_mode.c | 8 -------- drivers/gpu/drm/drm_probe_helper.c | 2 +- drivers/gpu/drm/exynos/exynos_dp_core.c | 7 ------- drivers/gpu/drm/exynos/exynos_drm_dpi.c | 7 ------- drivers/gpu/drm/exynos/exynos_drm_vidi.c | 7 ------- drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c | 7 ------- drivers/gpu/drm/rcar-du/rcar_du_vgacon.c | 7 ------- drivers/gpu/drm/shmobile/shmob_drm_crtc.c | 7 ------- drivers/staging/imx-drm/imx-drm-core.c | 7 ------- drivers/staging/imx-drm/imx-drm.h | 2 -- drivers/staging/imx-drm/imx-hdmi.c | 1 - drivers/staging/imx-drm/imx-ldb.c | 1 - drivers/staging/imx-drm/imx-tve.c | 4 ---- drivers/staging/imx-drm/parallel-display.c | 1 - include/drm/drm_crtc_helper.h | 2 +- 18 files changed, 5 insertions(+), 85 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 677a02553ec0..c72e146e58d3 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -1903,8 +1903,8 @@ void intel_crt_init(struct drm_device *dev) The function filters out modes larger than max_width and max_height - if specified. It then calls the connector - mode_valid helper operation for each mode in + if specified. It then calls the optional connector + mode_valid helper operation for each mode in the probed list to check whether the mode is valid for the connector. @@ -2265,7 +2265,7 @@ void intel_crt_init(struct drm_device *dev) Verify whether a mode is valid for the connector. Return MODE_OK for supported modes and one of the enum drm_mode_status values (MODE_*) - for unsupported modes. This operation is mandatory. + for unsupported modes. This operation is optional. As the mode rejection reason is currently not used beside for diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index a4afdc8bb578..e599d64a2620 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -743,12 +743,6 @@ static int ast_get_modes(struct drm_connector *connector) return 0; } -static int ast_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - return MODE_OK; -} - static void ast_connector_destroy(struct drm_connector *connector) { struct ast_connector *ast_connector = to_ast_connector(connector); @@ -765,7 +759,6 @@ ast_connector_detect(struct drm_connector *connector, bool force) } static const struct drm_connector_helper_funcs ast_connector_helper_funcs = { - .mode_valid = ast_mode_valid, .get_modes = ast_get_modes, .best_encoder = ast_best_single_encoder, }; diff --git a/drivers/gpu/drm/bridge/ptn3460.c b/drivers/gpu/drm/bridge/ptn3460.c index b171901a3553..98fd17ae4916 100644 --- a/drivers/gpu/drm/bridge/ptn3460.c +++ b/drivers/gpu/drm/bridge/ptn3460.c @@ -225,12 +225,6 @@ out: return num_modes; } -static int ptn3460_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - return MODE_OK; -} - struct drm_encoder *ptn3460_best_encoder(struct drm_connector *connector) { struct ptn3460_bridge *ptn_bridge; @@ -242,7 +236,6 @@ struct drm_encoder *ptn3460_best_encoder(struct drm_connector *connector) struct drm_connector_helper_funcs ptn3460_connector_helper_funcs = { .get_modes = ptn3460_get_modes, - .mode_valid = ptn3460_mode_valid, .best_encoder = ptn3460_best_encoder, }; diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c index f59433b7610c..49332c5fe35b 100644 --- a/drivers/gpu/drm/cirrus/cirrus_mode.c +++ b/drivers/gpu/drm/cirrus/cirrus_mode.c @@ -505,13 +505,6 @@ static int cirrus_vga_get_modes(struct drm_connector *connector) return count; } -static int cirrus_vga_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - /* Any mode we've added is valid */ - return MODE_OK; -} - static struct drm_encoder *cirrus_connector_best_encoder(struct drm_connector *connector) { @@ -546,7 +539,6 @@ static void cirrus_connector_destroy(struct drm_connector *connector) struct drm_connector_helper_funcs cirrus_vga_connector_helper_funcs = { .get_modes = cirrus_vga_get_modes, - .mode_valid = cirrus_vga_mode_valid, .best_encoder = cirrus_connector_best_encoder, }; diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index e70f54d4a581..d06340985a72 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -169,7 +169,7 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector, drm_mode_validate_flag(connector, mode_flags); list_for_each_entry(mode, &connector->modes, head) { - if (mode->status == MODE_OK) + if (mode->status == MODE_OK && connector_funcs->mode_valid) mode->status = connector_funcs->mode_valid(connector, mode); } diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.c b/drivers/gpu/drm/exynos/exynos_dp_core.c index aed533bbfd31..bb74472b4e4b 100644 --- a/drivers/gpu/drm/exynos/exynos_dp_core.c +++ b/drivers/gpu/drm/exynos/exynos_dp_core.c @@ -949,12 +949,6 @@ static int exynos_dp_get_modes(struct drm_connector *connector) return 1; } -static int exynos_dp_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - return MODE_OK; -} - static struct drm_encoder *exynos_dp_best_encoder( struct drm_connector *connector) { @@ -965,7 +959,6 @@ static struct drm_encoder *exynos_dp_best_encoder( static struct drm_connector_helper_funcs exynos_dp_connector_helper_funcs = { .get_modes = exynos_dp_get_modes, - .mode_valid = exynos_dp_mode_valid, .best_encoder = exynos_dp_best_encoder, }; diff --git a/drivers/gpu/drm/exynos/exynos_drm_dpi.c b/drivers/gpu/drm/exynos/exynos_drm_dpi.c index 2b09c7c0bfcc..82e52c71bccc 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dpi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dpi.c @@ -94,12 +94,6 @@ static int exynos_dpi_get_modes(struct drm_connector *connector) return 0; } -static int exynos_dpi_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - return MODE_OK; -} - static struct drm_encoder * exynos_dpi_best_encoder(struct drm_connector *connector) { @@ -110,7 +104,6 @@ exynos_dpi_best_encoder(struct drm_connector *connector) static struct drm_connector_helper_funcs exynos_dpi_connector_helper_funcs = { .get_modes = exynos_dpi_get_modes, - .mode_valid = exynos_dpi_mode_valid, .best_encoder = exynos_dpi_best_encoder, }; diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index 7afead9c3f30..b6980865dd50 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -533,12 +533,6 @@ static int vidi_get_modes(struct drm_connector *connector) return drm_add_edid_modes(connector, edid); } -static int vidi_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - return MODE_OK; -} - static struct drm_encoder *vidi_best_encoder(struct drm_connector *connector) { struct vidi_context *ctx = ctx_from_connector(connector); @@ -548,7 +542,6 @@ static struct drm_encoder *vidi_best_encoder(struct drm_connector *connector) static struct drm_connector_helper_funcs vidi_connector_helper_funcs = { .get_modes = vidi_get_modes, - .mode_valid = vidi_mode_valid, .best_encoder = vidi_best_encoder, }; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c index 4f3ba93cd91d..289048d1c7b2 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c @@ -57,15 +57,8 @@ static int rcar_du_lvds_connector_get_modes(struct drm_connector *connector) return 1; } -static int rcar_du_lvds_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - return MODE_OK; -} - static const struct drm_connector_helper_funcs connector_helper_funcs = { .get_modes = rcar_du_lvds_connector_get_modes, - .mode_valid = rcar_du_lvds_connector_mode_valid, .best_encoder = rcar_du_connector_best_encoder, }; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c b/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c index 41d563adfeaa..ccfe64c7188f 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c @@ -25,15 +25,8 @@ static int rcar_du_vga_connector_get_modes(struct drm_connector *connector) return 0; } -static int rcar_du_vga_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - return MODE_OK; -} - static const struct drm_connector_helper_funcs connector_helper_funcs = { .get_modes = rcar_du_vga_connector_get_modes, - .mode_valid = rcar_du_vga_connector_mode_valid, .best_encoder = rcar_du_connector_best_encoder, }; diff --git a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c index e9e5e6d368cc..faf176b2daf9 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c +++ b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c @@ -674,12 +674,6 @@ static int shmob_drm_connector_get_modes(struct drm_connector *connector) return 1; } -static int shmob_drm_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - return MODE_OK; -} - static struct drm_encoder * shmob_drm_connector_best_encoder(struct drm_connector *connector) { @@ -690,7 +684,6 @@ shmob_drm_connector_best_encoder(struct drm_connector *connector) static const struct drm_connector_helper_funcs connector_helper_funcs = { .get_modes = shmob_drm_connector_get_modes, - .mode_valid = shmob_drm_connector_mode_valid, .best_encoder = shmob_drm_connector_best_encoder, }; diff --git a/drivers/staging/imx-drm/imx-drm-core.c b/drivers/staging/imx-drm/imx-drm-core.c index 4144a75e5f71..53ff005245c5 100644 --- a/drivers/staging/imx-drm/imx-drm-core.c +++ b/drivers/staging/imx-drm/imx-drm-core.c @@ -200,13 +200,6 @@ static const struct file_operations imx_drm_driver_fops = { .llseek = noop_llseek, }; -int imx_drm_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - return MODE_OK; -} -EXPORT_SYMBOL(imx_drm_connector_mode_valid); - void imx_drm_connector_destroy(struct drm_connector *connector) { drm_sysfs_connector_remove(connector); diff --git a/drivers/staging/imx-drm/imx-drm.h b/drivers/staging/imx-drm/imx-drm.h index a322bac55414..7453ae00c412 100644 --- a/drivers/staging/imx-drm/imx-drm.h +++ b/drivers/staging/imx-drm/imx-drm.h @@ -50,8 +50,6 @@ int imx_drm_encoder_get_mux_id(struct device_node *node, int imx_drm_encoder_parse_of(struct drm_device *drm, struct drm_encoder *encoder, struct device_node *np); -int imx_drm_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode); void imx_drm_connector_destroy(struct drm_connector *connector); void imx_drm_encoder_destroy(struct drm_encoder *encoder); diff --git a/drivers/staging/imx-drm/imx-hdmi.c b/drivers/staging/imx-drm/imx-hdmi.c index d47dedd2cdb4..9fbe6d6a989d 100644 --- a/drivers/staging/imx-drm/imx-hdmi.c +++ b/drivers/staging/imx-drm/imx-hdmi.c @@ -1492,7 +1492,6 @@ static struct drm_connector_funcs imx_hdmi_connector_funcs = { static struct drm_connector_helper_funcs imx_hdmi_connector_helper_funcs = { .get_modes = imx_hdmi_connector_get_modes, - .mode_valid = imx_drm_connector_mode_valid, .best_encoder = imx_hdmi_connector_best_encoder, }; diff --git a/drivers/staging/imx-drm/imx-ldb.c b/drivers/staging/imx-drm/imx-ldb.c index fe4c1ef4e7a5..7e3f019d7e72 100644 --- a/drivers/staging/imx-drm/imx-ldb.c +++ b/drivers/staging/imx-drm/imx-ldb.c @@ -317,7 +317,6 @@ static struct drm_connector_funcs imx_ldb_connector_funcs = { static struct drm_connector_helper_funcs imx_ldb_connector_helper_funcs = { .get_modes = imx_ldb_connector_get_modes, .best_encoder = imx_ldb_connector_best_encoder, - .mode_valid = imx_drm_connector_mode_valid, }; static struct drm_encoder_funcs imx_ldb_encoder_funcs = { diff --git a/drivers/staging/imx-drm/imx-tve.c b/drivers/staging/imx-drm/imx-tve.c index 575533f4fd64..5a5a5287a86a 100644 --- a/drivers/staging/imx-drm/imx-tve.c +++ b/drivers/staging/imx-drm/imx-tve.c @@ -251,10 +251,6 @@ static int imx_tve_connector_mode_valid(struct drm_connector *connector, unsigned long rate; int ret; - ret = imx_drm_connector_mode_valid(connector, mode); - if (ret != MODE_OK) - return ret; - /* pixel clock with 2x oversampling */ rate = clk_round_rate(tve->clk, 2000UL * mode->clock) / 2000; if (rate == mode->clock) diff --git a/drivers/staging/imx-drm/parallel-display.c b/drivers/staging/imx-drm/parallel-display.c index c60b6c645f42..52598e489a4b 100644 --- a/drivers/staging/imx-drm/parallel-display.c +++ b/drivers/staging/imx-drm/parallel-display.c @@ -148,7 +148,6 @@ static struct drm_connector_funcs imx_pd_connector_funcs = { static struct drm_connector_helper_funcs imx_pd_connector_helper_funcs = { .get_modes = imx_pd_connector_get_modes, .best_encoder = imx_pd_connector_best_encoder, - .mode_valid = imx_drm_connector_mode_valid, }; static struct drm_encoder_funcs imx_pd_encoder_funcs = { diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index 36a5febac2a6..7ffb59232e84 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -114,7 +114,7 @@ struct drm_encoder_helper_funcs { /** * drm_connector_helper_funcs - helper operations for connectors * @get_modes: get mode list for this connector - * @mode_valid: is this mode valid on the given connector? + * @mode_valid (optional): is this mode valid on the given connector? * * The helper operations are called by the mid-layer CRTC helper. */ -- cgit v1.2.3-71-gd317 From eaaf8f0fc32468d4dedbb5b5f9c5bfb27744be4c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 28 Aug 2013 15:19:23 +0200 Subject: drm/pci: fold in irq_by_busid support This is a ums-only ioctl, and we've only ever supported ums (at least in upstream) on pci devices. So no point in keeping that piece of legacy logic abstracted within the drm bus driver. To keep things work without CONFIG_PCI also add a dummy ioctl. v2: Block the irq_by_busid ioctl for modeset drivers. v3: Spelling/whitespace polish (Thierry) Reviewed-by: Thierry Reding Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 27 --------------------------- drivers/gpu/drm/drm_pci.c | 39 +++++++++++++++++++++++++++++++++++++-- include/drm/drmP.h | 1 - 3 files changed, 37 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index d4cc47690d65..e5920e7a4392 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -56,33 +56,6 @@ */ #define DRM_REDUNDANT_VBLIRQ_THRESH_NS 1000000 -/** - * Get interrupt from bus id. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument, pointing to a drm_irq_busid structure. - * \return zero on success or a negative number on failure. - * - * Finds the PCI device with the specified bus id and gets its IRQ number. - * This IOCTL is deprecated, and will now return EINVAL for any busid not equal - * to that of the device that this DRM instance attached to. - */ -int drm_irq_by_busid(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_irq_busid *p = data; - - if (!dev->driver->bus->irq_by_busid) - return -EINVAL; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) - return -EINVAL; - - return dev->driver->bus->irq_by_busid(dev, p); -} - /* * Clear vblank timestamp buffer for a crtc. */ diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index 9c696a5ad74d..8d4221683af4 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -247,7 +247,6 @@ err: return ret; } - static int drm_pci_irq_by_busid(struct drm_device *dev, struct drm_irq_busid *p) { if ((p->busnum >> 8) != drm_get_pci_domain(dev) || @@ -262,6 +261,37 @@ static int drm_pci_irq_by_busid(struct drm_device *dev, struct drm_irq_busid *p) return 0; } +/** + * Get interrupt from bus id. + * + * \param inode device inode. + * \param file_priv DRM file private. + * \param cmd command. + * \param arg user argument, pointing to a drm_irq_busid structure. + * \return zero on success or a negative number on failure. + * + * Finds the PCI device with the specified bus id and gets its IRQ number. + * This IOCTL is deprecated, and will now return EINVAL for any busid not equal + * to that of the device that this DRM instance attached to. + */ +int drm_irq_by_busid(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_irq_busid *p = data; + + if (drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + + /* UMS was only ever support on PCI devices. */ + if (WARN_ON(!dev->pdev)) + return -EINVAL; + + if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) + return -EINVAL; + + return drm_pci_irq_by_busid(dev, p); +} + static void drm_pci_agp_init(struct drm_device *dev) { if (drm_core_check_feature(dev, DRIVER_USE_AGP)) { @@ -292,7 +322,6 @@ static struct drm_bus drm_pci_bus = { .get_name = drm_pci_get_name, .set_busid = drm_pci_set_busid, .set_unique = drm_pci_set_unique, - .irq_by_busid = drm_pci_irq_by_busid, }; /** @@ -453,6 +482,12 @@ int drm_pci_init(struct drm_driver *driver, struct pci_driver *pdriver) } void drm_pci_agp_destroy(struct drm_device *dev) {} + +int drm_irq_by_busid(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + return -EINVAL; +} #endif EXPORT_SYMBOL(drm_pci_init); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index a7c2a862b4f4..6d7ca98d0143 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -737,7 +737,6 @@ struct drm_bus { int (*set_busid)(struct drm_device *dev, struct drm_master *master); int (*set_unique)(struct drm_device *dev, struct drm_master *master, struct drm_unique *unique); - int (*irq_by_busid)(struct drm_device *dev, struct drm_irq_busid *p); }; /** -- cgit v1.2.3-71-gd317 From ebfa4324930618e72645d2eb7db1c9773228a868 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 3 Nov 2013 20:27:09 +0100 Subject: drm: remove drm_dev_to_irq from drivers Only used in some legacy pci drivers, and dereferencing the PCI irq is actually shorter ... Since this removes all users for drm_dev_to_irq from the tree except in drm_irq.c, move the inline helper in there. It'll disappear soon, too. v2: Polish commit message (Thierry) Reviewed-by: Thierry Reding Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 5 +++++ drivers/gpu/drm/mga/mga_state.c | 2 +- drivers/gpu/drm/r128/r128_state.c | 2 +- drivers/gpu/drm/radeon/radeon_state.c | 2 +- include/drm/drmP.h | 5 ----- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 06a53f8b618c..589e865832cd 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -233,6 +233,11 @@ static void drm_irq_vgaarb_nokms(void *cookie, bool state) } } +static inline int drm_dev_to_irq(struct drm_device *dev) +{ + return dev->driver->bus->get_irq(dev); +} + /** * Install IRQ handler. * diff --git a/drivers/gpu/drm/mga/mga_state.c b/drivers/gpu/drm/mga/mga_state.c index 314685b7f41f..3cb58df5237e 100644 --- a/drivers/gpu/drm/mga/mga_state.c +++ b/drivers/gpu/drm/mga/mga_state.c @@ -1020,7 +1020,7 @@ static int mga_getparam(struct drm_device *dev, void *data, struct drm_file *fil switch (param->param) { case MGA_PARAM_IRQ_NR: - value = drm_dev_to_irq(dev); + value = dev->pdev->irq; break; case MGA_PARAM_CARD_TYPE: value = dev_priv->chipset; diff --git a/drivers/gpu/drm/r128/r128_state.c b/drivers/gpu/drm/r128/r128_state.c index e806dacd452f..97064dd434c2 100644 --- a/drivers/gpu/drm/r128/r128_state.c +++ b/drivers/gpu/drm/r128/r128_state.c @@ -1594,7 +1594,7 @@ static int r128_getparam(struct drm_device *dev, void *data, struct drm_file *fi switch (param->param) { case R128_PARAM_IRQ_NR: - value = drm_dev_to_irq(dev); + value = dev->pdev->irq; break; default: return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 956ab7f14e16..b576549fc783 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -3054,7 +3054,7 @@ static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_fil if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) value = 0; else - value = drm_dev_to_irq(dev); + value = dev->pdev->irq; break; case RADEON_PARAM_GART_BASE: value = dev_priv->gart_vm_start; diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 6d7ca98d0143..41839ea0c1ee 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1185,11 +1185,6 @@ static __inline__ int drm_core_check_feature(struct drm_device *dev, return ((dev->driver->driver_features & feature) ? 1 : 0); } -static inline int drm_dev_to_irq(struct drm_device *dev) -{ - return dev->driver->bus->get_irq(dev); -} - static inline void drm_device_set_unplugged(struct drm_device *dev) { smp_wmb(); -- cgit v1.2.3-71-gd317 From 42b21049fc26513ca8e732f47559b1525b04a992 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 3 Nov 2013 20:30:25 +0100 Subject: drm: kill drm_bus->bus_type Completely unused. Hooray, midlayer mistakes that didn't cause work to undo! v2: Rebase on top of the recent tegra changes which added a host1x drm bus. Reviewed-by: Thierry Reding Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_pci.c | 1 - drivers/gpu/drm/drm_platform.c | 1 - drivers/gpu/drm/drm_usb.c | 1 - drivers/gpu/drm/tegra/bus.c | 1 - include/drm/drmP.h | 6 ------ 5 files changed, 10 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index 8d4221683af4..08c76af920d6 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -317,7 +317,6 @@ void drm_pci_agp_destroy(struct drm_device *dev) } static struct drm_bus drm_pci_bus = { - .bus_type = DRIVER_BUS_PCI, .get_irq = drm_pci_get_irq, .get_name = drm_pci_get_name, .set_busid = drm_pci_set_busid, diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c index 319ff5385601..5cd8c695824f 100644 --- a/drivers/gpu/drm/drm_platform.c +++ b/drivers/gpu/drm/drm_platform.c @@ -123,7 +123,6 @@ err: } static struct drm_bus drm_platform_bus = { - .bus_type = DRIVER_BUS_PLATFORM, .get_irq = drm_platform_get_irq, .get_name = drm_platform_get_name, .set_busid = drm_platform_set_busid, diff --git a/drivers/gpu/drm/drm_usb.c b/drivers/gpu/drm/drm_usb.c index c3406aad2944..ed60f887c805 100644 --- a/drivers/gpu/drm/drm_usb.c +++ b/drivers/gpu/drm/drm_usb.c @@ -53,7 +53,6 @@ static int drm_usb_set_busid(struct drm_device *dev, } static struct drm_bus drm_usb_bus = { - .bus_type = DRIVER_BUS_USB, .get_irq = drm_usb_get_irq, .get_name = drm_usb_get_name, .set_busid = drm_usb_set_busid, diff --git a/drivers/gpu/drm/tegra/bus.c b/drivers/gpu/drm/tegra/bus.c index 71cef5c13dc8..6916fa51cfa4 100644 --- a/drivers/gpu/drm/tegra/bus.c +++ b/drivers/gpu/drm/tegra/bus.c @@ -37,7 +37,6 @@ static int drm_host1x_set_busid(struct drm_device *dev, } static struct drm_bus drm_host1x_bus = { - .bus_type = DRIVER_BUS_HOST1X, .set_busid = drm_host1x_set_busid, }; diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 41839ea0c1ee..9f1fb8d36b67 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -143,11 +143,6 @@ int drm_err(const char *func, const char *format, ...); #define DRIVER_PRIME 0x4000 #define DRIVER_RENDER 0x8000 -#define DRIVER_BUS_PCI 0x1 -#define DRIVER_BUS_PLATFORM 0x2 -#define DRIVER_BUS_USB 0x3 -#define DRIVER_BUS_HOST1X 0x4 - /***********************************************************************/ /** \name Begin the DRM... */ /*@{*/ @@ -731,7 +726,6 @@ struct drm_master { #define DRM_SCANOUTPOS_ACCURATE (1 << 2) struct drm_bus { - int bus_type; int (*get_irq)(struct drm_device *dev); const char *(*get_name)(struct drm_device *dev); int (*set_busid)(struct drm_device *dev, struct drm_master *master); -- cgit v1.2.3-71-gd317 From fa372a51cb5f93800f711473e5a36e0e0c9a8f00 Mon Sep 17 00:00:00 2001 From: Markus Mayer Date: Tue, 8 Apr 2014 15:19:43 -0700 Subject: mmc: Delay the card_event callback into the mmc_rescan worker This change removes the callback from atomic context which it doesn't need to be in, and puts it in line with the debounced rescan. This code is based on these e-mail threads with Christian Daudt: https://lkml.org/lkml/2013/8/19/539 https://lkml.org/lkml/2014/3/19/79 Signed-off-by: Markus Mayer Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 5 +++++ drivers/mmc/core/slot-gpio.c | 4 +--- include/linux/mmc/host.h | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index acbc3f2aaaf9..f396d1bb4ac4 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2403,6 +2403,11 @@ void mmc_rescan(struct work_struct *work) container_of(work, struct mmc_host, detect.work); int i; + if (host->trigger_card_event && host->ops->card_event) { + host->ops->card_event(host); + host->trigger_card_event = false; + } + if (host->rescan_disable) return; diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index f7650b899e3d..5f89cb83d5f0 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -32,9 +32,7 @@ static irqreturn_t mmc_gpio_cd_irqt(int irq, void *dev_id) /* Schedule a card detection after a debounce timeout */ struct mmc_host *host = dev_id; - if (host->ops->card_event) - host->ops->card_event(host); - + host->trigger_card_event = true; mmc_detect_change(host, msecs_to_jiffies(200)); return IRQ_HANDLED; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 35354207e71f..0cf705c83998 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -319,6 +319,8 @@ struct mmc_host { int rescan_disable; /* disable card detection */ int rescan_entered; /* used with nonremovable devices */ + bool trigger_card_event; /* card_event necessary */ + struct mmc_card *card; /* device attached to this host */ wait_queue_head_t wq; -- cgit v1.2.3-71-gd317 From 297d40560bc8f474adbb43178e3118321fa702ea Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 4 Apr 2014 22:42:48 -0300 Subject: mmc: card.h: Use NULL instead of 0 for END_FIXUP Fix the following sparse warnings: drivers/mmc/card/block.c:2421:9: warning: Using plain integer as NULL pointer drivers/mmc/core/quirks.c:69:9: warning: Using plain integer as NULL pointer Signed-off-by: Fabio Estevam Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- include/linux/mmc/card.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index b73027298b3a..aa7e57f60fb2 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -353,7 +353,7 @@ struct mmc_fixup { #define CID_OEMID_ANY ((unsigned short) -1) #define CID_NAME_ANY (NULL) -#define END_FIXUP { 0 } +#define END_FIXUP { NULL } #define _FIXUP_EXT(_name, _manfid, _oemid, _rev_start, _rev_end, \ _cis_vendor, _cis_device, \ -- cgit v1.2.3-71-gd317 From 3de0b592394d17b2c41a261a6a493a521213f299 Mon Sep 17 00:00:00 2001 From: Venkata Duvvuru Date: Mon, 21 Apr 2014 15:37:59 +0530 Subject: ethtool: Support for configurable RSS hash key This ethtool patch primarily copies the ioctl command data structures from/to the User space and invokes the driver hook. Signed-off-by: Venkat Duvvuru Signed-off-by: David S. Miller --- include/linux/ethtool.h | 13 +++ include/uapi/linux/ethtool.h | 32 +++++++ net/core/ethtool.c | 221 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 252 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 0a114d05f68d..212f537fc686 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -154,13 +154,23 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * @reset: Reset (part of) the device, as specified by a bitmask of * flags from &enum ethtool_reset_flags. Returns a negative * error code or zero. + * @get_rxfh_key_size: Get the size of the RX flow hash key. + * Returns zero if not supported for this specific device. * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table. * Returns zero if not supported for this specific device. * @get_rxfh_indir: Get the contents of the RX flow hash indirection table. * Will not be called if @get_rxfh_indir_size returns zero. + * @get_rxfh: Get the contents of the RX flow hash indirection table and hash + * key. + * Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size + * returns zero. * Returns a negative error code or zero. * @set_rxfh_indir: Set the contents of the RX flow hash indirection table. * Will not be called if @get_rxfh_indir_size returns zero. + * @set_rxfh: Set the contents of the RX flow hash indirection table and + * hash key. + * Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size + * returns zero. * Returns a negative error code or zero. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or @@ -232,7 +242,10 @@ struct ethtool_ops { int (*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *); int (*flash_device)(struct net_device *, struct ethtool_flash *); int (*reset)(struct net_device *, u32 *); + u32 (*get_rxfh_key_size)(struct net_device *); u32 (*get_rxfh_indir_size)(struct net_device *); + int (*get_rxfh)(struct net_device *, u32 *, u8 *); + int (*set_rxfh)(struct net_device *, u32 *, u8 *); int (*get_rxfh_indir)(struct net_device *, u32 *); int (*set_rxfh_indir)(struct net_device *, const u32 *); void (*get_channels)(struct net_device *, struct ethtool_channels *); diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index fd161e91b6d7..d47d31d6fa0e 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -846,6 +846,35 @@ struct ethtool_rxfh_indir { __u32 ring_index[0]; }; +/** + * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key. + * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH + * @rss_context: RSS context identifier. + * @indir_size: On entry, the array size of the user buffer, which may be zero. + * On return from %ETHTOOL_GRSSH, the array size of the hardware + * indirection table. + * @key_size: On entry, the array size of the user buffer in bytes, + * which may be zero. + * On return from %ETHTOOL_GRSSH, the size of the RSS hash key. + * @rsvd: Reserved for future extensions. + * @rss_config: RX ring/queue index for each hash value i.e., indirection table + * of size @indir_size followed by hash key of size @key_size. + * + * For %ETHTOOL_GRSSH, a @indir_size and key_size of zero means that only the + * size should be returned. For %ETHTOOL_SRSSH, a @indir_size of 0xDEADBEEF + * means that indir table setting is not requested and a @indir_size of zero + * means the indir table should be reset to default values. This last feature + * is not supported by the original implementations. + */ +struct ethtool_rxfh { + __u32 cmd; + __u32 rss_context; + __u32 indir_size; + __u32 key_size; + __u32 rsvd[2]; + __u32 rss_config[0]; +}; + /** * struct ethtool_rx_ntuple_flow_spec - specification for RX flow filter * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW @@ -1118,6 +1147,9 @@ enum ethtool_sfeatures_retval_bits { #define ETHTOOL_GEEE 0x00000044 /* Get EEE settings */ #define ETHTOOL_SEEE 0x00000045 /* Set EEE settings */ +#define ETHTOOL_GRSSH 0x00000046 /* Get RX flow hash configuration */ +#define ETHTOOL_SRSSH 0x00000047 /* Set RX flow hash configuration */ + /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET #define SPARC_ETH_SSET ETHTOOL_SSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 640ba0e5831c..1d72786ef866 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -557,6 +557,23 @@ err_out: return ret; } +static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr, + struct ethtool_rxnfc *rx_rings, + u32 size) +{ + int ret = 0, i; + + if (copy_from_user(indir, useraddr, size * sizeof(indir[0]))) + ret = -EFAULT; + + /* Validate ring indices */ + for (i = 0; i < size; i++) { + if (indir[i] >= rx_rings->data) + ret = -EINVAL; + } + return ret; +} + static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, void __user *useraddr) { @@ -613,6 +630,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, u32 *indir; const struct ethtool_ops *ops = dev->ethtool_ops; int ret; + u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]); if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir || !ops->get_rxnfc) @@ -643,28 +661,196 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, for (i = 0; i < dev_size; i++) indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); } else { - if (copy_from_user(indir, - useraddr + - offsetof(struct ethtool_rxfh_indir, - ring_index[0]), - dev_size * sizeof(indir[0]))) { + ret = ethtool_copy_validate_indir(indir, + useraddr + ringidx_offset, + &rx_rings, + dev_size); + if (ret) + goto out; + } + + ret = ops->set_rxfh_indir(dev, indir); + +out: + kfree(indir); + return ret; +} + +static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, + void __user *useraddr) +{ + int ret; + const struct ethtool_ops *ops = dev->ethtool_ops; + u32 user_indir_size = 0, user_key_size = 0; + u32 dev_indir_size = 0, dev_key_size = 0; + u32 total_size; + u32 indir_offset, indir_bytes; + u32 key_offset; + u32 *indir = NULL; + u8 *hkey = NULL; + u8 *rss_config; + + if (!(dev->ethtool_ops->get_rxfh_indir_size || + dev->ethtool_ops->get_rxfh_key_size) || + !dev->ethtool_ops->get_rxfh) + return -EOPNOTSUPP; + + if (ops->get_rxfh_indir_size) + dev_indir_size = ops->get_rxfh_indir_size(dev); + + indir_offset = offsetof(struct ethtool_rxfh, indir_size); + + if (copy_from_user(&user_indir_size, + useraddr + indir_offset, + sizeof(user_indir_size))) + return -EFAULT; + + if (copy_to_user(useraddr + indir_offset, + &dev_indir_size, sizeof(dev_indir_size))) + return -EFAULT; + + if (ops->get_rxfh_key_size) + dev_key_size = ops->get_rxfh_key_size(dev); + + if ((dev_key_size + dev_indir_size) == 0) + return -EOPNOTSUPP; + + key_offset = offsetof(struct ethtool_rxfh, key_size); + + if (copy_from_user(&user_key_size, + useraddr + key_offset, + sizeof(user_key_size))) + return -EFAULT; + + if (copy_to_user(useraddr + key_offset, + &dev_key_size, sizeof(dev_key_size))) + return -EFAULT; + + /* If the user buffer size is 0, this is just a query for the + * device table size and key size. Otherwise, if the User size is + * not equal to device table size or key size it's an error. + */ + if (!user_indir_size && !user_key_size) + return 0; + + if ((user_indir_size && (user_indir_size != dev_indir_size)) || + (user_key_size && (user_key_size != dev_key_size))) + return -EINVAL; + + indir_bytes = user_indir_size * sizeof(indir[0]); + total_size = indir_bytes + user_key_size; + rss_config = kzalloc(total_size, GFP_USER); + if (!rss_config) + return -ENOMEM; + + if (user_indir_size) + indir = (u32 *)rss_config; + + if (user_key_size) + hkey = rss_config + indir_bytes; + + ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey); + if (!ret) { + if (copy_to_user(useraddr + + offsetof(struct ethtool_rxfh, rss_config[0]), + rss_config, total_size)) ret = -EFAULT; + } + + kfree(rss_config); + + return ret; +} + +static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, + void __user *useraddr) +{ + int ret; + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_rxnfc rx_rings; + u32 user_indir_size = 0, dev_indir_size = 0, i; + u32 user_key_size = 0, dev_key_size = 0; + u32 *indir = NULL, indir_bytes = 0; + u8 *hkey = NULL; + u8 *rss_config; + u32 indir_offset, key_offset; + u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); + + if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) || + !ops->get_rxnfc || !ops->set_rxfh) + return -EOPNOTSUPP; + + if (ops->get_rxfh_indir_size) + dev_indir_size = ops->get_rxfh_indir_size(dev); + + indir_offset = offsetof(struct ethtool_rxfh, indir_size); + if (copy_from_user(&user_indir_size, + useraddr + indir_offset, + sizeof(user_indir_size))) + return -EFAULT; + + if (ops->get_rxfh_key_size) + dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev); + + if ((dev_key_size + dev_indir_size) == 0) + return -EOPNOTSUPP; + + key_offset = offsetof(struct ethtool_rxfh, key_size); + if (copy_from_user(&user_key_size, + useraddr + key_offset, + sizeof(user_key_size))) + return -EFAULT; + + /* If either indir or hash key is valid, proceed further. + */ + if ((user_indir_size && ((user_indir_size != 0xDEADBEEF) && + user_indir_size != dev_indir_size)) || + (user_key_size && (user_key_size != dev_key_size))) + return -EINVAL; + + if (user_indir_size != 0xDEADBEEF) + indir_bytes = dev_indir_size * sizeof(indir[0]); + + rss_config = kzalloc(indir_bytes + user_key_size, GFP_USER); + if (!rss_config) + return -ENOMEM; + + rx_rings.cmd = ETHTOOL_GRXRINGS; + ret = ops->get_rxnfc(dev, &rx_rings, NULL); + if (ret) + goto out; + + /* user_indir_size == 0 means reset the indir table to default. + * user_indir_size == 0xDEADBEEF means indir setting is not requested. + */ + if (user_indir_size && user_indir_size != 0xDEADBEEF) { + indir = (u32 *)rss_config; + ret = ethtool_copy_validate_indir(indir, + useraddr + rss_cfg_offset, + &rx_rings, + user_indir_size); + if (ret) goto out; - } + } else if (user_indir_size == 0) { + indir = (u32 *)rss_config; + for (i = 0; i < dev_indir_size; i++) + indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); + } - /* Validate ring indices */ - for (i = 0; i < dev_size; i++) { - if (indir[i] >= rx_rings.data) { - ret = -EINVAL; - goto out; - } + if (user_key_size) { + hkey = rss_config + indir_bytes; + if (copy_from_user(hkey, + useraddr + rss_cfg_offset + indir_bytes, + user_key_size)) { + ret = -EFAULT; + goto out; } } - ret = ops->set_rxfh_indir(dev, indir); + ret = ops->set_rxfh(dev, indir, hkey); out: - kfree(indir); + kfree(rss_config); return ret; } @@ -1491,6 +1677,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCLSRULE: case ETHTOOL_GRXCLSRLALL: case ETHTOOL_GRXFHINDIR: + case ETHTOOL_GRSSH: case ETHTOOL_GFEATURES: case ETHTOOL_GCHANNELS: case ETHTOOL_GET_TS_INFO: @@ -1628,6 +1815,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXFHINDIR: rc = ethtool_set_rxfh_indir(dev, useraddr); break; + case ETHTOOL_GRSSH: + rc = ethtool_get_rxfh(dev, useraddr); + break; + case ETHTOOL_SRSSH: + rc = ethtool_set_rxfh(dev, useraddr); + break; case ETHTOOL_GFEATURES: rc = ethtool_get_features(dev, useraddr); break; -- cgit v1.2.3-71-gd317 From 4cd3675ebf74d7f559038ded6aa8088e4099a83d Mon Sep 17 00:00:00 2001 From: Chema Gonzalez Date: Mon, 21 Apr 2014 09:21:24 -0700 Subject: filter: added BPF random opcode Added a new ancillary load (bpf call in eBPF parlance) that produces a 32-bit random number. We are implementing it as an ancillary load (instead of an ISA opcode) because (a) it is simpler, (b) allows easy JITing, and (c) seems more in line with generic ISAs that do not have "get a random number" as a instruction, but as an OS call. The main use for this ancillary load is to perform random packet sampling. Signed-off-by: Chema Gonzalez Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 13 +++++++++++++ include/linux/filter.h | 1 + include/uapi/linux/filter.h | 3 ++- net/core/filter.c | 12 ++++++++++++ tools/net/bpf_exp.l | 1 + tools/net/bpf_exp.y | 11 ++++++++++- 6 files changed, 39 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 81f940f4e884..82e1cb0b3da8 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -281,6 +281,7 @@ Possible BPF extensions are shown in the following table: cpu raw_smp_processor_id() vlan_tci vlan_tx_tag_get(skb) vlan_pr vlan_tx_tag_present(skb) + rand prandom_u32() These extensions can also be prefixed with '#'. Examples for low-level BPF: @@ -308,6 +309,18 @@ Examples for low-level BPF: ret #-1 drop: ret #0 +** icmp random packet sampling, 1 in 4 + ldh [12] + jne #0x800, drop + ldb [23] + jneq #1, drop + # get a random uint32 number + ld rand + mod #4 + jneq #1, drop + ret #-1 + drop: ret #0 + ** SECCOMP filter example: ld [4] /* offsetof(struct seccomp_data, arch) */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 024fd03e5d18..759abf78dd61 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -223,6 +223,7 @@ enum { BPF_S_ANC_VLAN_TAG, BPF_S_ANC_VLAN_TAG_PRESENT, BPF_S_ANC_PAY_OFFSET, + BPF_S_ANC_RANDOM, }; #endif /* __LINUX_FILTER_H__ */ diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h index 8eb9ccaa5b48..253b4d42cf2b 100644 --- a/include/uapi/linux/filter.h +++ b/include/uapi/linux/filter.h @@ -130,7 +130,8 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_VLAN_TAG 44 #define SKF_AD_VLAN_TAG_PRESENT 48 #define SKF_AD_PAY_OFFSET 52 -#define SKF_AD_MAX 56 +#define SKF_AD_RANDOM 56 +#define SKF_AD_MAX 60 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/net/core/filter.c b/net/core/filter.c index cd58614660cf..78a636e60a0b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -643,6 +643,12 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) return raw_smp_processor_id(); } +/* note that this only generates 32-bit random numbers */ +static u64 __get_random_u32(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +{ + return (u64)prandom_u32(); +} + /* Register mappings for user programs. */ #define A_REG 0 #define X_REG 7 @@ -779,6 +785,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_NLATTR: case SKF_AD_OFF + SKF_AD_NLATTR_NEST: case SKF_AD_OFF + SKF_AD_CPU: + case SKF_AD_OFF + SKF_AD_RANDOM: /* arg1 = ctx */ insn->code = BPF_ALU64 | BPF_MOV | BPF_X; insn->a_reg = ARG1_REG; @@ -812,6 +819,9 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_CPU: insn->imm = __get_raw_cpu_id - __bpf_call_base; break; + case SKF_AD_OFF + SKF_AD_RANDOM: + insn->imm = __get_random_u32 - __bpf_call_base; + break; } break; @@ -1362,6 +1372,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(VLAN_TAG); ANCILLARY(VLAN_TAG_PRESENT); ANCILLARY(PAY_OFFSET); + ANCILLARY(RANDOM); } /* ancillary operation unknown or unsupported */ @@ -1746,6 +1757,7 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_RANDOM] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l index bf7be77ddd62..833a96611da6 100644 --- a/tools/net/bpf_exp.l +++ b/tools/net/bpf_exp.l @@ -92,6 +92,7 @@ extern void yyerror(const char *str); "#"?("cpu") { return K_CPU; } "#"?("vlan_tci") { return K_VLANT; } "#"?("vlan_pr") { return K_VLANP; } +"#"?("rand") { return K_RAND; } ":" { return ':'; } "," { return ','; } diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y index d15efc989ef5..e6306c51c26f 100644 --- a/tools/net/bpf_exp.y +++ b/tools/net/bpf_exp.y @@ -56,7 +56,7 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type); %token OP_LDXI %token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE -%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF +%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF K_RAND %token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%' @@ -164,6 +164,9 @@ ldb | OP_LDB K_POFF { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_PAY_OFFSET); } + | OP_LDB K_RAND { + bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_RANDOM); } ; ldh @@ -212,6 +215,9 @@ ldh | OP_LDH K_POFF { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_PAY_OFFSET); } + | OP_LDH K_RAND { + bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_RANDOM); } ; ldi @@ -265,6 +271,9 @@ ld | OP_LD K_POFF { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_PAY_OFFSET); } + | OP_LD K_RAND { + bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_RANDOM); } | OP_LD 'M' '[' number ']' { bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); } | OP_LD '[' 'x' '+' number ']' { -- cgit v1.2.3-71-gd317 From 4f520900522fd596e336c07e9aafd5b7a9564235 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 22 Apr 2014 21:31:54 -0400 Subject: netlink: have netlink per-protocol bind function return an error code. Have the netlink per-protocol optional bind function return an int error code rather than void to signal a failure. This will enable netlink protocols to perform extra checks including capabilities and permissions verifications when updating memberships in multicast groups. In netlink_bind() and netlink_setsockopt() the call to the per-protocol bind function was moved above the multicast group update to prevent any access to the multicast socket groups before checking with the per-protocol bind function. This will enable the per-protocol bind function to be used to check permissions which could be denied before making them available, and to avoid the messy job of undoing the addition should the per-protocol bind function fail. The netfilter subsystem seems to be the only one currently using the per-protocol bind function. Signed-off-by: Richard Guy Briggs Signed-off-by: David S. Miller --- include/linux/netlink.h | 3 ++- net/netfilter/nfnetlink.c | 3 ++- net/netlink/af_netlink.c | 68 +++++++++++++++++++++++++++++++++-------------- net/netlink/af_netlink.h | 6 +++-- 4 files changed, 56 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index aad8eeaf416d..5146ce066498 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -45,7 +45,8 @@ struct netlink_kernel_cfg { unsigned int flags; void (*input)(struct sk_buff *skb); struct mutex *cb_mutex; - void (*bind)(int group); + int (*bind)(int group); + void (*unbind)(int group); bool (*compare)(struct net *net, struct sock *sk); }; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 0df800a454ec..6e42dcfad40a 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -400,7 +400,7 @@ static void nfnetlink_rcv(struct sk_buff *skb) } #ifdef CONFIG_MODULES -static void nfnetlink_bind(int group) +static int nfnetlink_bind(int group) { const struct nfnetlink_subsystem *ss; int type = nfnl_group2type[group]; @@ -410,6 +410,7 @@ static void nfnetlink_bind(int group) rcu_read_unlock(); if (!ss) request_module("nfnetlink-subsys-%d", type); + return 0; } #endif diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 894cda0206bb..7e8d229bc010 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1206,7 +1206,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, struct module *module = NULL; struct mutex *cb_mutex; struct netlink_sock *nlk; - void (*bind)(int group); + int (*bind)(int group); + void (*unbind)(int group); int err = 0; sock->state = SS_UNCONNECTED; @@ -1232,6 +1233,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, err = -EPROTONOSUPPORT; cb_mutex = nl_table[protocol].cb_mutex; bind = nl_table[protocol].bind; + unbind = nl_table[protocol].unbind; netlink_unlock_table(); if (err < 0) @@ -1248,6 +1250,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, nlk = nlk_sk(sock->sk); nlk->module = module; nlk->netlink_bind = bind; + nlk->netlink_unbind = unbind; out: return err; @@ -1301,6 +1304,7 @@ static int netlink_release(struct socket *sock) kfree_rcu(old, rcu); nl_table[sk->sk_protocol].module = NULL; nl_table[sk->sk_protocol].bind = NULL; + nl_table[sk->sk_protocol].unbind = NULL; nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } @@ -1411,6 +1415,19 @@ static int netlink_realloc_groups(struct sock *sk) return err; } +static void netlink_unbind(int group, long unsigned int groups, + struct netlink_sock *nlk) +{ + int undo; + + if (!nlk->netlink_unbind) + return; + + for (undo = 0; undo < group; undo++) + if (test_bit(group, &groups)) + nlk->netlink_unbind(undo); +} + static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { @@ -1419,6 +1436,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; + long unsigned int groups = nladdr->nl_groups; if (addr_len < sizeof(struct sockaddr_nl)) return -EINVAL; @@ -1427,7 +1445,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return -EINVAL; /* Only superuser is allowed to listen multicasts */ - if (nladdr->nl_groups) { + if (groups) { if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); @@ -1435,37 +1453,45 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return err; } - if (nlk->portid) { + if (nlk->portid) if (nladdr->nl_pid != nlk->portid) return -EINVAL; - } else { + + if (nlk->netlink_bind && groups) { + int group; + + for (group = 0; group < nlk->ngroups; group++) { + if (!test_bit(group, &groups)) + continue; + err = nlk->netlink_bind(group); + if (!err) + continue; + netlink_unbind(group, groups, nlk); + return err; + } + } + + if (!nlk->portid) { err = nladdr->nl_pid ? netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); - if (err) + if (err) { + netlink_unbind(nlk->ngroups - 1, groups, nlk); return err; + } } - if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) + if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) return 0; netlink_table_grab(); netlink_update_subscriptions(sk, nlk->subscriptions + - hweight32(nladdr->nl_groups) - + hweight32(groups) - hweight32(nlk->groups[0])); - nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups; + nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups; netlink_update_listeners(sk); netlink_table_ungrab(); - if (nlk->netlink_bind && nlk->groups[0]) { - int i; - - for (i = 0; i < nlk->ngroups; i++) { - if (test_bit(i, nlk->groups)) - nlk->netlink_bind(i); - } - } - return 0; } @@ -2103,14 +2129,16 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, return err; if (!val || val - 1 >= nlk->ngroups) return -EINVAL; + if (nlk->netlink_bind) { + err = nlk->netlink_bind(val); + if (err) + return err; + } netlink_table_grab(); netlink_update_socket_mc(nlk, val, optname == NETLINK_ADD_MEMBERSHIP); netlink_table_ungrab(); - if (nlk->netlink_bind) - nlk->netlink_bind(val); - err = 0; break; } diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index ed13a790b00e..0b59d441f5b6 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -38,7 +38,8 @@ struct netlink_sock { struct mutex *cb_mutex; struct mutex cb_def_mutex; void (*netlink_rcv)(struct sk_buff *skb); - void (*netlink_bind)(int group); + int (*netlink_bind)(int group); + void (*netlink_unbind)(int group); struct module *module; #ifdef CONFIG_NETLINK_MMAP struct mutex pg_vec_lock; @@ -74,7 +75,8 @@ struct netlink_table { unsigned int groups; struct mutex *cb_mutex; struct module *module; - void (*bind)(int group); + int (*bind)(int group); + void (*unbind)(int group); bool (*compare)(struct net *net, struct sock *sock); int registered; }; -- cgit v1.2.3-71-gd317 From 3a101b8de0d39403b2c7e5c23fd0b005668acf48 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 22 Apr 2014 21:31:56 -0400 Subject: audit: add netlink audit protocol bind to check capabilities on multicast join Register a netlink per-protocol bind fuction for audit to check userspace process capabilities before allowing a multicast group connection. Signed-off-by: Richard Guy Briggs Signed-off-by: David S. Miller --- include/uapi/linux/capability.h | 7 ++++++- kernel/audit.c | 10 ++++++++++ security/selinux/include/classmap.h | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 154dd6d3c8fe..12c37a197d24 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -347,7 +347,12 @@ struct vfs_cap_data { #define CAP_BLOCK_SUSPEND 36 -#define CAP_LAST_CAP CAP_BLOCK_SUSPEND +/* Allow reading the audit log via multicast netlink socket */ + +#define CAP_AUDIT_READ 37 + + +#define CAP_LAST_CAP CAP_AUDIT_READ #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/kernel/audit.c b/kernel/audit.c index 7c2893602d06..223cb746f141 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1076,10 +1076,20 @@ static void audit_receive(struct sk_buff *skb) mutex_unlock(&audit_cmd_mutex); } +/* Run custom bind function on netlink socket group connect or bind requests. */ +static int audit_bind(int group) +{ + if (!capable(CAP_AUDIT_READ)) + return -EPERM; + + return 0; +} + static int __net_init audit_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = audit_receive, + .bind = audit_bind, }; struct audit_net *aunet = net_generic(net, audit_net_id); diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 14d04e63b1f0..be491a74c1ed 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -147,7 +147,7 @@ struct security_class_mapping secclass_map[] = { { "peer", { "recv", NULL } }, { "capability2", { "mac_override", "mac_admin", "syslog", "wake_alarm", "block_suspend", - NULL } }, + "audit_read", NULL } }, { "kernel_service", { "use_as_override", "create_files_as", NULL } }, { "tun_socket", { COMMON_SOCK_PERMS, "attach_queue", NULL } }, -- cgit v1.2.3-71-gd317 From 451f921639fea4600dfb9ab2889332bdcc7b48d3 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 22 Apr 2014 21:31:57 -0400 Subject: audit: add netlink multicast group for log read Add a netlink multicast socket with one group to kaudit for "best-effort" delivery to read-only userspace clients such as systemd, in addition to the existing bidirectional unicast auditd userspace client. Currently, auditd is intended to use the CAP_AUDIT_CONTROL and CAP_AUDIT_WRITE capabilities, but actually uses CAP_NET_ADMIN. The CAP_AUDIT_READ capability is added for use by read-only AUDIT_NLGRP_READLOG netlink multicast group clients to the kaudit subsystem. This will safely give access to services such as systemd to consume audit logs while ensuring write access remains restricted for integrity. Signed-off-by: Richard Guy Briggs Signed-off-by: David S. Miller --- include/uapi/linux/audit.h | 8 ++++++++ kernel/audit.c | 51 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 55 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 11917f747cb4..dfa4c860ccef 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -373,6 +373,14 @@ enum { */ #define AUDIT_MESSAGE_TEXT_MAX 8560 +/* Multicast Netlink socket groups (default up to 32) */ +enum audit_nlgrps { + AUDIT_NLGRP_NONE, /* Group 0 not used */ + AUDIT_NLGRP_READLOG, /* "best effort" read only socket */ + __AUDIT_NLGRP_MAX +}; +#define AUDIT_NLGRP_MAX (__AUDIT_NLGRP_MAX - 1) + struct audit_status { __u32 mask; /* Bit mask for valid entries */ __u32 enabled; /* 1 = enabled, 0 = disabled */ diff --git a/kernel/audit.c b/kernel/audit.c index 223cb746f141..d272cc11dff4 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -423,6 +423,35 @@ static void kauditd_send_skb(struct sk_buff *skb) consume_skb(skb); } +/* + * kauditd_send_multicast_skb - send the skb to multicast userspace listeners + * + * This function doesn't consume an skb as might be expected since it has to + * copy it anyways. + */ +static void kauditd_send_multicast_skb(struct sk_buff *skb) +{ + struct sk_buff *copy; + struct audit_net *aunet = net_generic(&init_net, audit_net_id); + struct sock *sock = aunet->nlsk; + + /* + * The seemingly wasteful skb_copy() rather than bumping the refcount + * using skb_get() is necessary because non-standard mods are made to + * the skb by the original kaudit unicast socket send routine. The + * existing auditd daemon assumes this breakage. Fixing this would + * require co-ordinating a change in the established protocol between + * the kaudit kernel subsystem and the auditd userspace code. There is + * no reason for new multicast clients to continue with this + * non-compliance. + */ + copy = skb_copy(skb, GFP_KERNEL); + if (!copy) + return; + + nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL); +} + /* * flush_hold_queue - empty the hold queue if auditd appears * @@ -1090,6 +1119,8 @@ static int __net_init audit_net_init(struct net *net) struct netlink_kernel_cfg cfg = { .input = audit_receive, .bind = audit_bind, + .flags = NL_CFG_F_NONROOT_RECV, + .groups = AUDIT_NLGRP_MAX, }; struct audit_net *aunet = net_generic(net, audit_net_id); @@ -1911,10 +1942,10 @@ out: * audit_log_end - end one audit record * @ab: the audit_buffer * - * The netlink_* functions cannot be called inside an irq context, so - * the audit buffer is placed on a queue and a tasklet is scheduled to - * remove them from the queue outside the irq context. May be called in - * any context. + * netlink_unicast() cannot be called inside an irq context because it blocks + * (last arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed + * on a queue and a tasklet is scheduled to remove them from the queue outside + * the irq context. May be called in any context. */ void audit_log_end(struct audit_buffer *ab) { @@ -1924,6 +1955,18 @@ void audit_log_end(struct audit_buffer *ab) audit_log_lost("rate limit exceeded"); } else { struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); + + kauditd_send_multicast_skb(ab->skb); + + /* + * The original kaudit unicast socket sends up messages with + * nlmsg_len set to the payload length rather than the entire + * message length. This breaks the standard set by netlink. + * The existing auditd daemon assumes this breakage. Fixing + * this would require co-ordinating a change in the established + * protocol between the kaudit kernel subsystem and the auditd + * userspace code. + */ nlh->nlmsg_len = ab->skb->len - NLMSG_HDRLEN; if (audit_pid) { -- cgit v1.2.3-71-gd317 From 2e71029e2c32ecd59a2e8f351517bfbbad42ac11 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 22 Apr 2014 21:48:30 +0900 Subject: xfrm: Remove useless xfrm_audit struct. Commit f1370cc4 "xfrm: Remove useless secid field from xfrm_audit." changed "struct xfrm_audit" to have either { audit_get_loginuid(current) / audit_get_sessionid(current) } or { INVALID_UID / -1 } pair. This means that we can represent "struct xfrm_audit" as "bool". This patch replaces "struct xfrm_audit" argument with "bool". Signed-off-by: Tetsuo Handa Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 42 +++++++++++++++++++----------------------- net/key/af_key.c | 30 +++++++----------------------- net/xfrm/xfrm_policy.c | 40 ++++++++++++++-------------------------- net/xfrm/xfrm_state.c | 34 ++++++++++++---------------------- net/xfrm/xfrm_user.c | 38 ++++++++------------------------------ 5 files changed, 60 insertions(+), 124 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 882889eb156b..721e9c3b11bd 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -691,12 +691,6 @@ struct xfrm_spi_skb_cb { #define XFRM_SPI_SKB_CB(__skb) ((struct xfrm_spi_skb_cb *)&((__skb)->cb[0])) -/* Audit Information */ -struct xfrm_audit { - kuid_t loginuid; - unsigned int sessionid; -}; - #ifdef CONFIG_AUDITSYSCALL static inline struct audit_buffer *xfrm_audit_start(const char *op) { @@ -712,22 +706,24 @@ static inline struct audit_buffer *xfrm_audit_start(const char *op) return audit_buf; } -static inline void xfrm_audit_helper_usrinfo(kuid_t auid, unsigned int ses, +static inline void xfrm_audit_helper_usrinfo(bool task_valid, struct audit_buffer *audit_buf) { - audit_log_format(audit_buf, " auid=%u ses=%u", - from_kuid(&init_user_ns, auid), ses); + const unsigned int auid = from_kuid(&init_user_ns, task_valid ? + audit_get_loginuid(current) : + INVALID_UID); + const unsigned int ses = task_valid ? audit_get_sessionid(current) : + (unsigned int) -1; + + audit_log_format(audit_buf, " auid=%u ses=%u", auid, ses); audit_log_task_context(audit_buf); } -void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, kuid_t auid, - unsigned int ses); -void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, kuid_t auid, - unsigned int ses); -void xfrm_audit_state_add(struct xfrm_state *x, int result, kuid_t auid, - unsigned int ses); -void xfrm_audit_state_delete(struct xfrm_state *x, int result, kuid_t auid, - unsigned int ses); +void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid); +void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, + bool task_valid); +void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid); +void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid); void xfrm_audit_state_replay_overflow(struct xfrm_state *x, struct sk_buff *skb); void xfrm_audit_state_replay(struct xfrm_state *x, struct sk_buff *skb, @@ -740,22 +736,22 @@ void xfrm_audit_state_icvfail(struct xfrm_state *x, struct sk_buff *skb, #else static inline void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - kuid_t auid, unsigned int ses) + bool task_valid) { } static inline void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - kuid_t auid, unsigned int ses) + bool task_valid) { } static inline void xfrm_audit_state_add(struct xfrm_state *x, int result, - kuid_t auid, unsigned int ses) + bool task_valid) { } static inline void xfrm_audit_state_delete(struct xfrm_state *x, int result, - kuid_t auid, unsigned int ses) + bool task_valid) { } @@ -1499,7 +1495,7 @@ struct xfrmk_spdinfo { struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); int xfrm_state_delete(struct xfrm_state *x); -int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info); +int xfrm_state_flush(struct net *net, u8 proto, bool task_valid); void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); @@ -1594,7 +1590,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, int *err); struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir, u32 id, int delete, int *err); -int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info); +int xfrm_policy_flush(struct net *net, u8 type, bool task_valid); u32 xfrm_get_acqseq(void); int verify_spi_info(u8 proto, u32 min, u32 max); int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); diff --git a/net/key/af_key.c b/net/key/af_key.c index d66ff72adefb..b47f8e542aae 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1476,9 +1476,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg else err = xfrm_state_update(x); - xfrm_audit_state_add(x, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current)); + xfrm_audit_state_add(x, err ? 0 : 1, true); if (err < 0) { x->km.state = XFRM_STATE_DEAD; @@ -1532,9 +1530,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_ c.event = XFRM_MSG_DELSA; km_state_notify(x, &c); out: - xfrm_audit_state_delete(x, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current)); + xfrm_audit_state_delete(x, err ? 0 : 1, true); xfrm_state_put(x); return err; @@ -1726,16 +1722,13 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m struct net *net = sock_net(sk); unsigned int proto; struct km_event c; - struct xfrm_audit audit_info; int err, err2; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return -EINVAL; - audit_info.loginuid = audit_get_loginuid(current); - audit_info.sessionid = audit_get_sessionid(current); - err = xfrm_state_flush(net, proto, &audit_info); + err = xfrm_state_flush(net, proto, true); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - go quietly */ @@ -2287,9 +2280,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, hdr->sadb_msg_type != SADB_X_SPDUPDATE); - xfrm_audit_policy_add(xp, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current)); + xfrm_audit_policy_add(xp, err ? 0 : 1, true); if (err) goto out; @@ -2371,9 +2362,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa if (xp == NULL) return -ENOENT; - xfrm_audit_policy_delete(xp, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current)); + xfrm_audit_policy_delete(xp, err ? 0 : 1, true); if (err) goto out; @@ -2621,9 +2610,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ return -ENOENT; if (delete) { - xfrm_audit_policy_delete(xp, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current)); + xfrm_audit_policy_delete(xp, err ? 0 : 1, true); if (err) goto out; @@ -2732,12 +2719,9 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad { struct net *net = sock_net(sk); struct km_event c; - struct xfrm_audit audit_info; int err, err2; - audit_info.loginuid = audit_get_loginuid(current); - audit_info.sessionid = audit_get_sessionid(current); - err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); + err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - old silent behavior */ diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index bd001b7062c0..375267d15c8f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -769,7 +769,7 @@ EXPORT_SYMBOL(xfrm_policy_byid); #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int -xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) { int dir, err = 0; @@ -783,9 +783,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi continue; err = security_xfrm_policy_delete(pol->security); if (err) { - xfrm_audit_policy_delete(pol, 0, - audit_info->loginuid, - audit_info->sessionid); + xfrm_audit_policy_delete(pol, 0, task_valid); return err; } } @@ -799,8 +797,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi pol->security); if (err) { xfrm_audit_policy_delete(pol, 0, - audit_info->loginuid, - audit_info->sessionid); + task_valid); return err; } } @@ -810,19 +807,19 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi } #else static inline int -xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) { return 0; } #endif -int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) +int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) { int dir, err = 0, cnt = 0; write_lock_bh(&net->xfrm.xfrm_policy_lock); - err = xfrm_policy_flush_secctx_check(net, type, audit_info); + err = xfrm_policy_flush_secctx_check(net, type, task_valid); if (err) goto out; @@ -839,8 +836,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) write_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; - xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, - audit_info->sessionid); + xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); @@ -859,9 +855,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) write_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; - xfrm_audit_policy_delete(pol, 1, - audit_info->loginuid, - audit_info->sessionid); + xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); write_lock_bh(&net->xfrm.xfrm_policy_lock); @@ -2858,19 +2852,14 @@ out_byidx: static void xfrm_policy_fini(struct net *net) { - struct xfrm_audit audit_info; unsigned int sz; int dir; flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY - audit_info.loginuid = INVALID_UID; - audit_info.sessionid = (unsigned int)-1; - xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info); + xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false); #endif - audit_info.loginuid = INVALID_UID; - audit_info.sessionid = (unsigned int)-1; - xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); + xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false); WARN_ON(!list_empty(&net->xfrm.policy_all)); @@ -2985,15 +2974,14 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, } } -void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - kuid_t auid, unsigned int sessionid) +void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-add"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, sessionid, audit_buf); + xfrm_audit_helper_usrinfo(task_valid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); @@ -3001,14 +2989,14 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - kuid_t auid, unsigned int sessionid) + bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-delete"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, sessionid, audit_buf); + xfrm_audit_helper_usrinfo(task_valid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d91312b5ceb0..0ab54134bb40 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -463,9 +463,7 @@ expired: if (!err) km_state_expired(x, 1, 0); - xfrm_audit_state_delete(x, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current)); + xfrm_audit_state_delete(x, err ? 0 : 1, true); out: spin_unlock(&x->lock); @@ -562,7 +560,7 @@ EXPORT_SYMBOL(xfrm_state_delete); #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int -xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info) +xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) { int i, err = 0; @@ -572,9 +570,7 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { - xfrm_audit_state_delete(x, 0, - audit_info->loginuid, - audit_info->sessionid); + xfrm_audit_state_delete(x, 0, task_valid); return err; } } @@ -584,18 +580,18 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi } #else static inline int -xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info) +xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) { return 0; } #endif -int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info) +int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) { int i, err = 0, cnt = 0; spin_lock_bh(&net->xfrm.xfrm_state_lock); - err = xfrm_state_flush_secctx_check(net, proto, audit_info); + err = xfrm_state_flush_secctx_check(net, proto, task_valid); if (err) goto out; @@ -611,8 +607,7 @@ restart: err = xfrm_state_delete(x); xfrm_audit_state_delete(x, err ? 0 : 1, - audit_info->loginuid, - audit_info->sessionid); + task_valid); xfrm_state_put(x); if (!err) cnt++; @@ -2126,13 +2121,10 @@ out_bydst: void xfrm_state_fini(struct net *net) { - struct xfrm_audit audit_info; unsigned int sz; flush_work(&net->xfrm.state_hash_work); - audit_info.loginuid = INVALID_UID; - audit_info.sessionid = (unsigned int)-1; - xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info); + xfrm_state_flush(net, IPSEC_PROTO_ANY, false); flush_work(&net->xfrm.state_gc_work); WARN_ON(!list_empty(&net->xfrm.state_all)); @@ -2195,30 +2187,28 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family, } } -void xfrm_audit_state_add(struct xfrm_state *x, int result, - kuid_t auid, unsigned int sessionid) +void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SAD-add"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, sessionid, audit_buf); + xfrm_audit_helper_usrinfo(task_valid, audit_buf); xfrm_audit_helper_sainfo(x, audit_buf); audit_log_format(audit_buf, " res=%u", result); audit_log_end(audit_buf); } EXPORT_SYMBOL_GPL(xfrm_audit_state_add); -void xfrm_audit_state_delete(struct xfrm_state *x, int result, - kuid_t auid, unsigned int sessionid) +void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SAD-delete"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, sessionid, audit_buf); + xfrm_audit_helper_usrinfo(task_valid, audit_buf); xfrm_audit_helper_sainfo(x, audit_buf); audit_log_format(audit_buf, " res=%u", result); audit_log_end(audit_buf); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d6409d927b82..3d4b4c464091 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -597,8 +597,6 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_state *x; int err; struct km_event c; - kuid_t loginuid = audit_get_loginuid(current); - unsigned int sessionid = audit_get_sessionid(current); err = verify_newsa_info(p, attrs); if (err) @@ -614,7 +612,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, else err = xfrm_state_update(x); - xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid); + xfrm_audit_state_add(x, err ? 0 : 1, true); if (err < 0) { x->km.state = XFRM_STATE_DEAD; @@ -674,8 +672,6 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = nlmsg_data(nlh); - kuid_t loginuid = audit_get_loginuid(current); - unsigned int sessionid = audit_get_sessionid(current); x = xfrm_user_state_lookup(net, p, attrs, &err); if (x == NULL) @@ -700,7 +696,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, km_state_notify(x, &c); out: - xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid); + xfrm_audit_state_delete(x, err ? 0 : 1, true); xfrm_state_put(x); return err; } @@ -1410,8 +1406,6 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; int err; int excl; - kuid_t loginuid = audit_get_loginuid(current); - unsigned int sessionid = audit_get_sessionid(current); err = verify_newpolicy_info(p); if (err) @@ -1430,7 +1424,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, * a type XFRM_MSG_UPDPOLICY - JHS */ excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); - xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid); + xfrm_audit_policy_add(xp, err ? 0 : 1, true); if (err) { security_xfrm_policy_free(xp->security); @@ -1667,10 +1661,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, NETLINK_CB(skb).portid); } } else { - kuid_t loginuid = audit_get_loginuid(current); - unsigned int sessionid = audit_get_sessionid(current); - - xfrm_audit_policy_delete(xp, err ? 0 : 1, loginuid, sessionid); + xfrm_audit_policy_delete(xp, err ? 0 : 1, true); if (err != 0) goto out; @@ -1695,12 +1686,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct km_event c; struct xfrm_usersa_flush *p = nlmsg_data(nlh); - struct xfrm_audit audit_info; int err; - audit_info.loginuid = audit_get_loginuid(current); - audit_info.sessionid = audit_get_sessionid(current); - err = xfrm_state_flush(net, p->proto, &audit_info); + err = xfrm_state_flush(net, p->proto, true); if (err) { if (err == -ESRCH) /* empty table */ return 0; @@ -1884,15 +1872,12 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; u8 type = XFRM_POLICY_TYPE_MAIN; int err; - struct xfrm_audit audit_info; err = copy_from_user_policy_type(&type, attrs); if (err) return err; - audit_info.loginuid = audit_get_loginuid(current); - audit_info.sessionid = audit_get_sessionid(current); - err = xfrm_policy_flush(net, type, &audit_info); + err = xfrm_policy_flush(net, type, true); if (err) { if (err == -ESRCH) /* empty table */ return 0; @@ -1958,12 +1943,8 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, err = 0; if (up->hard) { - kuid_t loginuid = audit_get_loginuid(current); - unsigned int sessionid = audit_get_sessionid(current); - xfrm_policy_delete(xp, p->dir); - xfrm_audit_policy_delete(xp, 1, loginuid, sessionid); - + xfrm_audit_policy_delete(xp, 1, true); } else { // reset the timers here? WARN(1, "Dont know what to do with soft policy expire\n"); @@ -1999,11 +1980,8 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, km_state_expired(x, ue->hard, nlh->nlmsg_pid); if (ue->hard) { - kuid_t loginuid = audit_get_loginuid(current); - unsigned int sessionid = audit_get_sessionid(current); - __xfrm_state_delete(x); - xfrm_audit_state_delete(x, 1, loginuid, sessionid); + xfrm_audit_state_delete(x, 1, true); } err = 0; out: -- cgit v1.2.3-71-gd317 From fc8fd40eb29a936cc689d0008863d39a67741c67 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 3 Nov 2013 20:46:34 +0100 Subject: drm: Rip out totally bogus vga_switcheroo->can_switch locking So I just wanted to add a new field to struct drm_device and accidentally stumbled over something. According to comments dev->open_count is protected by dev->count_lock, but that's totally not the case. It's protected by drm_global_mutex. Unfortunately the vga switcheroo callbacks took this comment at face value. The problem is that we can't just take the drm_global_mutex because: - It would lead to a locking inversion with the driver load/unload paths. - It wouldn't actually protect anything, for that we'd need to wrap the entire vga switcheroo code in the drm_global_mutex. And I'm not sure whether that would actually solve anything. What we probably want is a try_to_grab_switcheroo reference kind of thing which is used in the driver's ->open callback. Then we could move all that ->can_switch madness into the vga switcheroo core where it really belongs. But since that would amount to real work take the easy way out and just add a comment. It's definitely not going to make anything worse since doing switcheroo state changes while restarting X just isn't recommended. Even though the delayed switching code does exactly that. v2: - Simplify the ->can_switch implementations more (Thierry) - Fix comment about the dev->open_count locking (Thierry) Cc: Thierry Reding Reviewed-by: Laurent Pinchart (v1) Reviewed-by: Thierry Reding Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 11 ++++++----- drivers/gpu/drm/nouveau/nouveau_vga.c | 11 ++++++----- drivers/gpu/drm/radeon/radeon_device.c | 11 ++++++----- include/drm/drmP.h | 2 +- 4 files changed, 19 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 96177eec0a0e..283ff06001bc 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1277,12 +1277,13 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_ static bool i915_switcheroo_can_switch(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); - bool can_switch; - spin_lock(&dev->count_lock); - can_switch = (dev->open_count == 0); - spin_unlock(&dev->count_lock); - return can_switch; + /* + * FIXME: open_count is protected by drm_global_mutex but that would lead to + * locking inversion with the driver load path. And the access here is + * completely racy anyway. So don't bother with locking for now. + */ + return dev->open_count == 0; } static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c index fb84da3cb50d..4f4c3fec6916 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vga.c +++ b/drivers/gpu/drm/nouveau/nouveau_vga.c @@ -64,12 +64,13 @@ static bool nouveau_switcheroo_can_switch(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); - bool can_switch; - spin_lock(&dev->count_lock); - can_switch = (dev->open_count == 0); - spin_unlock(&dev->count_lock); - return can_switch; + /* + * FIXME: open_count is protected by drm_global_mutex but that would lead to + * locking inversion with the driver load path. And the access here is + * completely racy anyway. So don't bother with locking for now. + */ + return dev->open_count == 0; } static const struct vga_switcheroo_client_ops diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 511fe26198e4..9aa1afd1786e 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1125,12 +1125,13 @@ static void radeon_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero static bool radeon_switcheroo_can_switch(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); - bool can_switch; - spin_lock(&dev->count_lock); - can_switch = (dev->open_count == 0); - spin_unlock(&dev->count_lock); - return can_switch; + /* + * FIXME: open_count is protected by drm_global_mutex but that would lead to + * locking inversion with the driver load path. And the access here is + * completely racy anyway. So don't bother with locking for now. + */ + return dev->open_count == 0; } static const struct vga_switcheroo_client_ops radeon_switcheroo_ops = { diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 9f1fb8d36b67..a20d882ca265 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1076,7 +1076,7 @@ struct drm_device { /** \name Usage Counters */ /*@{ */ - int open_count; /**< Outstanding files open */ + int open_count; /**< Outstanding files open, protected by drm_global_mutex. */ int buf_use; /**< Buffers in use -- cannot alloc */ atomic_t buf_alloc; /**< Buffer allocation in progress */ /*@} */ -- cgit v1.2.3-71-gd317 From 2177a2182f3f375f64d9938dd884895c3872c380 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 16 Dec 2013 11:21:06 +0100 Subject: drm: rename dev->count_lock to dev->buf_lock Since really that's all it protects - legacy horror stories in drm_bufs.c. Since I don't want to waste any more time on this I didn't bother to actually look at what it protects in there, but it's at least contained now. v2: Move the spurious hunk to the right patch (Thierry). Cc: Thierry Reding Reviewed-by: Thierry Reding Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_bufs.c | 32 ++++++++++++++++---------------- drivers/gpu/drm/drm_stub.c | 2 +- include/drm/drmP.h | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c index edec31fe3fed..ef7f0199a0f1 100644 --- a/drivers/gpu/drm/drm_bufs.c +++ b/drivers/gpu/drm/drm_bufs.c @@ -656,13 +656,13 @@ int drm_addbufs_agp(struct drm_device * dev, struct drm_buf_desc * request) DRM_DEBUG("zone invalid\n"); return -EINVAL; } - spin_lock(&dev->count_lock); + spin_lock(&dev->buf_lock); if (dev->buf_use) { - spin_unlock(&dev->count_lock); + spin_unlock(&dev->buf_lock); return -EBUSY; } atomic_inc(&dev->buf_alloc); - spin_unlock(&dev->count_lock); + spin_unlock(&dev->buf_lock); mutex_lock(&dev->struct_mutex); entry = &dma->bufs[order]; @@ -805,13 +805,13 @@ int drm_addbufs_pci(struct drm_device * dev, struct drm_buf_desc * request) page_order = order - PAGE_SHIFT > 0 ? order - PAGE_SHIFT : 0; total = PAGE_SIZE << page_order; - spin_lock(&dev->count_lock); + spin_lock(&dev->buf_lock); if (dev->buf_use) { - spin_unlock(&dev->count_lock); + spin_unlock(&dev->buf_lock); return -EBUSY; } atomic_inc(&dev->buf_alloc); - spin_unlock(&dev->count_lock); + spin_unlock(&dev->buf_lock); mutex_lock(&dev->struct_mutex); entry = &dma->bufs[order]; @@ -1015,13 +1015,13 @@ static int drm_addbufs_sg(struct drm_device * dev, struct drm_buf_desc * request if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER) return -EINVAL; - spin_lock(&dev->count_lock); + spin_lock(&dev->buf_lock); if (dev->buf_use) { - spin_unlock(&dev->count_lock); + spin_unlock(&dev->buf_lock); return -EBUSY; } atomic_inc(&dev->buf_alloc); - spin_unlock(&dev->count_lock); + spin_unlock(&dev->buf_lock); mutex_lock(&dev->struct_mutex); entry = &dma->bufs[order]; @@ -1175,7 +1175,7 @@ int drm_addbufs(struct drm_device *dev, void *data, * \param arg pointer to a drm_buf_info structure. * \return zero on success or a negative number on failure. * - * Increments drm_device::buf_use while holding the drm_device::count_lock + * Increments drm_device::buf_use while holding the drm_device::buf_lock * lock, preventing of allocating more buffers after this call. Information * about each requested buffer is then copied into user space. */ @@ -1196,13 +1196,13 @@ int drm_infobufs(struct drm_device *dev, void *data, if (!dma) return -EINVAL; - spin_lock(&dev->count_lock); + spin_lock(&dev->buf_lock); if (atomic_read(&dev->buf_alloc)) { - spin_unlock(&dev->count_lock); + spin_unlock(&dev->buf_lock); return -EBUSY; } ++dev->buf_use; /* Can't allocate more after this call */ - spin_unlock(&dev->count_lock); + spin_unlock(&dev->buf_lock); for (i = 0, count = 0; i < DRM_MAX_ORDER + 1; i++) { if (dma->bufs[i].buf_count) @@ -1381,13 +1381,13 @@ int drm_mapbufs(struct drm_device *dev, void *data, if (!dma) return -EINVAL; - spin_lock(&dev->count_lock); + spin_lock(&dev->buf_lock); if (atomic_read(&dev->buf_alloc)) { - spin_unlock(&dev->count_lock); + spin_unlock(&dev->buf_lock); return -EBUSY; } dev->buf_use++; /* Can't allocate more after this call */ - spin_unlock(&dev->count_lock); + spin_unlock(&dev->buf_lock); if (request->count >= dma->buf_count) { if ((dev->agp && (dma->flags & _DRM_DMA_USE_AGP)) diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c index 4c24c3ac1efa..5394b201c3d0 100644 --- a/drivers/gpu/drm/drm_stub.c +++ b/drivers/gpu/drm/drm_stub.c @@ -569,7 +569,7 @@ struct drm_device *drm_dev_alloc(struct drm_driver *driver, INIT_LIST_HEAD(&dev->maplist); INIT_LIST_HEAD(&dev->vblank_event_list); - spin_lock_init(&dev->count_lock); + spin_lock_init(&dev->buf_lock); spin_lock_init(&dev->event_lock); mutex_init(&dev->struct_mutex); mutex_init(&dev->ctxlist_mutex); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index a20d882ca265..85682d959c7e 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1069,7 +1069,6 @@ struct drm_device { /** \name Locks */ /*@{ */ - spinlock_t count_lock; /**< For inuse, drm_device::open_count, drm_device::buf_use */ struct mutex struct_mutex; /**< For others */ struct mutex master_mutex; /**< For drm_minor::master and drm_file::is_master */ /*@} */ @@ -1077,6 +1076,7 @@ struct drm_device { /** \name Usage Counters */ /*@{ */ int open_count; /**< Outstanding files open, protected by drm_global_mutex. */ + spinlock_t buf_lock; /**< For drm_device::buf_use and a few other things. */ int buf_use; /**< Buffers in use -- cannot alloc */ atomic_t buf_alloc; /**< Buffer allocation in progress */ /*@} */ -- cgit v1.2.3-71-gd317 From 7c1a38e391745cca72627979e5e02924bed5b43d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 16 Dec 2013 11:21:15 +0100 Subject: drm/irq: track the irq installed in drm_irq_install in dev->irq To get rid of the dev->bus->get_irq callback we need to pass in the desired irq explicitly into drm_irq_install. To avoid having to do the same for drm_irq_unistall just track it internally. That leaves drivers with less room to botch things up. v2: Add the hunk lost in an earlier patch to this one (Thierry). v3: Fix up the totally fumbled logic in drm_irq_install and use the local variable consistently. Spotted by both Thierry and Laurent. Shame on me for failing to properly test the rebase version of this patch ... Cc: Thierry Reding Cc: Laurent Pinchart Reviewed-by: Thierry Reding Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 18 +++++++++++------- include/drm/drmP.h | 2 ++ 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 589e865832cd..7cf407bbfed5 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -249,14 +249,16 @@ static inline int drm_dev_to_irq(struct drm_device *dev) */ int drm_irq_install(struct drm_device *dev) { - int ret; + int ret, irq; unsigned long sh_flags = 0; char *irqname; + irq = drm_dev_to_irq(dev); + if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) return -EINVAL; - if (drm_dev_to_irq(dev) == 0) + if (irq == 0) return -EINVAL; /* Driver must have been initialized */ @@ -267,7 +269,7 @@ int drm_irq_install(struct drm_device *dev) return -EBUSY; dev->irq_enabled = true; - DRM_DEBUG("irq=%d\n", drm_dev_to_irq(dev)); + DRM_DEBUG("irq=%d\n", irq); /* Before installing handler */ if (dev->driver->irq_preinstall) @@ -282,7 +284,7 @@ int drm_irq_install(struct drm_device *dev) else irqname = dev->driver->name; - ret = request_irq(drm_dev_to_irq(dev), dev->driver->irq_handler, + ret = request_irq(irq, dev->driver->irq_handler, sh_flags, irqname, dev); if (ret < 0) { @@ -301,7 +303,9 @@ int drm_irq_install(struct drm_device *dev) dev->irq_enabled = false; if (!drm_core_check_feature(dev, DRIVER_MODESET)) vga_client_register(dev->pdev, NULL, NULL, NULL); - free_irq(drm_dev_to_irq(dev), dev); + free_irq(irq, dev); + } else { + dev->irq = irq; } return ret; @@ -344,7 +348,7 @@ int drm_irq_uninstall(struct drm_device *dev) if (!irq_enabled) return -EINVAL; - DRM_DEBUG("irq=%d\n", drm_dev_to_irq(dev)); + DRM_DEBUG("irq=%d\n", dev->irq); if (!drm_core_check_feature(dev, DRIVER_MODESET)) vga_client_register(dev->pdev, NULL, NULL, NULL); @@ -352,7 +356,7 @@ int drm_irq_uninstall(struct drm_device *dev) if (dev->driver->irq_uninstall) dev->driver->irq_uninstall(dev); - free_irq(drm_dev_to_irq(dev), dev); + free_irq(dev->irq, dev); return 0; } diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 85682d959c7e..8e8c392d6fa8 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1107,6 +1107,8 @@ struct drm_device { /** \name Context support */ /*@{ */ bool irq_enabled; /**< True if irq handler is enabled */ + int irq; + __volatile__ long context_flag; /**< Context swapping flag */ int last_context; /**< Last current context */ /*@} */ -- cgit v1.2.3-71-gd317 From bb0f1b5c1695b4399cfd2359c114ae63edbb3ad8 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 3 Nov 2013 21:09:27 +0100 Subject: drm: pass the irq explicitly to drm_irq_install Unfortunately this requires a drm-wide change, and I didn't see a sane way around that. Luckily it's fairly simple, we just need to inline the respective get_irq implementation from either drm_pci.c or drm_platform.c. With that we can now also remove drm_dev_to_irq from drm_irq.c. Reviewed-by: Thierry Reding Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 10 +--------- drivers/gpu/drm/armada/armada_drv.c | 2 +- drivers/gpu/drm/drm_irq.c | 13 +++---------- drivers/gpu/drm/gma500/psb_drv.c | 2 +- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/i915_drv.c | 4 ++-- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/msm/msm_drv.c | 2 +- drivers/gpu/drm/qxl/qxl_irq.c | 2 +- drivers/gpu/drm/radeon/radeon_irq_kms.c | 2 +- drivers/gpu/drm/shmobile/shmob_drm_drv.c | 2 +- drivers/gpu/drm/tilcdc/tilcdc_drv.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- include/drm/drmP.h | 2 +- 14 files changed, 17 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 677a02553ec0..83dd0b043c28 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -341,14 +341,6 @@ char *date; Managed IRQ Registration - - Both the drm_irq_install and - drm_irq_uninstall functions get the device IRQ by - calling drm_dev_to_irq. This inline function will - call a bus-specific operation to retrieve the IRQ number. For platform - devices, platform_get_irq(..., 0) is used to - retrieve the IRQ number. - drm_irq_install starts by calling the irq_preinstall driver operation. The operation @@ -356,7 +348,7 @@ char *date; clearing all pending interrupt flags or disabling the interrupt. - The IRQ will then be requested by a call to + The passed-in IRQ will then be requested by a call to request_irq. If the DRIVER_IRQ_SHARED driver feature flag is set, a shared (IRQF_SHARED) IRQ handler will be requested. diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c index 32982da82694..567cfbde0883 100644 --- a/drivers/gpu/drm/armada/armada_drv.c +++ b/drivers/gpu/drm/armada/armada_drv.c @@ -173,7 +173,7 @@ static int armada_drm_load(struct drm_device *dev, unsigned long flags) if (ret) goto err_kms; - ret = drm_irq_install(dev); + ret = drm_irq_install(dev, platform_get_irq(dev->platformdev, 0)); if (ret) goto err_kms; diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index cbf6f259bfe4..de38bc9b6581 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -233,11 +233,6 @@ static void drm_irq_vgaarb_nokms(void *cookie, bool state) } } -static inline int drm_dev_to_irq(struct drm_device *dev) -{ - return dev->driver->bus->get_irq(dev); -} - /** * Install IRQ handler. * @@ -247,14 +242,12 @@ static inline int drm_dev_to_irq(struct drm_device *dev) * \c irq_preinstall() and \c irq_postinstall() functions * before and after the installation. */ -int drm_irq_install(struct drm_device *dev) +int drm_irq_install(struct drm_device *dev, int irq) { - int ret, irq; + int ret; unsigned long sh_flags = 0; char *irqname; - irq = drm_dev_to_irq(dev); - if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) return -EINVAL; @@ -399,7 +392,7 @@ int drm_control(struct drm_device *dev, void *data, ctl->irq != irq) return -EINVAL; mutex_lock(&dev->struct_mutex); - ret = drm_irq_install(dev); + ret = drm_irq_install(dev, irq); mutex_unlock(&dev->struct_mutex); return ret; diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c index b686e56646eb..0a3101a3db19 100644 --- a/drivers/gpu/drm/gma500/psb_drv.c +++ b/drivers/gpu/drm/gma500/psb_drv.c @@ -354,7 +354,7 @@ static int psb_driver_load(struct drm_device *dev, unsigned long flags) PSB_WVDC32(0xFFFFFFFF, PSB_INT_MASK_R); spin_unlock_irqrestore(&dev_priv->irqmask_lock, irqflags); - drm_irq_install(dev); + drm_irq_install(dev, dev->pdev->irq); dev->vblank_disable_allowed = true; dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */ diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 283ff06001bc..42de2808e53d 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1327,7 +1327,7 @@ static int i915_load_modeset_init(struct drm_device *dev) intel_power_domains_init_hw(dev_priv); - ret = drm_irq_install(dev); + ret = drm_irq_install(dev, dev->pdev->irq); if (ret) goto cleanup_gem_stolen; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 87ce105910f2..6124b491a19e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -574,7 +574,7 @@ static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings) mutex_unlock(&dev->struct_mutex); /* We need working interrupts for modeset enabling ... */ - drm_irq_install(dev); + drm_irq_install(dev, dev->pdev->irq); intel_modeset_init_hw(dev); @@ -752,7 +752,7 @@ int i915_reset(struct drm_device *dev) * being false when they shouldn't be able to. */ drm_irq_uninstall(dev); - drm_irq_install(dev); + drm_irq_install(dev, dev->pdev->irq); /* rps/rc6 re-init is necessary to restore state lost after the * reset and the re-install of drm irq. Skip for ironlake per diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d2cc19a7023f..a94938f08698 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4523,7 +4523,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, BUG_ON(!list_empty(&dev_priv->gtt.base.active_list)); - ret = drm_irq_install(dev); + ret = drm_irq_install(dev, dev->pdev->irq); if (ret) goto cleanup_ringbuffer; mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index f9de156b9e65..50ec1bed5820 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -288,7 +288,7 @@ static int msm_load(struct drm_device *dev, unsigned long flags) } pm_runtime_get_sync(dev->dev); - ret = drm_irq_install(dev); + ret = drm_irq_install(dev, platform_get_irq(dev->platformdev, 0)); pm_runtime_put_sync(dev->dev); if (ret < 0) { dev_err(dev->dev, "failed to install IRQ handler\n"); diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c index 28f84b4fce32..34d6a85e9023 100644 --- a/drivers/gpu/drm/qxl/qxl_irq.c +++ b/drivers/gpu/drm/qxl/qxl_irq.c @@ -87,7 +87,7 @@ int qxl_irq_init(struct qxl_device *qdev) atomic_set(&qdev->irq_received_cursor, 0); atomic_set(&qdev->irq_received_io_cmd, 0); qdev->irq_received_error = 0; - ret = drm_irq_install(qdev->ddev); + ret = drm_irq_install(qdev->ddev, qdev->ddev->pdev->irq); qdev->ram_header->int_mask = QXL_INTERRUPT_MASK; if (unlikely(ret != 0)) { DRM_ERROR("Failed installing irq: %d\n", ret); diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 089c9ffb0aa9..16807afab362 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -287,7 +287,7 @@ int radeon_irq_kms_init(struct radeon_device *rdev) INIT_WORK(&rdev->reset_work, radeon_irq_reset_work_func); rdev->irq.installed = true; - r = drm_irq_install(rdev->ddev); + r = drm_irq_install(rdev->ddev, rdev->ddev->pdev->irq); if (r) { rdev->irq.installed = false; flush_work(&rdev->hotplug_work); diff --git a/drivers/gpu/drm/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/shmobile/shmob_drm_drv.c index c839c9c89efb..82c84c7fd4f6 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_drv.c +++ b/drivers/gpu/drm/shmobile/shmob_drm_drv.c @@ -185,7 +185,7 @@ static int shmob_drm_load(struct drm_device *dev, unsigned long flags) goto done; } - ret = drm_irq_install(dev); + ret = drm_irq_install(dev, platform_get_irq(dev->platformdev, 0)); if (ret < 0) { dev_err(&pdev->dev, "failed to install IRQ handler\n"); goto done; diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c index 171a8203892c..b20b69488dc9 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c @@ -268,7 +268,7 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags) } pm_runtime_get_sync(dev->dev); - ret = drm_irq_install(dev); + ret = drm_irq_install(dev, platform_get_irq(dev->platformdev, 0)); pm_runtime_put_sync(dev->dev); if (ret < 0) { dev_err(dev->dev, "failed to install IRQ handler\n"); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 4a223bbea3b3..6bdd15eea7e8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -806,7 +806,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) } if (dev_priv->capabilities & SVGA_CAP_IRQMASK) { - ret = drm_irq_install(dev); + ret = drm_irq_install(dev, dev->pdev->irq); if (ret != 0) { DRM_ERROR("Failed installing irq: %d\n", ret); goto out_no_irq; diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 8e8c392d6fa8..7a7cfe88b9bf 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1353,7 +1353,7 @@ extern void drm_core_reclaim_buffers(struct drm_device *dev, /* IRQ support (drm_irq.h) */ extern int drm_control(struct drm_device *dev, void *data, struct drm_file *file_priv); -extern int drm_irq_install(struct drm_device *dev); +extern int drm_irq_install(struct drm_device *dev, int irq); extern int drm_irq_uninstall(struct drm_device *dev); extern int drm_vblank_init(struct drm_device *dev, int num_crtcs); -- cgit v1.2.3-71-gd317 From b2a21aa25a39837d06eb24a7f0fef1733f9843eb Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 3 Nov 2013 21:13:09 +0100 Subject: drm: remove bus->get_irq implementations Now that they're all unused we can get rid of them, including the dummy version in drm_usb.c. Reviewed-by: Thierry Reding Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_pci.c | 6 ------ drivers/gpu/drm/drm_platform.c | 6 ------ drivers/gpu/drm/drm_usb.c | 6 ------ include/drm/drmP.h | 1 - 4 files changed, 19 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index 08c76af920d6..eff29e311f70 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -137,11 +137,6 @@ static int drm_get_pci_domain(struct drm_device *dev) return pci_domain_nr(dev->pdev->bus); } -static int drm_pci_get_irq(struct drm_device *dev) -{ - return dev->pdev->irq; -} - static const char *drm_pci_get_name(struct drm_device *dev) { struct pci_driver *pdriver = dev->driver->kdriver.pci; @@ -317,7 +312,6 @@ void drm_pci_agp_destroy(struct drm_device *dev) } static struct drm_bus drm_pci_bus = { - .get_irq = drm_pci_get_irq, .get_name = drm_pci_get_name, .set_busid = drm_pci_set_busid, .set_unique = drm_pci_set_unique, diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c index 5cd8c695824f..5b918212b1c3 100644 --- a/drivers/gpu/drm/drm_platform.c +++ b/drivers/gpu/drm/drm_platform.c @@ -68,11 +68,6 @@ err_free: return ret; } -static int drm_platform_get_irq(struct drm_device *dev) -{ - return platform_get_irq(dev->platformdev, 0); -} - static const char *drm_platform_get_name(struct drm_device *dev) { return dev->platformdev->name; @@ -123,7 +118,6 @@ err: } static struct drm_bus drm_platform_bus = { - .get_irq = drm_platform_get_irq, .get_name = drm_platform_get_name, .set_busid = drm_platform_set_busid, }; diff --git a/drivers/gpu/drm/drm_usb.c b/drivers/gpu/drm/drm_usb.c index ed60f887c805..abdc265c9cc8 100644 --- a/drivers/gpu/drm/drm_usb.c +++ b/drivers/gpu/drm/drm_usb.c @@ -36,11 +36,6 @@ err_free: } EXPORT_SYMBOL(drm_get_usb_dev); -static int drm_usb_get_irq(struct drm_device *dev) -{ - return 0; -} - static const char *drm_usb_get_name(struct drm_device *dev) { return "USB"; @@ -53,7 +48,6 @@ static int drm_usb_set_busid(struct drm_device *dev, } static struct drm_bus drm_usb_bus = { - .get_irq = drm_usb_get_irq, .get_name = drm_usb_get_name, .set_busid = drm_usb_set_busid, }; diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 7a7cfe88b9bf..f4d0eaa3da46 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -726,7 +726,6 @@ struct drm_master { #define DRM_SCANOUTPOS_ACCURATE (1 << 2) struct drm_bus { - int (*get_irq)(struct drm_device *dev); const char *(*get_name)(struct drm_device *dev); int (*set_busid)(struct drm_device *dev, struct drm_master *master); int (*set_unique)(struct drm_device *dev, struct drm_master *master, -- cgit v1.2.3-71-gd317 From 53bf2a2bca9b56e23fa8862159c75868672d7f1e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 3 Nov 2013 21:47:18 +0100 Subject: drm: inline drm_pci_set_unique This is only used for drm versions 1.0, and kms drivers have never been there. So we can appropriately restrict this to legacy and hence pci devices and inline everything. v2: Make the dummy function actually return something, caught by Wu Fengguang's 0-day tester. v3: Fix spelling in comment (Thierry) Reviewed-by: Thierry Reding Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_ioctl.c | 10 +++++++--- drivers/gpu/drm/drm_pci.c | 14 ++++++++++---- include/drm/drmP.h | 5 +++-- 3 files changed, 20 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 93a42040bedb..53560ef53f99 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -93,7 +93,8 @@ drm_unset_busid(struct drm_device *dev, * Copies the bus id from userspace into drm_device::unique, and verifies that * it matches the device this DRM is attached to (EINVAL otherwise). Deprecated * in interface version 1.1 and will return EBUSY when setversion has requested - * version 1.1 or greater. + * version 1.1 or greater. Also note that KMS is all version 1.1 and later and + * UMS was only ever supported on pci devices. */ int drm_setunique(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -108,10 +109,13 @@ int drm_setunique(struct drm_device *dev, void *data, if (!u->unique_len || u->unique_len > 1024) return -EINVAL; - if (!dev->driver->bus->set_unique) + if (drm_core_check_feature(dev, DRIVER_MODESET)) + return 0; + + if (WARN_ON(!dev->pdev)) return -EINVAL; - ret = dev->driver->bus->set_unique(dev, master, u); + ret = drm_pci_set_unique(dev, master, u); if (ret) goto err; diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index eff29e311f70..c550b349f202 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -185,9 +185,9 @@ err: return ret; } -static int drm_pci_set_unique(struct drm_device *dev, - struct drm_master *master, - struct drm_unique *u) +int drm_pci_set_unique(struct drm_device *dev, + struct drm_master *master, + struct drm_unique *u) { int domain, bus, slot, func, ret; const char *bus_name; @@ -314,7 +314,6 @@ void drm_pci_agp_destroy(struct drm_device *dev) static struct drm_bus drm_pci_bus = { .get_name = drm_pci_get_name, .set_busid = drm_pci_set_busid, - .set_unique = drm_pci_set_unique, }; /** @@ -481,6 +480,13 @@ int drm_irq_by_busid(struct drm_device *dev, void *data, { return -EINVAL; } + +int drm_pci_set_unique(struct drm_device *dev, + struct drm_master *master, + struct drm_unique *u) +{ + return -EINVAL; +} #endif EXPORT_SYMBOL(drm_pci_init); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index f4d0eaa3da46..1a9d509bef66 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -728,8 +728,6 @@ struct drm_master { struct drm_bus { const char *(*get_name)(struct drm_device *dev); int (*set_busid)(struct drm_device *dev, struct drm_master *master); - int (*set_unique)(struct drm_device *dev, struct drm_master *master, - struct drm_unique *unique); }; /** @@ -1511,6 +1509,9 @@ extern drm_dma_handle_t *drm_pci_alloc(struct drm_device *dev, size_t size, size_t align); extern void __drm_pci_free(struct drm_device *dev, drm_dma_handle_t * dmah); extern void drm_pci_free(struct drm_device *dev, drm_dma_handle_t * dmah); +extern int drm_pci_set_unique(struct drm_device *dev, + struct drm_master *master, + struct drm_unique *u); /* sysfs support (drm_sysfs.c) */ struct drm_sysfs_class; -- cgit v1.2.3-71-gd317 From 5829d1834e5486e83547f36576b160023c9609c2 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 3 Nov 2013 21:48:48 +0100 Subject: drm: rip out dev->devname This was only ever used to pretty-print the irq driver name. And on kms systems due to set_version bonghits we never set up the prettier name, ever. Which make this a bit pointless. Also, we can always dig out the driver-instance/irq relationship through other means, so this isn't that useful. So just rip it out to simplify the set_version/set_busid insanity a bit. Also delete the temporary busname from drm_pci_set_busid, it's now unused. v2: Rebase on top of the new host1x drm_bus for tegra. Reviewed-by: Thierry Reding Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_ioctl.c | 3 --- drivers/gpu/drm/drm_irq.c | 8 +------- drivers/gpu/drm/drm_pci.c | 25 ------------------------- drivers/gpu/drm/drm_platform.c | 11 ----------- drivers/gpu/drm/drm_stub.c | 5 ----- drivers/gpu/drm/tegra/bus.c | 10 ---------- include/drm/drmP.h | 1 - 7 files changed, 1 insertion(+), 62 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 53560ef53f99..38269d5aa333 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -72,9 +72,6 @@ static void drm_unset_busid(struct drm_device *dev, struct drm_master *master) { - kfree(dev->devname); - dev->devname = NULL; - kfree(master->unique); master->unique = NULL; master->unique_len = 0; diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index de38bc9b6581..7583767ec619 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -246,7 +246,6 @@ int drm_irq_install(struct drm_device *dev, int irq) { int ret; unsigned long sh_flags = 0; - char *irqname; if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) return -EINVAL; @@ -272,13 +271,8 @@ int drm_irq_install(struct drm_device *dev, int irq) if (drm_core_check_feature(dev, DRIVER_IRQ_SHARED)) sh_flags = IRQF_SHARED; - if (dev->devname) - irqname = dev->devname; - else - irqname = dev->driver->name; - ret = request_irq(irq, dev->driver->irq_handler, - sh_flags, irqname, dev); + sh_flags, dev->driver->name, dev); if (ret < 0) { dev->irq_enabled = false; diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index c550b349f202..047c51046d94 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -146,7 +146,6 @@ static const char *drm_pci_get_name(struct drm_device *dev) static int drm_pci_set_busid(struct drm_device *dev, struct drm_master *master) { int len, ret; - struct pci_driver *pdriver = dev->driver->kdriver.pci; master->unique_len = 40; master->unique_size = master->unique_len; master->unique = kmalloc(master->unique_size, GFP_KERNEL); @@ -168,18 +167,6 @@ static int drm_pci_set_busid(struct drm_device *dev, struct drm_master *master) } else master->unique_len = len; - dev->devname = - kmalloc(strlen(pdriver->name) + - master->unique_len + 2, GFP_KERNEL); - - if (dev->devname == NULL) { - ret = -ENOMEM; - goto err; - } - - sprintf(dev->devname, "%s@%s", pdriver->name, - master->unique); - return 0; err: return ret; @@ -190,7 +177,6 @@ int drm_pci_set_unique(struct drm_device *dev, struct drm_unique *u) { int domain, bus, slot, func, ret; - const char *bus_name; master->unique_len = u->unique_len; master->unique_size = u->unique_len + 1; @@ -207,17 +193,6 @@ int drm_pci_set_unique(struct drm_device *dev, master->unique[master->unique_len] = '\0'; - bus_name = dev->driver->bus->get_name(dev); - dev->devname = kmalloc(strlen(bus_name) + - strlen(master->unique) + 2, GFP_KERNEL); - if (!dev->devname) { - ret = -ENOMEM; - goto err; - } - - sprintf(dev->devname, "%s@%s", bus_name, - master->unique); - /* Return error if the busid submitted doesn't match the device's actual * busid. */ diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c index 5b918212b1c3..b97b11ea2dc4 100644 --- a/drivers/gpu/drm/drm_platform.c +++ b/drivers/gpu/drm/drm_platform.c @@ -101,17 +101,6 @@ static int drm_platform_set_busid(struct drm_device *dev, struct drm_master *mas goto err; } - dev->devname = - kmalloc(strlen(dev->platformdev->name) + - master->unique_len + 2, GFP_KERNEL); - - if (dev->devname == NULL) { - ret = -ENOMEM; - goto err; - } - - sprintf(dev->devname, "%s@%s", dev->platformdev->name, - master->unique); return 0; err: return ret; diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c index 5394b201c3d0..3a8e832ad151 100644 --- a/drivers/gpu/drm/drm_stub.c +++ b/drivers/gpu/drm/drm_stub.c @@ -166,9 +166,6 @@ static void drm_master_destroy(struct kref *kref) master->unique_len = 0; } - kfree(dev->devname); - dev->devname = NULL; - list_for_each_entry_safe(pt, next, &master->magicfree, head) { list_del(&pt->head); drm_ht_remove_item(&master->magiclist, &pt->hash_item); @@ -648,8 +645,6 @@ static void drm_dev_release(struct kref *ref) drm_minor_free(dev, DRM_MINOR_RENDER); drm_minor_free(dev, DRM_MINOR_CONTROL); - kfree(dev->devname); - mutex_destroy(&dev->master_mutex); kfree(dev); } diff --git a/drivers/gpu/drm/tegra/bus.c b/drivers/gpu/drm/tegra/bus.c index 6916fa51cfa4..b3a66d65cb53 100644 --- a/drivers/gpu/drm/tegra/bus.c +++ b/drivers/gpu/drm/tegra/bus.c @@ -12,9 +12,7 @@ static int drm_host1x_set_busid(struct drm_device *dev, struct drm_master *master) { const char *device = dev_name(dev->dev); - const char *driver = dev->driver->name; const char *bus = dev->dev->bus->name; - int length; master->unique_len = strlen(bus) + 1 + strlen(device); master->unique_size = master->unique_len; @@ -25,14 +23,6 @@ static int drm_host1x_set_busid(struct drm_device *dev, snprintf(master->unique, master->unique_len + 1, "%s:%s", bus, device); - length = strlen(driver) + 1 + master->unique_len; - - dev->devname = kmalloc(length + 1, GFP_KERNEL); - if (!dev->devname) - return -ENOMEM; - - snprintf(dev->devname, length + 1, "%s@%s", driver, master->unique); - return 0; } diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 1a9d509bef66..56b028d6bc98 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1048,7 +1048,6 @@ struct drm_vblank_crtc { */ struct drm_device { struct list_head legacy_dev_list;/**< list of devices per driver for stealth attach cleanup */ - char *devname; /**< For /proc/interrupts */ int if_version; /**< Highest interface version set */ /** \name Lifetime Management */ -- cgit v1.2.3-71-gd317 From 9de1b51f1fae6476155350a0670dc637c762e718 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 3 Nov 2013 22:24:37 +0100 Subject: drm: remove drm_bus->get_name The only user is the info debugfs file, so we only need something human readable. Now for both pci and platform devices we've used the name of the underlying device driver, which matches the name of the drm driver in all cases. So we can just use that instead. The exception is usb, which used a generic "USB". Not to harmful with just one usb driver, but better to use "udl", too. With that converted we can rip out all the ->get_name implementations. Reviewed-by: Thierry Reding Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_info.c | 6 ++---- drivers/gpu/drm/drm_pci.c | 7 ------- drivers/gpu/drm/drm_platform.c | 6 ------ drivers/gpu/drm/drm_usb.c | 6 ------ include/drm/drmP.h | 1 - 5 files changed, 2 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c index 7473035dd28b..86feedd5e6f6 100644 --- a/drivers/gpu/drm/drm_info.c +++ b/drivers/gpu/drm/drm_info.c @@ -47,18 +47,16 @@ int drm_name_info(struct seq_file *m, void *data) struct drm_minor *minor = node->minor; struct drm_device *dev = minor->dev; struct drm_master *master = minor->master; - const char *bus_name; if (!master) return 0; - bus_name = dev->driver->bus->get_name(dev); if (master->unique) { seq_printf(m, "%s %s %s\n", - bus_name, + dev->driver->name, dev_name(dev->dev), master->unique); } else { seq_printf(m, "%s %s\n", - bus_name, dev_name(dev->dev)); + dev->driver->name, dev_name(dev->dev)); } return 0; } diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index 047c51046d94..1fbe22a254e1 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -137,12 +137,6 @@ static int drm_get_pci_domain(struct drm_device *dev) return pci_domain_nr(dev->pdev->bus); } -static const char *drm_pci_get_name(struct drm_device *dev) -{ - struct pci_driver *pdriver = dev->driver->kdriver.pci; - return pdriver->name; -} - static int drm_pci_set_busid(struct drm_device *dev, struct drm_master *master) { int len, ret; @@ -287,7 +281,6 @@ void drm_pci_agp_destroy(struct drm_device *dev) } static struct drm_bus drm_pci_bus = { - .get_name = drm_pci_get_name, .set_busid = drm_pci_set_busid, }; diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c index b97b11ea2dc4..c7ec27bbe7c6 100644 --- a/drivers/gpu/drm/drm_platform.c +++ b/drivers/gpu/drm/drm_platform.c @@ -68,11 +68,6 @@ err_free: return ret; } -static const char *drm_platform_get_name(struct drm_device *dev) -{ - return dev->platformdev->name; -} - static int drm_platform_set_busid(struct drm_device *dev, struct drm_master *master) { int len, ret, id; @@ -107,7 +102,6 @@ err: } static struct drm_bus drm_platform_bus = { - .get_name = drm_platform_get_name, .set_busid = drm_platform_set_busid, }; diff --git a/drivers/gpu/drm/drm_usb.c b/drivers/gpu/drm/drm_usb.c index abdc265c9cc8..fae4aa4e1426 100644 --- a/drivers/gpu/drm/drm_usb.c +++ b/drivers/gpu/drm/drm_usb.c @@ -36,11 +36,6 @@ err_free: } EXPORT_SYMBOL(drm_get_usb_dev); -static const char *drm_usb_get_name(struct drm_device *dev) -{ - return "USB"; -} - static int drm_usb_set_busid(struct drm_device *dev, struct drm_master *master) { @@ -48,7 +43,6 @@ static int drm_usb_set_busid(struct drm_device *dev, } static struct drm_bus drm_usb_bus = { - .get_name = drm_usb_get_name, .set_busid = drm_usb_set_busid, }; diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 56b028d6bc98..493bbbb300e6 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -726,7 +726,6 @@ struct drm_master { #define DRM_SCANOUTPOS_ACCURATE (1 << 2) struct drm_bus { - const char *(*get_name)(struct drm_device *dev); int (*set_busid)(struct drm_device *dev, struct drm_master *master); }; -- cgit v1.2.3-71-gd317 From f93227759d9bd3b299c288a6bd448161f849cdfd Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 3 Nov 2013 22:32:41 +0100 Subject: drm: Remove dev->kdriver With the last patch to ditch the ->get_name callbacks the last user is now gone. Reviewed-by: Thierry Reding Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_pci.c | 1 - drivers/gpu/drm/drm_platform.c | 1 - drivers/gpu/drm/drm_usb.c | 1 - include/drm/drmP.h | 5 ----- 4 files changed, 8 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index 1fbe22a254e1..d237de36a07a 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -364,7 +364,6 @@ int drm_pci_init(struct drm_driver *driver, struct pci_driver *pdriver) DRM_DEBUG("\n"); - driver->kdriver.pci = pdriver; driver->bus = &drm_pci_bus; if (driver->driver_features & DRIVER_MODESET) diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c index c7ec27bbe7c6..234e0bc1ae51 100644 --- a/drivers/gpu/drm/drm_platform.c +++ b/drivers/gpu/drm/drm_platform.c @@ -121,7 +121,6 @@ int drm_platform_init(struct drm_driver *driver, struct platform_device *platfor { DRM_DEBUG("\n"); - driver->kdriver.platform_device = platform_device; driver->bus = &drm_platform_bus; return drm_get_platform_dev(platform_device, driver); } diff --git a/drivers/gpu/drm/drm_usb.c b/drivers/gpu/drm/drm_usb.c index fae4aa4e1426..c6c7c29ad46f 100644 --- a/drivers/gpu/drm/drm_usb.c +++ b/drivers/gpu/drm/drm_usb.c @@ -51,7 +51,6 @@ int drm_usb_init(struct drm_driver *driver, struct usb_driver *udriver) int res; DRM_DEBUG("\n"); - driver->kdriver.usb = udriver; driver->bus = &drm_usb_bus; res = usb_register(udriver); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 493bbbb300e6..19daabeeffbe 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -963,11 +963,6 @@ struct drm_driver { const struct drm_ioctl_desc *ioctls; int num_ioctls; const struct file_operations *fops; - union { - struct pci_driver *pci; - struct platform_device *platform_device; - struct usb_driver *usb; - } kdriver; struct drm_bus *bus; /* List of devices hanging off this driver with stealth attach. */ -- cgit v1.2.3-71-gd317 From aa45660c6b59388fac3995a8c2998d710ef28fd4 Mon Sep 17 00:00:00 2001 From: Tomasz Bursztyka Date: Mon, 14 Apr 2014 15:41:27 +0300 Subject: netfilter: nf_tables: Make meta expression core functions public This will be useful to create network family dedicated META expression as for NFPROTO_BRIDGE for instance. Signed-off-by: Tomasz Bursztyka Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nft_meta.h | 36 +++++++++++++++++++++++++++++ net/netfilter/nft_meta.c | 50 ++++++++++++++++++++-------------------- 2 files changed, 61 insertions(+), 25 deletions(-) create mode 100644 include/net/netfilter/nft_meta.h (limited to 'include') diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h new file mode 100644 index 000000000000..0ee47c3e2e31 --- /dev/null +++ b/include/net/netfilter/nft_meta.h @@ -0,0 +1,36 @@ +#ifndef _NFT_META_H_ +#define _NFT_META_H_ + +struct nft_meta { + enum nft_meta_keys key:8; + union { + enum nft_registers dreg:8; + enum nft_registers sreg:8; + }; +}; + +extern const struct nla_policy nft_meta_policy[]; + +int nft_meta_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]); + +int nft_meta_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]); + +int nft_meta_get_dump(struct sk_buff *skb, + const struct nft_expr *expr); + +int nft_meta_set_dump(struct sk_buff *skb, + const struct nft_expr *expr); + +void nft_meta_get_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt); + +void nft_meta_set_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt); + +#endif diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 6d0b8cc27f2a..852b178c6ae7 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -18,18 +18,11 @@ #include #include /* for TCP_TIME_WAIT */ #include +#include -struct nft_meta { - enum nft_meta_keys key:8; - union { - enum nft_registers dreg:8; - enum nft_registers sreg:8; - }; -}; - -static void nft_meta_get_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +void nft_meta_get_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; @@ -140,10 +133,11 @@ static void nft_meta_get_eval(const struct nft_expr *expr, err: data[NFT_REG_VERDICT].verdict = NFT_BREAK; } +EXPORT_SYMBOL_GPL(nft_meta_get_eval); -static void nft_meta_set_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +void nft_meta_set_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { const struct nft_meta *meta = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; @@ -163,16 +157,18 @@ static void nft_meta_set_eval(const struct nft_expr *expr, WARN_ON(1); } } +EXPORT_SYMBOL_GPL(nft_meta_set_eval); -static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { +const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { [NFTA_META_DREG] = { .type = NLA_U32 }, [NFTA_META_KEY] = { .type = NLA_U32 }, [NFTA_META_SREG] = { .type = NLA_U32 }, }; +EXPORT_SYMBOL_GPL(nft_meta_policy); -static int nft_meta_get_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +int nft_meta_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_meta *priv = nft_expr_priv(expr); int err; @@ -215,10 +211,11 @@ static int nft_meta_get_init(const struct nft_ctx *ctx, return 0; } +EXPORT_SYMBOL_GPL(nft_meta_get_init); -static int nft_meta_set_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +int nft_meta_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_meta *priv = nft_expr_priv(expr); int err; @@ -240,9 +237,10 @@ static int nft_meta_set_init(const struct nft_ctx *ctx, return 0; } +EXPORT_SYMBOL_GPL(nft_meta_set_init); -static int nft_meta_get_dump(struct sk_buff *skb, - const struct nft_expr *expr) +int nft_meta_get_dump(struct sk_buff *skb, + const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); @@ -255,9 +253,10 @@ static int nft_meta_get_dump(struct sk_buff *skb, nla_put_failure: return -1; } +EXPORT_SYMBOL_GPL(nft_meta_get_dump); -static int nft_meta_set_dump(struct sk_buff *skb, - const struct nft_expr *expr) +int nft_meta_set_dump(struct sk_buff *skb, + const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); @@ -271,6 +270,7 @@ static int nft_meta_set_dump(struct sk_buff *skb, nla_put_failure: return -1; } +EXPORT_SYMBOL_GPL(nft_meta_set_dump); static struct nft_expr_type nft_meta_type; static const struct nft_expr_ops nft_meta_get_ops = { -- cgit v1.2.3-71-gd317 From f392e51cd6ae6f6ee5b9b6d611cdc282b4c1711e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Apr 2014 11:13:14 -0400 Subject: cgroup: update cgroup->subsys_mask to ->child_subsys_mask and restore cgroup_root->subsys_mask 944196278d3d ("cgroup: move ->subsys_mask from cgroupfs_root to cgroup") moved ->subsys_mask from cgroup_root to cgroup to prepare for the unified hierarhcy; however, it turns out that carrying the subsys_mask of the children in the parent, instead of itself, is a lot more natural. This patch restores cgroup_root->subsys_mask and morphs cgroup->subsys_mask into cgroup->child_subsys_mask. * Uses of root->cgrp.subsys_mask are restored to root->subsys_mask. * Remove automatic setting and clearing of cgrp->subsys_mask and instead just inherit ->child_subsys_mask from the parent during cgroup creation. Note that this doesn't affect any current behaviors. * Undo __kill_css() separation. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 7 ++++-- kernel/cgroup.c | 64 +++++++++++++++++++++----------------------------- 2 files changed, 32 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c2515851c1aa..1b5b2fe1b228 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -173,8 +173,8 @@ struct cgroup { */ u64 serial_nr; - /* The bitmask of subsystems attached to this cgroup */ - unsigned long subsys_mask; + /* the bitmask of subsystems enabled on the child cgroups */ + unsigned long child_subsys_mask; /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; @@ -282,6 +282,9 @@ enum { struct cgroup_root { struct kernfs_root *kf_root; + /* The bitmask of subsystems attached to this hierarchy */ + unsigned long subsys_mask; + /* Unique id for this hierarchy. */ int hierarchy_id; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a6894272353b..f944619077f4 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -529,7 +529,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset, * won't change, so no need for locking. */ for_each_subsys(ss, i) { - if (root->cgrp.subsys_mask & (1UL << i)) { + if (root->subsys_mask & (1UL << i)) { /* Subsystem is in this hierarchy. So we want * the subsystem state from the new * cgroup */ @@ -742,7 +742,7 @@ static void cgroup_destroy_root(struct cgroup_root *root) BUG_ON(!list_empty(&cgrp->children)); /* Rebind all subsystems back to the default hierarchy */ - rebind_subsystems(&cgrp_dfl_root, cgrp->subsys_mask); + rebind_subsystems(&cgrp_dfl_root, root->subsys_mask); /* * Release all the links from cset_links to this hierarchy's @@ -1050,8 +1050,11 @@ static int rebind_subsystems(struct cgroup_root *dst_root, ss->root = dst_root; css->cgroup = &dst_root->cgrp; - src_root->cgrp.subsys_mask &= ~(1 << ssid); - dst_root->cgrp.subsys_mask |= 1 << ssid; + src_root->subsys_mask &= ~(1 << ssid); + src_root->cgrp.child_subsys_mask &= ~(1 << ssid); + + dst_root->subsys_mask |= 1 << ssid; + dst_root->cgrp.child_subsys_mask |= 1 << ssid; if (ss->bind) ss->bind(css); @@ -1069,7 +1072,7 @@ static int cgroup_show_options(struct seq_file *seq, int ssid; for_each_subsys(ss, ssid) - if (root->cgrp.subsys_mask & (1 << ssid)) + if (root->subsys_mask & (1 << ssid)) seq_printf(seq, ",%s", ss->name); if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) seq_puts(seq, ",sane_behavior"); @@ -1273,12 +1276,12 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) if (ret) goto out_unlock; - if (opts.subsys_mask != root->cgrp.subsys_mask || opts.release_agent) + if (opts.subsys_mask != root->subsys_mask || opts.release_agent) pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", task_tgid_nr(current), current->comm); - added_mask = opts.subsys_mask & ~root->cgrp.subsys_mask; - removed_mask = root->cgrp.subsys_mask & ~opts.subsys_mask; + added_mask = opts.subsys_mask & ~root->subsys_mask; + removed_mask = root->subsys_mask & ~opts.subsys_mask; /* Don't allow flags or name to change at remount */ if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) || @@ -1535,7 +1538,7 @@ retry: * subsystems) then they must match. */ if ((opts.subsys_mask || opts.none) && - (opts.subsys_mask != root->cgrp.subsys_mask)) { + (opts.subsys_mask != root->subsys_mask)) { if (!name_match) continue; ret = -EBUSY; @@ -3658,8 +3661,6 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) cgroup_get(cgrp); css_get(css->parent); - cgrp->subsys_mask |= 1 << ss->id; - if (ss->broken_hierarchy && !ss->warned_broken_hierarchy && parent->parent) { pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n", @@ -3780,13 +3781,15 @@ static long cgroup_create(struct cgroup *parent, const char *name, /* let's create and online css's */ for_each_subsys(ss, ssid) { - if (root->cgrp.subsys_mask & (1 << ssid)) { + if (parent->child_subsys_mask & (1 << ssid)) { err = create_css(cgrp, ss); if (err) goto err_destroy; } } + cgrp->child_subsys_mask = parent->child_subsys_mask; + kernfs_activate(kn); mutex_unlock(&cgroup_mutex); @@ -3882,7 +3885,16 @@ static void css_killed_ref_fn(struct percpu_ref *ref) queue_work(cgroup_destroy_wq, &css->destroy_work); } -static void __kill_css(struct cgroup_subsys_state *css) +/** + * kill_css - destroy a css + * @css: css to destroy + * + * This function initiates destruction of @css by removing cgroup interface + * files and putting its base reference. ->css_offline() will be invoked + * asynchronously once css_tryget() is guaranteed to fail and when the + * reference count reaches zero, @css will be released. + */ +static void kill_css(struct cgroup_subsys_state *css) { lockdep_assert_held(&cgroup_tree_mutex); @@ -3911,28 +3923,6 @@ static void __kill_css(struct cgroup_subsys_state *css) percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn); } -/** - * kill_css - destroy a css - * @css: css to destroy - * - * This function initiates destruction of @css by removing cgroup interface - * files and putting its base reference. ->css_offline() will be invoked - * asynchronously once css_tryget() is guaranteed to fail and when the - * reference count reaches zero, @css will be released. - */ -static void kill_css(struct cgroup_subsys_state *css) -{ - struct cgroup *cgrp = css->cgroup; - - lockdep_assert_held(&cgroup_tree_mutex); - - /* if already killed, noop */ - if (cgrp->subsys_mask & (1 << css->ss->id)) { - cgrp->subsys_mask &= ~(1 << css->ss->id); - __kill_css(css); - } -} - /** * cgroup_destroy_locked - the first stage of cgroup destruction * @cgrp: cgroup to be destroyed @@ -4145,7 +4135,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) BUG_ON(online_css(css)); - cgrp_dfl_root.cgrp.subsys_mask |= 1 << ss->id; + cgrp_dfl_root.subsys_mask |= 1 << ss->id; mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_tree_mutex); @@ -4302,7 +4292,7 @@ int proc_cgroup_show(struct seq_file *m, void *v) seq_printf(m, "%d:", root->hierarchy_id); for_each_subsys(ss, ssid) - if (root->cgrp.subsys_mask & (1 << ssid)) + if (root->subsys_mask & (1 << ssid)) seq_printf(m, "%s%s", count++ ? "," : "", ss->name); if (strlen(root->name)) seq_printf(m, "%sname=%s", count ? "," : "", -- cgit v1.2.3-71-gd317 From 2d8f243a5e6efa57fb7c46fe83fafa45b33d0ec2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Apr 2014 11:13:15 -0400 Subject: cgroup: implement cgroup->e_csets[] On the default unified hierarchy, a cgroup may be associated with csses of its ancestors, which means that a css of a given cgroup may be associated with css_sets of descendant cgroups. This means that we can't walk all tasks associated with a css by iterating the css_sets associated with the cgroup as there are css_sets which are pointing to the css but linked on the descendants. This patch adds per-subsystem list heads cgroup->e_csets[]. Any css_set which is pointing to a css is linked to css->cgroup->e_csets[$SUBSYS_ID] through css_set->e_cset_node[$SUBSYS_ID]. The lists are protected by css_set_rwsem and will allow us to walk all css_sets associated with a given css so that we can find out all associated tasks. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 18 ++++++++++++++++++ kernel/cgroup.c | 30 ++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 1b5b2fe1b228..33a0043ef454 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -187,6 +187,15 @@ struct cgroup { */ struct list_head cset_links; + /* + * On the default hierarchy, a css_set for a cgroup with some + * susbsys disabled will point to css's which are associated with + * the closest ancestor which has the subsys enabled. The + * following lists all css_sets which point to this cgroup's css + * for the given subsystem. + */ + struct list_head e_csets[CGROUP_SUBSYS_COUNT]; + /* * Linked list running through all cgroups that can * potentially be reaped by the release agent. Protected by @@ -369,6 +378,15 @@ struct css_set { struct cgroup *mg_src_cgrp; struct css_set *mg_dst_cset; + /* + * On the default hierarhcy, ->subsys[ssid] may point to a css + * attached to an ancestor instead of the cgroup this css_set is + * associated with. The following node is anchored at + * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to + * iterate through all css's attached to a given cgroup. + */ + struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; + /* For RCU-protected deletion */ struct rcu_head rcu_head; }; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4eb2dd1bb5b1..37d966289978 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -425,6 +425,8 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[]) static void put_css_set_locked(struct css_set *cset, bool taskexit) { struct cgrp_cset_link *link, *tmp_link; + struct cgroup_subsys *ss; + int ssid; lockdep_assert_held(&css_set_rwsem); @@ -432,6 +434,8 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit) return; /* This css_set is dead. unlink it and release cgroup refcounts */ + for_each_subsys(ss, ssid) + list_del(&cset->e_cset_node[ssid]); hash_del(&cset->hlist); css_set_count--; @@ -673,7 +677,9 @@ static struct css_set *find_css_set(struct css_set *old_cset, struct css_set *cset; struct list_head tmp_links; struct cgrp_cset_link *link; + struct cgroup_subsys *ss; unsigned long key; + int ssid; lockdep_assert_held(&cgroup_mutex); @@ -724,10 +730,14 @@ static struct css_set *find_css_set(struct css_set *old_cset, css_set_count++; - /* Add this cgroup group to the hash table */ + /* Add @cset to the hash table */ key = css_set_hash(cset->subsys); hash_add(css_set_table, &cset->hlist, key); + for_each_subsys(ss, ssid) + list_add_tail(&cset->e_cset_node[ssid], + &cset->subsys[ssid]->cgroup->e_csets[ssid]); + up_write(&css_set_rwsem); return cset; @@ -1028,7 +1038,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, unsigned long ss_mask) { struct cgroup_subsys *ss; - int ssid, ret; + int ssid, i, ret; lockdep_assert_held(&cgroup_tree_mutex); lockdep_assert_held(&cgroup_mutex); @@ -1081,6 +1091,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, for_each_subsys(ss, ssid) { struct cgroup_root *src_root; struct cgroup_subsys_state *css; + struct css_set *cset; if (!(ss_mask & (1 << ssid))) continue; @@ -1095,6 +1106,12 @@ static int rebind_subsystems(struct cgroup_root *dst_root, ss->root = dst_root; css->cgroup = &dst_root->cgrp; + down_write(&css_set_rwsem); + hash_for_each(css_set_table, i, cset, hlist) + list_move_tail(&cset->e_cset_node[ss->id], + &dst_root->cgrp.e_csets[ss->id]); + up_write(&css_set_rwsem); + src_root->subsys_mask &= ~(1 << ssid); src_root->cgrp.child_subsys_mask &= ~(1 << ssid); @@ -1417,6 +1434,9 @@ out_unlock: static void init_cgroup_housekeeping(struct cgroup *cgrp) { + struct cgroup_subsys *ss; + int ssid; + atomic_set(&cgrp->refcnt, 1); INIT_LIST_HEAD(&cgrp->sibling); INIT_LIST_HEAD(&cgrp->children); @@ -1425,6 +1445,9 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_LIST_HEAD(&cgrp->pidlists); mutex_init(&cgrp->pidlist_mutex); cgrp->dummy_css.cgroup = cgrp; + + for_each_subsys(ss, ssid) + INIT_LIST_HEAD(&cgrp->e_csets[ssid]); } static void init_cgroup_root(struct cgroup_root *root, @@ -4249,6 +4272,9 @@ int __init cgroup_init(void) if (!ss->early_init) cgroup_init_subsys(ss); + list_add_tail(&init_css_set.e_cset_node[ssid], + &cgrp_dfl_root.cgrp.e_csets[ssid]); + /* * cftype registration needs kmalloc and can't be done * during early_init. Register base cftypes separately. -- cgit v1.2.3-71-gd317 From 0f0a2b4fa6210147131082999f1f16d7fb79abf8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Apr 2014 11:13:15 -0400 Subject: cgroup: reorganize css_task_iter This patch reorganizes css_task_iter so that adding effective css support is easier. * s/->cset_link/->cset_pos/ and s/->task/->task_pos/ for consistency * ->origin_css is used to determine whether the iteration reached the last css_set. Replace it with explicit ->cset_head so that css_advance_task_iter() doesn't have to know the termination condition directly. * css_task_iter_next() currently assumes that it's walking list of cgrp_cset_link and reaches into the current cset through the current link to determine the termination conditions for task walking. As this won't always be true for effective css walking, add ->tasks_head and ->mg_tasks_head and use them to control task walking so that css_task_iter_next() doesn't have to know how css_sets are being walked. This patch doesn't make any behavior changes. The iteration logic stays unchanged after the patch. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 9 ++++++--- kernel/cgroup.c | 33 +++++++++++++++++---------------- 2 files changed, 23 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 33a0043ef454..bee390586120 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -842,9 +842,12 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, /* A css_task_iter should be treated as an opaque object */ struct css_task_iter { - struct cgroup_subsys_state *origin_css; - struct list_head *cset_link; - struct list_head *task; + struct list_head *cset_pos; + struct list_head *cset_head; + + struct list_head *task_pos; + struct list_head *tasks_head; + struct list_head *mg_tasks_head; }; void css_task_iter_start(struct cgroup_subsys_state *css, diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0edc186cd545..d48163b26196 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2857,27 +2857,30 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, */ static void css_advance_task_iter(struct css_task_iter *it) { - struct list_head *l = it->cset_link; + struct list_head *l = it->cset_pos; struct cgrp_cset_link *link; struct css_set *cset; /* Advance to the next non-empty css_set */ do { l = l->next; - if (l == &it->origin_css->cgroup->cset_links) { - it->cset_link = NULL; + if (l == it->cset_head) { + it->cset_pos = NULL; return; } link = list_entry(l, struct cgrp_cset_link, cset_link); cset = link->cset; } while (list_empty(&cset->tasks) && list_empty(&cset->mg_tasks)); - it->cset_link = l; + it->cset_pos = l; if (!list_empty(&cset->tasks)) - it->task = cset->tasks.next; + it->task_pos = cset->tasks.next; else - it->task = cset->mg_tasks.next; + it->task_pos = cset->mg_tasks.next; + + it->tasks_head = &cset->tasks; + it->mg_tasks_head = &cset->mg_tasks; } /** @@ -2903,8 +2906,8 @@ void css_task_iter_start(struct cgroup_subsys_state *css, down_read(&css_set_rwsem); - it->origin_css = css; - it->cset_link = &css->cgroup->cset_links; + it->cset_pos = &css->cgroup->cset_links; + it->cset_head = it->cset_pos; css_advance_task_iter(it); } @@ -2920,12 +2923,10 @@ void css_task_iter_start(struct cgroup_subsys_state *css, struct task_struct *css_task_iter_next(struct css_task_iter *it) { struct task_struct *res; - struct list_head *l = it->task; - struct cgrp_cset_link *link = list_entry(it->cset_link, - struct cgrp_cset_link, cset_link); + struct list_head *l = it->task_pos; /* If the iterator cg is NULL, we have no tasks */ - if (!it->cset_link) + if (!it->cset_pos) return NULL; res = list_entry(l, struct task_struct, cg_list); @@ -2936,13 +2937,13 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it) */ l = l->next; - if (l == &link->cset->tasks) - l = link->cset->mg_tasks.next; + if (l == it->tasks_head) + l = it->mg_tasks_head->next; - if (l == &link->cset->mg_tasks) + if (l == it->mg_tasks_head) css_advance_task_iter(it); else - it->task = l; + it->task_pos = l; return res; } -- cgit v1.2.3-71-gd317 From 3ebb2b6ef38875b866ec0118bfae7bc52afd0166 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Apr 2014 11:13:15 -0400 Subject: cgroup: teach css_task_iter about effective csses Currently, css_task_iter iterates tasks associated with a css by visiting each css_set associated with the owning cgroup and walking tasks of each of them. This works fine for !unified hierarchies as each cgroup has its own css for each associated subsystem on the hierarchy; however, on the planned unified hierarchy, a cgroup may not have csses associated and its tasks would be considered associated with the matching css of the nearest ancestor which has the subsystem enabled. This means that on the default unified hierarchy, just walking all tasks associated with a cgroup isn't enough to walk all tasks which are associated with the specified css. If any of its children doesn't have the matching css enabled, task iteration should also include all tasks from the subtree. We already added cgroup->e_csets[] to list all css_sets effectively associated with a given css and walk css_sets on that list instead to achieve such iteration. This patch updates css_task_iter iteration such that it walks css_sets on cgroup->e_csets[] instead of cgroup->cset_links if iteration is requested on an non-dummy css. Thanks to the previous iteration update, this change can be achieved with the addition of css_task_iter->ss and minimal updates to css_advance_task_iter() and css_task_iter_start(). Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 2 ++ kernel/cgroup.c | 18 +++++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index bee390586120..18fcae39e63e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -842,6 +842,8 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, /* A css_task_iter should be treated as an opaque object */ struct css_task_iter { + struct cgroup_subsys *ss; + struct list_head *cset_pos; struct list_head *cset_head; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d48163b26196..ad28866ed44c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2868,8 +2868,14 @@ static void css_advance_task_iter(struct css_task_iter *it) it->cset_pos = NULL; return; } - link = list_entry(l, struct cgrp_cset_link, cset_link); - cset = link->cset; + + if (it->ss) { + cset = container_of(l, struct css_set, + e_cset_node[it->ss->id]); + } else { + link = list_entry(l, struct cgrp_cset_link, cset_link); + cset = link->cset; + } } while (list_empty(&cset->tasks) && list_empty(&cset->mg_tasks)); it->cset_pos = l; @@ -2906,7 +2912,13 @@ void css_task_iter_start(struct cgroup_subsys_state *css, down_read(&css_set_rwsem); - it->cset_pos = &css->cgroup->cset_links; + it->ss = css->ss; + + if (it->ss) + it->cset_pos = &css->cgroup->e_csets[css->ss->id]; + else + it->cset_pos = &css->cgroup->cset_links; + it->cset_head = it->cset_pos; css_advance_task_iter(it); -- cgit v1.2.3-71-gd317 From 6803c006282768ec850760766a6e4eb1a6ff87df Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Apr 2014 11:13:16 -0400 Subject: cgroup: add css_set->dfl_cgrp To implement the unified hierarchy behavior, we'll need to be able to determine the associated cgroup on the default hierarchy from css_set. Let's add css_set->dfl_cgrp so that it can be accessed conveniently and efficiently. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 3 +++ kernel/cgroup.c | 4 ++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 18fcae39e63e..c49d161a71cd 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -354,6 +354,9 @@ struct css_set { */ struct list_head cgrp_links; + /* the default cgroup associated with this css_set */ + struct cgroup *dfl_cgrp; + /* * Set of subsystem states, one for each subsystem. This array is * immutable after creation apart from the init_css_set during diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2a4f88db3205..c66bfc8ee8a7 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -651,6 +651,10 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset, struct cgrp_cset_link *link; BUG_ON(list_empty(tmp_links)); + + if (cgroup_on_dfl(cgrp)) + cset->dfl_cgrp = cgrp; + link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link); link->cset = cset; link->cgrp = cgrp; -- cgit v1.2.3-71-gd317 From f8f22e53a262ebee37fc98004f16b066cf5bc125 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Apr 2014 11:13:16 -0400 Subject: cgroup: implement dynamic subtree controller enable/disable on the default hierarchy cgroup is switching away from multiple hierarchies and will use one unified default hierarchy where controllers can be dynamically enabled and disabled per subtree. The default hierarchy will serve as the unified hierarchy to which all controllers are attached and a css on the default hierarchy would need to also serve the tasks of descendant cgroups which don't have the controller enabled - ie. the tree may be collapsed from leaf towards root when viewed from specific controllers. This has been implemented through effective css in the previous patches. This patch finally implements dynamic subtree controller enable/disable on the default hierarchy via a new knob - "cgroup.subtree_control" which controls which controllers are enabled on the child cgroups. Let's assume a hierarchy like the following. root - A - B - C \ D root's "cgroup.subtree_control" determines which controllers are enabled on A. A's on B. B's on C and D. This coincides with the fact that controllers on the immediate sub-level are used to distribute the resources of the parent. In fact, it's natural to assume that resource control knobs of a child belong to its parent. Enabling a controller in "cgroup.subtree_control" declares that distribution of the respective resources of the cgroup will be controlled. Note that this means that controller enable states are shared among siblings. The default hierarchy has an extra restriction - only cgroups which don't contain any task may have controllers enabled in "cgroup.subtree_control". Combined with the other properties of the default hierarchy, this guarantees that, from the view point of controllers, tasks are only on the leaf cgroups. In other words, only leaf csses may contain tasks. This rules out situations where child cgroups compete against internal tasks of the parent, which is a competition between two different types of entities without any clear way to determine resource distribution between the two. Different controllers handle it differently and all the implemented behaviors are ambiguous, ad-hoc, cumbersome and/or just wrong. Having this structural constraints imposed from cgroup core removes the burden from controller implementations and enables showing one consistent behavior across all controllers. When a controller is enabled or disabled, css associations for the controller in the subtrees of each child should be updated. After enabling, the whole subtree of a child should point to the new css of the child. After disabling, the whole subtree of a child should point to the cgroup's css. This is implemented by first updating cgroup states such that cgroup_e_css() result points to the appropriate css and then invoking cgroup_update_dfl_csses() which migrates all tasks in the affected subtrees to the self cgroup on the default hierarchy. * When read, "cgroup.subtree_control" lists all the currently enabled controllers on the children of the cgroup. * White-space separated list of controller names prefixed with either '+' or '-' can be written to "cgroup.subtree_control". The ones prefixed with '+' are enabled on the controller and '-' disabled. * A controller can be enabled iff the parent's "cgroup.subtree_control" enables it and disabled iff no child's "cgroup.subtree_control" has it enabled. * If a cgroup has tasks, no controller can be enabled via "cgroup.subtree_control". Likewise, if "cgroup.subtree_control" has some controllers enabled, tasks can't be migrated into the cgroup. * All controllers which aren't bound on other hierarchies are automatically associated with the root cgroup of the default hierarchy. All the controllers which are bound to the default hierarchy are listed in the read-only file "cgroup.controllers" in the root directory. * "cgroup.controllers" in all non-root cgroups is read-only file whose content is equal to that of "cgroup.subtree_control" of the parent. This indicates which controllers can be used in the cgroup's "cgroup.subtree_control". This is still experimental and there are some holes, one of which is that ->can_attach() failure during cgroup_update_dfl_csses() may leave the cgroups in an undefined state. The issues will be addressed by future patches. v2: Non-root cgroups now also have "cgroup.controllers". Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 5 + kernel/cgroup.c | 367 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 370 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c49d161a71cd..ada239253ec7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -21,6 +21,7 @@ #include #include #include +#include #ifdef CONFIG_CGROUPS @@ -164,6 +165,7 @@ struct cgroup { struct cgroup *parent; /* my parent */ struct kernfs_node *kn; /* cgroup kernfs entry */ + struct kernfs_node *control_kn; /* kn for "cgroup.subtree_control" */ /* * Monotonically increasing unique serial number which defines a @@ -216,6 +218,9 @@ struct cgroup { /* For css percpu_ref killing and RCU-protected deletion */ struct rcu_head rcu_head; struct work_struct destroy_work; + + /* used to wait for offlining of csses */ + wait_queue_head_t offline_waitq; }; #define MAX_CGROUP_ROOT_NAMELEN 64 diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 8c2835a9e192..809dd903ceb8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -182,6 +182,8 @@ static int rebind_subsystems(struct cgroup_root *dst_root, unsigned long ss_mask); static void cgroup_destroy_css_killed(struct cgroup *cgrp); static int cgroup_destroy_locked(struct cgroup *cgrp); +static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss); +static void kill_css(struct cgroup_subsys_state *css); static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], bool is_add); static void cgroup_pidlist_destroy_all(struct cgroup *cgrp); @@ -338,6 +340,14 @@ static int notify_on_release(const struct cgroup *cgrp) #define for_each_root(root) \ list_for_each_entry((root), &cgroup_roots, root_list) +/* iterate over child cgrps, lock should be held throughout iteration */ +#define cgroup_for_each_live_child(child, cgrp) \ + list_for_each_entry((child), &(cgrp)->children, sibling) \ + if (({ lockdep_assert_held(&cgroup_tree_mutex); \ + cgroup_is_dead(child); })) \ + ; \ + else + /** * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive. * @cgrp: the cgroup to be checked for liveness @@ -1450,6 +1460,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) for_each_subsys(ss, ssid) INIT_LIST_HEAD(&cgrp->e_csets[ssid]); + + init_waitqueue_head(&cgrp->offline_waitq); } static void init_cgroup_root(struct cgroup_root *root, @@ -1938,6 +1950,14 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, lockdep_assert_held(&cgroup_mutex); + /* + * Except for the root, child_subsys_mask must be zero for a cgroup + * with tasks so that child cgroups don't compete against tasks. + */ + if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && dst_cgrp->parent && + dst_cgrp->child_subsys_mask) + return -EBUSY; + /* look up the dst cset for each src cset and link it to src */ list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) { struct css_set *dst_cset; @@ -2303,6 +2323,326 @@ static int cgroup_sane_behavior_show(struct seq_file *seq, void *v) return 0; } +static void cgroup_print_ss_mask(struct seq_file *seq, unsigned int ss_mask) +{ + struct cgroup_subsys *ss; + bool printed = false; + int ssid; + + for_each_subsys(ss, ssid) { + if (ss_mask & (1 << ssid)) { + if (printed) + seq_putc(seq, ' '); + seq_printf(seq, "%s", ss->name); + printed = true; + } + } + if (printed) + seq_putc(seq, '\n'); +} + +/* show controllers which are currently attached to the default hierarchy */ +static int cgroup_root_controllers_show(struct seq_file *seq, void *v) +{ + struct cgroup *cgrp = seq_css(seq)->cgroup; + + cgroup_print_ss_mask(seq, cgrp->root->subsys_mask); + return 0; +} + +/* show controllers which are enabled from the parent */ +static int cgroup_controllers_show(struct seq_file *seq, void *v) +{ + struct cgroup *cgrp = seq_css(seq)->cgroup; + + cgroup_print_ss_mask(seq, cgrp->parent->child_subsys_mask); + return 0; +} + +/* show controllers which are enabled for a given cgroup's children */ +static int cgroup_subtree_control_show(struct seq_file *seq, void *v) +{ + struct cgroup *cgrp = seq_css(seq)->cgroup; + + cgroup_print_ss_mask(seq, cgrp->child_subsys_mask); + return 0; +} + +/** + * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy + * @cgrp: root of the subtree to update csses for + * + * @cgrp's child_subsys_mask has changed and its subtree's (self excluded) + * css associations need to be updated accordingly. This function looks up + * all css_sets which are attached to the subtree, creates the matching + * updated css_sets and migrates the tasks to the new ones. + */ +static int cgroup_update_dfl_csses(struct cgroup *cgrp) +{ + LIST_HEAD(preloaded_csets); + struct cgroup_subsys_state *css; + struct css_set *src_cset; + int ret; + + lockdep_assert_held(&cgroup_tree_mutex); + lockdep_assert_held(&cgroup_mutex); + + /* look up all csses currently attached to @cgrp's subtree */ + down_read(&css_set_rwsem); + css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { + struct cgrp_cset_link *link; + + /* self is not affected by child_subsys_mask change */ + if (css->cgroup == cgrp) + continue; + + list_for_each_entry(link, &css->cgroup->cset_links, cset_link) + cgroup_migrate_add_src(link->cset, cgrp, + &preloaded_csets); + } + up_read(&css_set_rwsem); + + /* NULL dst indicates self on default hierarchy */ + ret = cgroup_migrate_prepare_dst(NULL, &preloaded_csets); + if (ret) + goto out_finish; + + list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) { + struct task_struct *last_task = NULL, *task; + + /* src_csets precede dst_csets, break on the first dst_cset */ + if (!src_cset->mg_src_cgrp) + break; + + /* + * All tasks in src_cset need to be migrated to the + * matching dst_cset. Empty it process by process. We + * walk tasks but migrate processes. The leader might even + * belong to a different cset but such src_cset would also + * be among the target src_csets because the default + * hierarchy enforces per-process membership. + */ + while (true) { + down_read(&css_set_rwsem); + task = list_first_entry_or_null(&src_cset->tasks, + struct task_struct, cg_list); + if (task) { + task = task->group_leader; + WARN_ON_ONCE(!task_css_set(task)->mg_src_cgrp); + get_task_struct(task); + } + up_read(&css_set_rwsem); + + if (!task) + break; + + /* guard against possible infinite loop */ + if (WARN(last_task == task, + "cgroup: update_dfl_csses failed to make progress, aborting in inconsistent state\n")) + goto out_finish; + last_task = task; + + threadgroup_lock(task); + /* raced against de_thread() from another thread? */ + if (!thread_group_leader(task)) { + threadgroup_unlock(task); + put_task_struct(task); + continue; + } + + ret = cgroup_migrate(src_cset->dfl_cgrp, task, true); + + threadgroup_unlock(task); + put_task_struct(task); + + if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret)) + goto out_finish; + } + } + +out_finish: + cgroup_migrate_finish(&preloaded_csets); + return ret; +} + +/* change the enabled child controllers for a cgroup in the default hierarchy */ +static int cgroup_subtree_control_write(struct cgroup_subsys_state *dummy_css, + struct cftype *cft, char *buffer) +{ + unsigned long enable_req = 0, disable_req = 0, enable, disable; + struct cgroup *cgrp = dummy_css->cgroup, *child; + struct cgroup_subsys *ss; + char *tok, *p; + int ssid, ret; + + /* + * Parse input - white space separated list of subsystem names + * prefixed with either + or -. + */ + p = buffer; + while ((tok = strsep(&p, " \t\n"))) { + for_each_subsys(ss, ssid) { + if (ss->disabled || strcmp(tok + 1, ss->name)) + continue; + + if (*tok == '+') { + enable_req |= 1 << ssid; + disable_req &= ~(1 << ssid); + } else if (*tok == '-') { + disable_req |= 1 << ssid; + enable_req &= ~(1 << ssid); + } else { + return -EINVAL; + } + break; + } + if (ssid == CGROUP_SUBSYS_COUNT) + return -EINVAL; + } + + /* + * We're gonna grab cgroup_tree_mutex which nests outside kernfs + * active_ref. cgroup_lock_live_group() already provides enough + * protection. Ensure @cgrp stays accessible and break the + * active_ref protection. + */ + cgroup_get(cgrp); + kernfs_break_active_protection(cgrp->control_kn); +retry: + enable = enable_req; + disable = disable_req; + + mutex_lock(&cgroup_tree_mutex); + + for_each_subsys(ss, ssid) { + if (enable & (1 << ssid)) { + if (cgrp->child_subsys_mask & (1 << ssid)) { + enable &= ~(1 << ssid); + continue; + } + + /* + * Because css offlining is asynchronous, userland + * might try to re-enable the same controller while + * the previous instance is still around. In such + * cases, wait till it's gone using offline_waitq. + */ + cgroup_for_each_live_child(child, cgrp) { + wait_queue_t wait; + + if (!cgroup_css(child, ss)) + continue; + + prepare_to_wait(&child->offline_waitq, &wait, + TASK_UNINTERRUPTIBLE); + mutex_unlock(&cgroup_tree_mutex); + schedule(); + finish_wait(&child->offline_waitq, &wait); + goto retry; + } + + /* unavailable or not enabled on the parent? */ + if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) || + (cgrp->parent && + !(cgrp->parent->child_subsys_mask & (1 << ssid)))) { + ret = -ENOENT; + goto out_unlock_tree; + } + } else if (disable & (1 << ssid)) { + if (!(cgrp->child_subsys_mask & (1 << ssid))) { + disable &= ~(1 << ssid); + continue; + } + + /* a child has it enabled? */ + cgroup_for_each_live_child(child, cgrp) { + if (child->child_subsys_mask & (1 << ssid)) { + ret = -EBUSY; + goto out_unlock_tree; + } + } + } + } + + if (!enable && !disable) { + ret = 0; + goto out_unlock_tree; + } + + if (!cgroup_lock_live_group(cgrp)) { + ret = -ENODEV; + goto out_unlock_tree; + } + + /* + * Except for the root, child_subsys_mask must be zero for a cgroup + * with tasks so that child cgroups don't compete against tasks. + */ + if (enable && cgrp->parent && !list_empty(&cgrp->cset_links)) { + ret = -EBUSY; + goto out_unlock; + } + + /* + * Create csses for enables and update child_subsys_mask. This + * changes cgroup_e_css() results which in turn makes the + * subsequent cgroup_update_dfl_csses() associate all tasks in the + * subtree to the updated csses. + */ + for_each_subsys(ss, ssid) { + if (!(enable & (1 << ssid))) + continue; + + cgroup_for_each_live_child(child, cgrp) { + ret = create_css(child, ss); + if (ret) + goto err_undo_css; + } + } + + cgrp->child_subsys_mask |= enable; + cgrp->child_subsys_mask &= ~disable; + + ret = cgroup_update_dfl_csses(cgrp); + if (ret) + goto err_undo_css; + + /* all tasks are now migrated away from the old csses, kill them */ + for_each_subsys(ss, ssid) { + if (!(disable & (1 << ssid))) + continue; + + cgroup_for_each_live_child(child, cgrp) + kill_css(cgroup_css(child, ss)); + } + + kernfs_activate(cgrp->kn); + ret = 0; +out_unlock: + mutex_unlock(&cgroup_mutex); +out_unlock_tree: + mutex_unlock(&cgroup_tree_mutex); + kernfs_unbreak_active_protection(cgrp->control_kn); + cgroup_put(cgrp); + return ret; + +err_undo_css: + cgrp->child_subsys_mask &= ~enable; + cgrp->child_subsys_mask |= disable; + + for_each_subsys(ss, ssid) { + if (!(enable & (1 << ssid))) + continue; + + cgroup_for_each_live_child(child, cgrp) { + struct cgroup_subsys_state *css = cgroup_css(child, ss); + if (css) + kill_css(css); + } + } + goto out_unlock; +} + static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { @@ -2462,9 +2802,14 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft) return PTR_ERR(kn); ret = cgroup_kn_set_ugid(kn); - if (ret) + if (ret) { kernfs_remove(kn); - return ret; + return ret; + } + + if (cft->seq_show == cgroup_subtree_control_show) + cgrp->control_kn = kn; + return 0; } /** @@ -3557,6 +3902,22 @@ static struct cftype cgroup_base_files[] = { .flags = CFTYPE_ONLY_ON_ROOT, .seq_show = cgroup_sane_behavior_show, }, + { + .name = "cgroup.controllers", + .flags = CFTYPE_ONLY_ON_DFL | CFTYPE_ONLY_ON_ROOT, + .seq_show = cgroup_root_controllers_show, + }, + { + .name = "cgroup.controllers", + .flags = CFTYPE_ONLY_ON_DFL | CFTYPE_NOT_ON_ROOT, + .seq_show = cgroup_controllers_show, + }, + { + .name = "cgroup.subtree_control", + .flags = CFTYPE_ONLY_ON_DFL, + .seq_show = cgroup_subtree_control_show, + .write_string = cgroup_subtree_control_write, + }, /* * Historical crazy stuff. These don't have "cgroup." prefix and @@ -3725,6 +4086,8 @@ static void offline_css(struct cgroup_subsys_state *css) css->flags &= ~CSS_ONLINE; css->cgroup->nr_css--; RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL); + + wake_up_all(&css->cgroup->offline_waitq); } /** -- cgit v1.2.3-71-gd317 From be8f274323c26ddc7e6fd6c44254b7abcdbe6389 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Apr 2014 17:16:58 +0900 Subject: kprobes: Prohibit probing on .entry.text code .entry.text is a code area which is used for interrupt/syscall entries, which includes many sensitive code. Thus, it is better to prohibit probing on all of such code instead of a part of that. Since some symbols are already registered on kprobe blacklist, this also removes them from the blacklist. Signed-off-by: Masami Hiramatsu Reviewed-by: Steven Rostedt Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Borislav Petkov Cc: David S. Miller Cc: Frederic Weisbecker Cc: Jan Kiszka Cc: Jiri Kosina Cc: Jonathan Lebon Cc: Seiji Aguchi Link: http://lkml.kernel.org/r/20140417081658.26341.57354.stgit@ltc230.yrl.intra.hitachi.co.jp Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 33 --------------------------------- arch/x86/kernel/entry_64.S | 20 -------------------- arch/x86/kernel/kprobes/core.c | 8 ++++++++ include/linux/kprobes.h | 1 + kernel/kprobes.c | 13 ++++++++----- 5 files changed, 17 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index a2a4f4697889..0ca5bf1697bb 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -314,10 +314,6 @@ ENTRY(ret_from_kernel_thread) CFI_ENDPROC ENDPROC(ret_from_kernel_thread) -/* - * Interrupt exit functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" /* * Return to user mode is not as complex as all this looks, * but we want the default path for a system call return to @@ -372,10 +368,6 @@ need_resched: END(resume_kernel) #endif CFI_ENDPROC -/* - * End of kprobes section - */ - .popsection /* SYSENTER_RETURN points to after the "sysenter" instruction in the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ @@ -495,10 +487,6 @@ sysexit_audit: PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) -/* - * syscall stub including irq exit should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" # system call handler stub ENTRY(system_call) RING0_INT_FRAME # can't unwind into user space anyway @@ -691,10 +679,6 @@ syscall_badsys: jmp resume_userspace END(syscall_badsys) CFI_ENDPROC -/* - * End of kprobes section - */ - .popsection .macro FIXUP_ESPFIX_STACK /* @@ -781,10 +765,6 @@ common_interrupt: ENDPROC(common_interrupt) CFI_ENDPROC -/* - * Irq entries should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ @@ -961,10 +941,6 @@ ENTRY(spurious_interrupt_bug) jmp error_code CFI_ENDPROC END(spurious_interrupt_bug) -/* - * End of kprobes section - */ - .popsection #ifdef CONFIG_XEN /* Xen doesn't set %esp to be precisely what the normal sysenter @@ -1239,11 +1215,6 @@ return_to_handler: jmp *%ecx #endif -/* - * Some functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" - #ifdef CONFIG_TRACING ENTRY(trace_page_fault) RING0_EC_FRAME @@ -1453,7 +1424,3 @@ ENTRY(async_page_fault) END(async_page_fault) #endif -/* - * End of kprobes section - */ - .popsection diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1e96c3628bf2..43bb38951660 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -487,8 +487,6 @@ ENDPROC(native_usergs_sysret64) TRACE_IRQS_OFF .endm -/* save complete stack frame */ - .pushsection .kprobes.text, "ax" ENTRY(save_paranoid) XCPT_FRAME 1 RDI+8 cld @@ -517,7 +515,6 @@ ENTRY(save_paranoid) 1: ret CFI_ENDPROC END(save_paranoid) - .popsection /* * A newly forked process directly context switches into this address. @@ -975,10 +972,6 @@ END(interrupt) call \func .endm -/* - * Interrupt entry/exit should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" /* * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_interrupt. @@ -1113,10 +1106,6 @@ ENTRY(retint_kernel) CFI_ENDPROC END(common_interrupt) -/* - * End of kprobes section - */ - .popsection /* * APIC interrupts. @@ -1477,11 +1466,6 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ hyperv_callback_vector hyperv_vector_handler #endif /* CONFIG_HYPERV */ -/* - * Some functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" - paranoidzeroentry_ist debug do_debug DEBUG_STACK paranoidzeroentry_ist int3 do_int3 DEBUG_STACK paranoiderrorentry stack_segment do_stack_segment @@ -1898,7 +1882,3 @@ ENTRY(ignore_sysret) CFI_ENDPROC END(ignore_sysret) -/* - * End of kprobes section - */ - .popsection diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index da7bdaa3ce15..7751b3dee53a 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1065,6 +1065,14 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } +bool arch_within_kprobe_blacklist(unsigned long addr) +{ + return (addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end) || + (addr >= (unsigned long)__entry_text_start && + addr < (unsigned long)__entry_text_end); +} + int __init arch_init_kprobes(void) { return 0; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 925eaf28fca9..cdf9251f8249 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -265,6 +265,7 @@ extern void arch_disarm_kprobe(struct kprobe *p); extern int arch_init_kprobes(void); extern void show_registers(struct pt_regs *regs); extern void kprobes_inc_nmissed_count(struct kprobe *p); +extern bool arch_within_kprobe_blacklist(unsigned long addr); struct kprobe_insn_cache { struct mutex mutex; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ceeadfcabb76..5b5ac76671e7 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -96,9 +96,6 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) static struct kprobe_blackpoint kprobe_blacklist[] = { {"preempt_schedule",}, {"native_get_debugreg",}, - {"irq_entries_start",}, - {"common_interrupt",}, - {"mcount",}, /* mcount can be called from everywhere */ {NULL} /* Terminator */ }; @@ -1324,12 +1321,18 @@ out: return ret; } +bool __weak arch_within_kprobe_blacklist(unsigned long addr) +{ + /* The __kprobes marked functions and entry code must not be probed */ + return addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end; +} + static int __kprobes in_kprobes_functions(unsigned long addr) { struct kprobe_blackpoint *kb; - if (addr >= (unsigned long)__kprobes_text_start && - addr < (unsigned long)__kprobes_text_end) + if (arch_within_kprobe_blacklist(addr)) return -EINVAL; /* * If there exists a kprobe_blacklist, verify and -- cgit v1.2.3-71-gd317 From 376e242429bf8539ef39a080ac113c8799840b13 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Apr 2014 17:17:05 +0900 Subject: kprobes: Introduce NOKPROBE_SYMBOL() macro to maintain kprobes blacklist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce NOKPROBE_SYMBOL() macro which builds a kprobes blacklist at kernel build time. The usage of this macro is similar to EXPORT_SYMBOL(), placed after the function definition: NOKPROBE_SYMBOL(function); Since this macro will inhibit inlining of static/inline functions, this patch also introduces a nokprobe_inline macro for static/inline functions. In this case, we must use NOKPROBE_SYMBOL() for the inline function caller. When CONFIG_KPROBES=y, the macro stores the given function address in the "_kprobe_blacklist" section. Since the data structures are not fully initialized by the macro (because there is no "size" information), those are re-initialized at boot time by using kallsyms. Signed-off-by: Masami Hiramatsu Link: http://lkml.kernel.org/r/20140417081705.26341.96719.stgit@ltc230.yrl.intra.hitachi.co.jp Cc: Alok Kataria Cc: Ananth N Mavinakayanahalli Cc: Andrew Morton Cc: Anil S Keshavamurthy Cc: Arnd Bergmann Cc: Christopher Li Cc: Chris Wright Cc: David S. Miller Cc: Jan-Simon Möller Cc: Jeremy Fitzhardinge Cc: Linus Torvalds Cc: Randy Dunlap Cc: Rusty Russell Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-sparse@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Signed-off-by: Ingo Molnar --- Documentation/kprobes.txt | 16 +++++- arch/x86/include/asm/asm.h | 7 +++ arch/x86/kernel/paravirt.c | 4 ++ include/asm-generic/vmlinux.lds.h | 9 ++++ include/linux/compiler.h | 2 + include/linux/kprobes.h | 20 ++++++-- kernel/kprobes.c | 100 ++++++++++++++++++++------------------ kernel/sched/core.c | 1 + 8 files changed, 107 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 0cfb00fd86ff..4bbeca8483ed 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -22,8 +22,9 @@ Appendix B: The kprobes sysctl interface Kprobes enables you to dynamically break into any kernel routine and collect debugging and performance information non-disruptively. You -can trap at almost any kernel code address, specifying a handler +can trap at almost any kernel code address(*), specifying a handler routine to be invoked when the breakpoint is hit. +(*: some parts of the kernel code can not be trapped, see 1.5 Blacklist) There are currently three types of probes: kprobes, jprobes, and kretprobes (also called return probes). A kprobe can be inserted @@ -273,6 +274,19 @@ using one of the following techniques: or - Execute 'sysctl -w debug.kprobes_optimization=n' +1.5 Blacklist + +Kprobes can probe most of the kernel except itself. This means +that there are some functions where kprobes cannot probe. Probing +(trapping) such functions can cause a recursive trap (e.g. double +fault) or the nested probe handler may never be called. +Kprobes manages such functions as a blacklist. +If you want to add a function into the blacklist, you just need +to (1) include linux/kprobes.h and (2) use NOKPROBE_SYMBOL() macro +to specify a blacklisted function. +Kprobes checks the given probe address against the blacklist and +rejects registering it, if the given address is in the blacklist. + 2. Architectures Supported Kprobes, jprobes, and return probes are implemented on the following diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 4582e8e1cd1a..7730c1c5c83a 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -57,6 +57,12 @@ .long (from) - . ; \ .long (to) - . + 0x7ffffff0 ; \ .popsection + +# define _ASM_NOKPROBE(entry) \ + .pushsection "_kprobe_blacklist","aw" ; \ + _ASM_ALIGN ; \ + _ASM_PTR (entry); \ + .popsection #else # define _ASM_EXTABLE(from,to) \ " .pushsection \"__ex_table\",\"a\"\n" \ @@ -71,6 +77,7 @@ " .long (" #from ") - .\n" \ " .long (" #to ") - . + 0x7ffffff0\n" \ " .popsection\n" +/* For C file, we already have NOKPROBE_SYMBOL macro */ #endif #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1b10af835c31..e136869ae42e 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -389,6 +390,9 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .end_context_switch = paravirt_nop, }; +/* At this point, native_get_debugreg has a real function entry */ +NOKPROBE_SYMBOL(native_get_debugreg); + struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC .startup_ipi_hook = paravirt_nop, diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 146e4fffd710..40ceb3ceba79 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -109,6 +109,14 @@ #define BRANCH_PROFILE() #endif +#ifdef CONFIG_KPROBES +#define KPROBE_BLACKLIST() VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \ + *(_kprobe_blacklist) \ + VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .; +#else +#define KPROBE_BLACKLIST() +#endif + #ifdef CONFIG_EVENT_TRACING #define FTRACE_EVENTS() . = ALIGN(8); \ VMLINUX_SYMBOL(__start_ftrace_events) = .; \ @@ -507,6 +515,7 @@ *(.init.rodata) \ FTRACE_EVENTS() \ TRACE_SYSCALLS() \ + KPROBE_BLACKLIST() \ MEM_DISCARD(init.rodata) \ CLK_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index ee7239ea1583..0300c0f5c88b 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -374,7 +374,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */ #ifdef CONFIG_KPROBES # define __kprobes __attribute__((__section__(".kprobes.text"))) +# define nokprobe_inline __always_inline #else # define __kprobes +# define nokprobe_inline inline #endif #endif /* __LINUX_COMPILER_H */ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index cdf9251f8249..e059507c465d 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -205,10 +205,10 @@ struct kretprobe_blackpoint { void *addr; }; -struct kprobe_blackpoint { - const char *name; +struct kprobe_blacklist_entry { + struct list_head list; unsigned long start_addr; - unsigned long range; + unsigned long end_addr; }; #ifdef CONFIG_KPROBES @@ -477,4 +477,18 @@ static inline int enable_jprobe(struct jprobe *jp) return enable_kprobe(&jp->kp); } +#ifdef CONFIG_KPROBES +/* + * Blacklist ganerating macro. Specify functions which is not probed + * by using this macro. + */ +#define __NOKPROBE_SYMBOL(fname) \ +static unsigned long __used \ + __attribute__((section("_kprobe_blacklist"))) \ + _kbl_addr_##fname = (unsigned long)fname; +#define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname) +#else +#define NOKPROBE_SYMBOL(fname) +#endif + #endif /* _LINUX_KPROBES_H */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5b5ac76671e7..5ffc6875d2a7 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -86,18 +86,8 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) return &(kretprobe_table_locks[hash].lock); } -/* - * Normally, functions that we'd want to prohibit kprobes in, are marked - * __kprobes. But, there are cases where such functions already belong to - * a different section (__sched for preempt_schedule) - * - * For such cases, we now have a blacklist - */ -static struct kprobe_blackpoint kprobe_blacklist[] = { - {"preempt_schedule",}, - {"native_get_debugreg",}, - {NULL} /* Terminator */ -}; +/* Blacklist -- list of struct kprobe_blacklist_entry */ +static LIST_HEAD(kprobe_blacklist); #ifdef __ARCH_WANT_KPROBES_INSN_SLOT /* @@ -1328,24 +1318,22 @@ bool __weak arch_within_kprobe_blacklist(unsigned long addr) addr < (unsigned long)__kprobes_text_end; } -static int __kprobes in_kprobes_functions(unsigned long addr) +static bool __kprobes within_kprobe_blacklist(unsigned long addr) { - struct kprobe_blackpoint *kb; + struct kprobe_blacklist_entry *ent; if (arch_within_kprobe_blacklist(addr)) - return -EINVAL; + return true; /* * If there exists a kprobe_blacklist, verify and * fail any probe registration in the prohibited area */ - for (kb = kprobe_blacklist; kb->name != NULL; kb++) { - if (kb->start_addr) { - if (addr >= kb->start_addr && - addr < (kb->start_addr + kb->range)) - return -EINVAL; - } + list_for_each_entry(ent, &kprobe_blacklist, list) { + if (addr >= ent->start_addr && addr < ent->end_addr) + return true; } - return 0; + + return false; } /* @@ -1436,7 +1424,7 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p, /* Ensure it is not in reserved area nor out of text */ if (!kernel_text_address((unsigned long) p->addr) || - in_kprobes_functions((unsigned long) p->addr) || + within_kprobe_blacklist((unsigned long) p->addr) || jump_label_text_reserved(p->addr, p->addr)) { ret = -EINVAL; goto out; @@ -2022,6 +2010,38 @@ void __kprobes dump_kprobe(struct kprobe *kp) kp->symbol_name, kp->addr, kp->offset); } +/* + * Lookup and populate the kprobe_blacklist. + * + * Unlike the kretprobe blacklist, we'll need to determine + * the range of addresses that belong to the said functions, + * since a kprobe need not necessarily be at the beginning + * of a function. + */ +static int __init populate_kprobe_blacklist(unsigned long *start, + unsigned long *end) +{ + unsigned long *iter; + struct kprobe_blacklist_entry *ent; + unsigned long offset = 0, size = 0; + + for (iter = start; iter < end; iter++) { + if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) { + pr_err("Failed to find blacklist %p\n", (void *)*iter); + continue; + } + + ent = kmalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + return -ENOMEM; + ent->start_addr = *iter; + ent->end_addr = *iter + size; + INIT_LIST_HEAD(&ent->list); + list_add_tail(&ent->list, &kprobe_blacklist); + } + return 0; +} + /* Module notifier call back, checking kprobes on the module */ static int __kprobes kprobes_module_callback(struct notifier_block *nb, unsigned long val, void *data) @@ -2065,14 +2085,13 @@ static struct notifier_block kprobe_module_nb = { .priority = 0 }; +/* Markers of _kprobe_blacklist section */ +extern unsigned long __start_kprobe_blacklist[]; +extern unsigned long __stop_kprobe_blacklist[]; + static int __init init_kprobes(void) { int i, err = 0; - unsigned long offset = 0, size = 0; - char *modname, namebuf[KSYM_NAME_LEN]; - const char *symbol_name; - void *addr; - struct kprobe_blackpoint *kb; /* FIXME allocate the probe table, currently defined statically */ /* initialize all list heads */ @@ -2082,26 +2101,11 @@ static int __init init_kprobes(void) raw_spin_lock_init(&(kretprobe_table_locks[i].lock)); } - /* - * Lookup and populate the kprobe_blacklist. - * - * Unlike the kretprobe blacklist, we'll need to determine - * the range of addresses that belong to the said functions, - * since a kprobe need not necessarily be at the beginning - * of a function. - */ - for (kb = kprobe_blacklist; kb->name != NULL; kb++) { - kprobe_lookup_name(kb->name, addr); - if (!addr) - continue; - - kb->start_addr = (unsigned long)addr; - symbol_name = kallsyms_lookup(kb->start_addr, - &size, &offset, &modname, namebuf); - if (!symbol_name) - kb->range = 0; - else - kb->range = size; + err = populate_kprobe_blacklist(__start_kprobe_blacklist, + __stop_kprobe_blacklist); + if (err) { + pr_err("kprobes: failed to populate blacklist: %d\n", err); + pr_err("Please take care of using kprobes.\n"); } if (kretprobe_blacklist_size) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 268a45ea238c..6863631e8cd0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2804,6 +2804,7 @@ asmlinkage void __sched notrace preempt_schedule(void) barrier(); } while (need_resched()); } +NOKPROBE_SYMBOL(preempt_schedule); EXPORT_SYMBOL(preempt_schedule); #endif /* CONFIG_PREEMPT */ -- cgit v1.2.3-71-gd317 From f5efc696cc711021cc73e7543cc3038e58459707 Mon Sep 17 00:00:00 2001 From: Tomasz Bursztyka Date: Mon, 14 Apr 2014 15:41:28 +0300 Subject: netfilter: nf_tables: Add meta expression key for bridge interface name NFT_META_BRI_IIFNAME to get packet input bridge interface name NFT_META_BRI_OIFNAME to get packet output bridge interface name Such meta key are accessible only through NFPROTO_BRIDGE family, on a dedicated nft meta module: nft_meta_bridge. Suggested-by: Pablo Neira Ayuso Signed-off-by: Tomasz Bursztyka Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 + net/bridge/Makefile | 2 +- net/bridge/netfilter/Kconfig | 14 +++- net/bridge/netfilter/Makefile | 1 + net/bridge/netfilter/nft_meta_bridge.c | 139 +++++++++++++++++++++++++++++++ 5 files changed, 158 insertions(+), 2 deletions(-) create mode 100644 net/bridge/netfilter/nft_meta_bridge.c (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 160159274cab..7d6433f66bf8 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -563,6 +563,8 @@ enum nft_exthdr_attributes { * @NFT_META_SECMARK: packet secmark (skb->secmark) * @NFT_META_NFPROTO: netfilter protocol * @NFT_META_L4PROTO: layer 4 protocol number + * @NFT_META_BRI_IIFNAME: packet input bridge interface name + * @NFT_META_BRI_OIFNAME: packet output bridge interface name */ enum nft_meta_keys { NFT_META_LEN, @@ -582,6 +584,8 @@ enum nft_meta_keys { NFT_META_SECMARK, NFT_META_NFPROTO, NFT_META_L4PROTO, + NFT_META_BRI_IIFNAME, + NFT_META_BRI_OIFNAME, }; /** diff --git a/net/bridge/Makefile b/net/bridge/Makefile index e85498b2f166..906a18b4e74a 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -16,4 +16,4 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o -obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/ +obj-$(CONFIG_BRIDGE_NETFILTER) += netfilter/ diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 5ca74a0e595f..3baf29d34e62 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -2,13 +2,25 @@ # Bridge netfilter configuration # # -config NF_TABLES_BRIDGE +menuconfig NF_TABLES_BRIDGE depends on NF_TABLES + select BRIDGE_NETFILTER tristate "Ethernet Bridge nf_tables support" +if NF_TABLES_BRIDGE + +config NFT_BRIDGE_META + tristate "Netfilter nf_table bridge meta support" + depends on NFT_META + help + Add support for bridge dedicated meta key. + +endif # NF_TABLES_BRIDGE + menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" depends on BRIDGE && NETFILTER + select BRIDGE_NETFILTER select NETFILTER_XTABLES help ebtables is a general, extensible frame/packet identification diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index ea7629f58b3d..6f2f3943d66f 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -3,6 +3,7 @@ # obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o +obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c new file mode 100644 index 000000000000..4f02109d708f --- /dev/null +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2014 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../br_private.h" + +static void nft_meta_bridge_get_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + const struct net_device *in = pkt->in, *out = pkt->out; + struct nft_data *dest = &data[priv->dreg]; + const struct net_bridge_port *p; + + switch (priv->key) { + case NFT_META_BRI_IIFNAME: + if (in == NULL || (p = br_port_get_rcu(in)) == NULL) + goto err; + break; + case NFT_META_BRI_OIFNAME: + if (out == NULL || (p = br_port_get_rcu(out)) == NULL) + goto err; + break; + default: + goto out; + } + + strncpy((char *)dest->data, p->br->dev->name, sizeof(dest->data)); + return; +out: + return nft_meta_get_eval(expr, data, pkt); +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static int nft_meta_bridge_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + int err; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_BRI_IIFNAME: + case NFT_META_BRI_OIFNAME: + break; + default: + return nft_meta_get_init(ctx, expr, tb); + } + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + return 0; +} + +static struct nft_expr_type nft_meta_bridge_type; +static const struct nft_expr_ops nft_meta_bridge_get_ops = { + .type = &nft_meta_bridge_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_bridge_get_eval, + .init = nft_meta_bridge_get_init, + .dump = nft_meta_get_dump, +}; + +static const struct nft_expr_ops nft_meta_bridge_set_ops = { + .type = &nft_meta_bridge_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_set_eval, + .init = nft_meta_set_init, + .dump = nft_meta_set_dump, +}; + +static const struct nft_expr_ops * +nft_meta_bridge_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_META_KEY] == NULL) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG]) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG]) + return &nft_meta_bridge_get_ops; + + if (tb[NFTA_META_SREG]) + return &nft_meta_bridge_set_ops; + + return ERR_PTR(-EINVAL); +} + +static struct nft_expr_type nft_meta_bridge_type __read_mostly = { + .family = NFPROTO_BRIDGE, + .name = "meta", + .select_ops = &nft_meta_bridge_select_ops, + .policy = nft_meta_policy, + .maxattr = NFTA_META_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_meta_bridge_module_init(void) +{ + return nft_register_expr(&nft_meta_bridge_type); +} + +static void __exit nft_meta_bridge_module_exit(void) +{ + nft_unregister_expr(&nft_meta_bridge_type); +} + +module_init(nft_meta_bridge_module_init); +module_exit(nft_meta_bridge_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tomasz Bursztyka "); +MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta"); -- cgit v1.2.3-71-gd317 From 7e65eac8e36f3f4e2553e83249e3d9bdf055456d Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 22 Apr 2014 12:03:58 -0700 Subject: 6lowpan: nuke net_ieee802154_lowpan() accessor when 6lowpan is disabled Johannes noted this is not needed, all of the fragment accessors don't need CONFIG_NET_NS. This goes test compiled with CONFIG_BT_6LOWPAN=y and a disabled CONFIG_NET_NS. CC: Alexander Smirnov Cc: Dmitry Eremin-Solenikov Cc: linux-zigbee-devel@lists.sourceforge.net Cc: David S. Miller" Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Johannes Berg Signed-off-by: Luis R. Rodriguez Signed-off-by: David S. Miller --- include/net/net_namespace.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index bc4118ede5b5..361d26077196 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -379,15 +379,8 @@ net_ieee802154_lowpan(struct net *net) { return &net->ieee802154_lowpan; } -#else -static inline struct netns_ieee802154_lowpan * -net_ieee802154_lowpan(struct net *net) -{ - return NULL; -} #endif - /* For callers who don't really care about whether it's IPv4 or IPv6 */ static inline void rt_genid_bump_all(struct net *net) { -- cgit v1.2.3-71-gd317 From f01ec1c017dead42092997a2b8684fcab4cbf126 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 24 Apr 2014 10:02:49 +0200 Subject: vxlan: add x-netns support This patch allows to switch the netns when packet is encapsulated or decapsulated. The vxlan socket is openned into the i/o netns, ie into the netns where encapsulated packets are received. The socket lookup is done into this netns to find the corresponding vxlan tunnel. After decapsulation, the packet is injecting into the corresponding interface which may stand to another netns. When one of the two netns is removed, the tunnel is destroyed. Configuration example: ip netns add netns1 ip netns exec netns1 ip link set lo up ip link add vxlan10 type vxlan id 10 group 239.0.0.10 dev eth0 dstport 0 ip link set vxlan10 netns netns1 ip netns exec netns1 ip addr add 192.168.0.249/24 broadcast 192.168.0.255 dev vxlan10 ip netns exec netns1 ip link set vxlan10 up Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 63 ++++++++++++++++++++++++++++++++----------- include/net/vxlan.h | 2 +- net/openvswitch/vport-vxlan.c | 3 ++- 3 files changed, 50 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 4dbb2ed85b97..1dfee9a7fbf7 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -127,6 +127,7 @@ struct vxlan_dev { struct list_head next; /* vxlan's per namespace list */ struct vxlan_sock *vn_sock; /* listening socket */ struct net_device *dev; + struct net *net; /* netns for packet i/o */ struct vxlan_rdst default_dst; /* default destination */ union vxlan_addr saddr; /* source address */ __be16 dst_port; @@ -1203,6 +1204,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, remote_ip = &vxlan->default_dst.remote_ip; skb_reset_mac_header(skb); + skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev))); skb->protocol = eth_type_trans(skb, vxlan->dev); /* Ignore packet loops (and multicast echo) */ @@ -1618,7 +1620,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, struct dst_entry *dst, struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, - __be16 src_port, __be16 dst_port, __be32 vni) + __be16 src_port, __be16 dst_port, __be32 vni, + bool xnet) { struct ipv6hdr *ip6h; struct vxlanhdr *vxh; @@ -1631,7 +1634,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, skb->encapsulation = 1; } - skb_scrub_packet(skb, false); + skb_scrub_packet(skb, xnet); min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + VXLAN_HLEN + sizeof(struct ipv6hdr) @@ -1711,7 +1714,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, int vxlan_xmit_skb(struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni) + __be16 src_port, __be16 dst_port, __be32 vni, bool xnet) { struct vxlanhdr *vxh; struct udphdr *uh; @@ -1760,7 +1763,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, return err; return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP, - tos, ttl, df, false); + tos, ttl, df, xnet); } EXPORT_SYMBOL_GPL(vxlan_xmit_skb); @@ -1853,7 +1856,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, fl4.daddr = dst->sin.sin_addr.s_addr; fl4.saddr = vxlan->saddr.sin.sin_addr.s_addr; - rt = ip_route_output_key(dev_net(dev), &fl4); + rt = ip_route_output_key(vxlan->net, &fl4); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to %pI4\n", &dst->sin.sin_addr.s_addr); @@ -1874,7 +1877,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_dev *dst_vxlan; ip_rt_put(rt); - dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port); + dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port); if (!dst_vxlan) goto tx_error; vxlan_encap_bypass(skb, vxlan, dst_vxlan); @@ -1887,7 +1890,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb, fl4.saddr, dst->sin.sin_addr.s_addr, tos, ttl, df, src_port, dst_port, - htonl(vni << 8)); + htonl(vni << 8), + !net_eq(vxlan->net, dev_net(vxlan->dev))); if (err < 0) goto rt_tx_error; @@ -1927,7 +1931,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_dev *dst_vxlan; dst_release(ndst); - dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port); + dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port); if (!dst_vxlan) goto tx_error; vxlan_encap_bypass(skb, vxlan, dst_vxlan); @@ -1938,7 +1942,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb, dev, &fl6.saddr, &fl6.daddr, 0, ttl, - src_port, dst_port, htonl(vni << 8)); + src_port, dst_port, htonl(vni << 8), + !net_eq(vxlan->net, dev_net(vxlan->dev))); #endif } @@ -2082,7 +2087,7 @@ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan) static int vxlan_init(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); - struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_sock *vs; dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); @@ -2090,7 +2095,7 @@ static int vxlan_init(struct net_device *dev) return -ENOMEM; spin_lock(&vn->sock_lock); - vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port); + vs = vxlan_find_sock(vxlan->net, vxlan->dst_port); if (vs) { /* If we have a socket with same port already, reuse it */ atomic_inc(&vs->refcnt); @@ -2172,8 +2177,8 @@ static void vxlan_flush(struct vxlan_dev *vxlan) /* Cleanup timer and forwarding table on shutdown */ static int vxlan_stop(struct net_device *dev) { - struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_sock *vs = vxlan->vn_sock; if (vs && vxlan_addr_multicast(&vxlan->default_dst.remote_ip) && @@ -2202,7 +2207,7 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu) struct net_device *lowerdev; int max_mtu; - lowerdev = __dev_get_by_index(dev_net(dev), dst->remote_ifindex); + lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex); if (lowerdev == NULL) return eth_change_mtu(dev, new_mtu); @@ -2285,7 +2290,6 @@ static void vxlan_setup(struct net_device *dev) dev->tx_queue_len = 0; dev->features |= NETIF_F_LLTX; - dev->features |= NETIF_F_NETNS_LOCAL; dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; dev->features |= NETIF_F_RXCSUM; dev->features |= NETIF_F_GSO_SOFTWARE; @@ -2578,7 +2582,7 @@ EXPORT_SYMBOL_GPL(vxlan_sock_add); static void vxlan_sock_work(struct work_struct *work) { struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, sock_work); - struct net *net = dev_net(vxlan->dev); + struct net *net = vxlan->net; struct vxlan_net *vn = net_generic(net, vxlan_net_id); __be16 port = vxlan->dst_port; struct vxlan_sock *nvs; @@ -2605,6 +2609,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, if (!data[IFLA_VXLAN_ID]) return -EINVAL; + vxlan->net = dev_net(dev); + vni = nla_get_u32(data[IFLA_VXLAN_ID]); dst->remote_vni = vni; @@ -2739,8 +2745,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, static void vxlan_dellink(struct net_device *dev, struct list_head *head) { - struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); spin_lock(&vn->sock_lock); if (!hlist_unhashed(&vxlan->hlist)) @@ -2905,8 +2911,33 @@ static __net_init int vxlan_init_net(struct net *net) return 0; } +static void __net_exit vxlan_exit_net(struct net *net) +{ + struct vxlan_net *vn = net_generic(net, vxlan_net_id); + struct vxlan_dev *vxlan, *next; + struct net_device *dev, *aux; + LIST_HEAD(list); + + rtnl_lock(); + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &vxlan_link_ops) + unregister_netdevice_queue(dev, &list); + + list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) { + /* If vxlan->dev is in the same netns, it has already been added + * to the list by the previous loop. + */ + if (!net_eq(dev_net(vxlan->dev), net)) + unregister_netdevice_queue(dev, &list); + } + + unregister_netdevice_many(&list); + rtnl_unlock(); +} + static struct pernet_operations vxlan_net_ops = { .init = vxlan_init_net, + .exit = vxlan_exit_net, .id = &vxlan_net_id, .size = sizeof(struct vxlan_net), }; diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 5deef1ae78c9..7bb4084b1bd0 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -33,7 +33,7 @@ void vxlan_sock_release(struct vxlan_sock *vs); int vxlan_xmit_skb(struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni); + __be16 src_port, __be16 dst_port, __be32 vni, bool xnet); __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb); diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index e797a50ac2be..21cceb3bdf78 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -180,7 +180,8 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) OVS_CB(skb)->tun_key->ipv4_tos, OVS_CB(skb)->tun_key->ipv4_ttl, df, src_port, dst_port, - htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8)); + htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8), + false); if (err < 0) ip_rt_put(rt); error: -- cgit v1.2.3-71-gd317 From 65a124dd719d6e90591e4756bb04e1719489705e Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Wed, 9 Apr 2014 15:29:22 +0200 Subject: cfg80211: allow drivers to iterate over matching combinations The patch splits cfg80211_check_combinations() into an iterator function and a simple iteration user. This makes it possible for drivers to asses how many channels can use given iftype setup. This in turn can be used for future multi-interface/multi-channel channel switching. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 27 +++++++++++++++++++++++++++ net/wireless/util.c | 44 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 64 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9496fe5ea6b4..3dd2cb465540 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4715,6 +4715,33 @@ int cfg80211_check_combinations(struct wiphy *wiphy, const u8 radar_detect, const int iftype_num[NUM_NL80211_IFTYPES]); +/** + * cfg80211_iter_combinations - iterate over matching combinations + * + * @wiphy: the wiphy + * @num_different_channels: the number of different channels we want + * to use for verification + * @radar_detect: a bitmap where each bit corresponds to a channel + * width where radar detection is needed, as in the definition of + * &struct ieee80211_iface_combination.@radar_detect_widths + * @iftype_num: array with the numbers of interfaces of each interface + * type. The index is the interface type as specified in &enum + * nl80211_iftype. + * @iter: function to call for each matching combination + * @data: pointer to pass to iter function + * + * This function can be called by the driver to check what possible + * combinations it fits in at a given moment, e.g. for channel switching + * purposes. + */ +int cfg80211_iter_combinations(struct wiphy *wiphy, + const int num_different_channels, + const u8 radar_detect, + const int iftype_num[NUM_NL80211_IFTYPES], + void (*iter)(const struct ieee80211_iface_combination *c, + void *data), + void *data); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/net/wireless/util.c b/net/wireless/util.c index d032a31828f1..d8b599575a5e 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1263,10 +1263,13 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, return res; } -int cfg80211_check_combinations(struct wiphy *wiphy, - const int num_different_channels, - const u8 radar_detect, - const int iftype_num[NUM_NL80211_IFTYPES]) +int cfg80211_iter_combinations(struct wiphy *wiphy, + const int num_different_channels, + const u8 radar_detect, + const int iftype_num[NUM_NL80211_IFTYPES], + void (*iter)(const struct ieee80211_iface_combination *c, + void *data), + void *data) { int i, j, iftype; int num_interfaces = 0; @@ -1323,13 +1326,40 @@ int cfg80211_check_combinations(struct wiphy *wiphy, /* This combination covered all interface types and * supported the requested numbers, so we're good. */ - kfree(limits); - return 0; + + (*iter)(c, data); cont: kfree(limits); } - return -EBUSY; + return 0; +} +EXPORT_SYMBOL(cfg80211_iter_combinations); + +static void +cfg80211_iter_sum_ifcombs(const struct ieee80211_iface_combination *c, + void *data) +{ + int *num = data; + (*num)++; +} + +int cfg80211_check_combinations(struct wiphy *wiphy, + const int num_different_channels, + const u8 radar_detect, + const int iftype_num[NUM_NL80211_IFTYPES]) +{ + int err, num = 0; + + err = cfg80211_iter_combinations(wiphy, num_different_channels, + radar_detect, iftype_num, + cfg80211_iter_sum_ifcombs, &num); + if (err) + return err; + if (num == 0) + return -EBUSY; + + return 0; } EXPORT_SYMBOL(cfg80211_check_combinations); -- cgit v1.2.3-71-gd317 From 17d38fa8c20a9c3ec76943da46264ce657ac56d0 Mon Sep 17 00:00:00 2001 From: Marek Kwaczynski Date: Mon, 14 Apr 2014 11:27:21 +0200 Subject: mac80211: add option to generate CCMP IVs only for mgmt frames Some chips can encrypt managment frames in HW, but require generated IV in the frame. Add a key flag that allows us to achieve this. Signed-off-by: Marek Kwaczynski [use BIT(0) to fill that spot, fix indentation] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 16 ++++++++++------ net/mac80211/wpa.c | 5 ++++- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a3044e124229..451c1bf00df9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1202,14 +1202,18 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); * fall back to software crypto. Note that this flag deals only with * RX, if your crypto engine can't deal with TX you can also set the * %IEEE80211_KEY_FLAG_SW_MGMT_TX flag to encrypt such frames in SW. + * @IEEE80211_KEY_FLAG_GENERATE_IV_MGMT: This flag should be set by the + * driver for a CCMP key to indicate that is requires IV generation + * only for managment frames (MFP). */ enum ieee80211_key_flags { - IEEE80211_KEY_FLAG_GENERATE_IV = 1<<1, - IEEE80211_KEY_FLAG_GENERATE_MMIC= 1<<2, - IEEE80211_KEY_FLAG_PAIRWISE = 1<<3, - IEEE80211_KEY_FLAG_SW_MGMT_TX = 1<<4, - IEEE80211_KEY_FLAG_PUT_IV_SPACE = 1<<5, - IEEE80211_KEY_FLAG_RX_MGMT = 1<<6, + IEEE80211_KEY_FLAG_GENERATE_IV_MGMT = BIT(0), + IEEE80211_KEY_FLAG_GENERATE_IV = BIT(1), + IEEE80211_KEY_FLAG_GENERATE_MMIC = BIT(2), + IEEE80211_KEY_FLAG_PAIRWISE = BIT(3), + IEEE80211_KEY_FLAG_SW_MGMT_TX = BIT(4), + IEEE80211_KEY_FLAG_PUT_IV_SPACE = BIT(5), + IEEE80211_KEY_FLAG_RX_MGMT = BIT(6), }; /** diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index b8600e3c29c8..9b3dcc201145 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -406,7 +406,10 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key && !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) && - !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) { + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && + !((info->control.hw_key->flags & + IEEE80211_KEY_FLAG_GENERATE_IV_MGMT) && + ieee80211_is_mgmt(hdr->frame_control))) { /* * hwaccel has no need for preallocated room for CCMP * header or MIC fields -- cgit v1.2.3-71-gd317 From ea077c1cea36a6b5ded1256dcd56c72ff2a22c62 Mon Sep 17 00:00:00 2001 From: Rostislav Lisovy Date: Tue, 15 Apr 2014 14:37:55 +0200 Subject: cfg80211: Add attributes describing prohibited channel bandwidth Since there are frequency bands (e.g. 5.9GHz) allowing channels with only 10 or 5 MHz bandwidth, this patch adds attributes that allow keeping track about this information. When channel attributes are reported to user-space, make sure to not break old tools, i.e. if the 'split wiphy dump' is enabled, report the extra attributes (if present) describing the bandwidth restrictions. If the 'split wiphy dump' is not enabled, completely omit those channels that have flags set to either IEEE80211_CHAN_NO_10MHZ or IEEE80211_CHAN_NO_20MHZ. Add the check for new bandwidth restriction flags in cfg80211_chandef_usable() to comply with the restrictions. Signed-off-by: Rostislav Lisovy Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++++ include/uapi/linux/nl80211.h | 6 ++++++ net/wireless/chan.c | 2 ++ net/wireless/nl80211.c | 13 +++++++++++++ 4 files changed, 27 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 3dd2cb465540..c98cf08538b9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -111,6 +111,10 @@ enum ieee80211_band { * restrictions. * @IEEE80211_CHAN_INDOOR_ONLY: see %NL80211_FREQUENCY_ATTR_INDOOR_ONLY * @IEEE80211_CHAN_GO_CONCURRENT: see %NL80211_FREQUENCY_ATTR_GO_CONCURRENT + * @IEEE80211_CHAN_NO_20MHZ: 20 MHz bandwidth is not permitted + * on this channel. + * @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted + * on this channel. * */ enum ieee80211_channel_flags { @@ -125,6 +129,8 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_160MHZ = 1<<8, IEEE80211_CHAN_INDOOR_ONLY = 1<<9, IEEE80211_CHAN_GO_CONCURRENT = 1<<10, + IEEE80211_CHAN_NO_20MHZ = 1<<11, + IEEE80211_CHAN_NO_10MHZ = 1<<12, }; #define IEEE80211_CHAN_NO_HT40 \ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 513bfd7b2e5f..0592032ff160 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2358,6 +2358,10 @@ enum nl80211_band_attr { * connected to an AP with DFS and radar detection on the UNII band (it is * up to user-space, i.e., wpa_supplicant to perform the required * verifications) + * @NL80211_FREQUENCY_ATTR_NO_20MHZ: 20 MHz operation is not allowed + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_NO_10MHZ: 10 MHz operation is not allowed + * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -2384,6 +2388,8 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_DFS_CAC_TIME, NL80211_FREQUENCY_ATTR_INDOOR_ONLY, NL80211_FREQUENCY_ATTR_GO_CONCURRENT, + NL80211_FREQUENCY_ATTR_NO_20MHZ, + NL80211_FREQUENCY_ATTR_NO_10MHZ, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 2adf7b2eccbc..84d686e2dbd0 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -616,12 +616,14 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, width = 5; break; case NL80211_CHAN_WIDTH_10: + prohibited_flags |= IEEE80211_CHAN_NO_10MHZ; width = 10; break; case NL80211_CHAN_WIDTH_20: if (!ht_cap->ht_supported) return false; case NL80211_CHAN_WIDTH_20_NOHT: + prohibited_flags |= IEEE80211_CHAN_NO_20MHZ; width = 20; break; case NL80211_CHAN_WIDTH_40: diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fce423a4e96a..ca75f60041d2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -567,6 +567,13 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct ieee80211_channel *chan, bool large) { + /* Some channels must be completely excluded from the + * list to protect old user-space tools from breaking + */ + if (!large && chan->flags & + (IEEE80211_CHAN_NO_10MHZ | IEEE80211_CHAN_NO_20MHZ)) + return 0; + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_FREQ, chan->center_freq)) goto nla_put_failure; @@ -620,6 +627,12 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_GO_CONCURRENT) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_GO_CONCURRENT)) goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_10MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_10MHZ)) + goto nla_put_failure; } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, -- cgit v1.2.3-71-gd317 From 842b597ee0a7e1aa5a3148164ffdba00ec17f614 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 25 Apr 2014 18:28:02 -0400 Subject: cgroup: implement cgroup.populated for the default hierarchy cgroup users often need a way to determine when a cgroup's subhierarchy becomes empty so that it can be cleaned up. cgroup currently provides release_agent for it; unfortunately, this mechanism is riddled with issues. * It delivers events by forking and execing a userland binary specified as the release_agent. This is a long deprecated method of notification delivery. It's extremely heavy, slow and cumbersome to integrate with larger infrastructure. * There is single monitoring point at the root. There's no way to delegate management of a subtree. * The event isn't recursive. It triggers when a cgroup doesn't have any tasks or child cgroups. Events for internal nodes trigger only after all children are removed. This again makes it impossible to delegate management of a subtree. * Events are filtered from the kernel side. "notify_on_release" file is used to subscribe to or suppress release event. This is unnecessarily complicated and probably done this way because event delivery itself was expensive. This patch implements interface file "cgroup.populated" which can be used to monitor whether the cgroup's subhierarchy has tasks in it or not. Its value is 0 if there is no task in the cgroup and its descendants; otherwise, 1, and kernfs_notify() notificaiton is triggers when the value changes, which can be monitored through poll and [di]notify. This is a lot ligther and simpler and trivially allows delegating management of subhierarchy - subhierarchy monitoring can block further propgation simply by putting itself or another process in the root of the subhierarchy and monitor events that it's interested in from there without interfering with monitoring higher in the tree. v2: Patch description updated as per Serge. v3: "cgroup.subtree_populated" renamed to "cgroup.populated". The subtree_ prefix was a bit confusing because "cgroup.subtree_control" uses it to denote the tree rooted at the cgroup sans the cgroup itself while the populated state includes the cgroup itself. Signed-off-by: Tejun Heo Acked-by: Serge Hallyn Acked-by: Li Zefan Cc: Lennart Poettering --- include/linux/cgroup.h | 15 ++++++++++++ kernel/cgroup.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 76 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ada239253ec7..4b38e2d6110d 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -154,6 +154,14 @@ struct cgroup { /* the number of attached css's */ int nr_css; + /* + * If this cgroup contains any tasks, it contributes one to + * populated_cnt. All children with non-zero popuplated_cnt of + * their own contribute one. The count is zero iff there's no task + * in this cgroup or its subtree. + */ + int populated_cnt; + atomic_t refcnt; /* @@ -166,6 +174,7 @@ struct cgroup { struct cgroup *parent; /* my parent */ struct kernfs_node *kn; /* cgroup kernfs entry */ struct kernfs_node *control_kn; /* kn for "cgroup.subtree_control" */ + struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ /* * Monotonically increasing unique serial number which defines a @@ -264,6 +273,12 @@ enum { * * - "cgroup.clone_children" is removed. * + * - "cgroup.subtree_populated" is available. Its value is 0 if + * the cgroup and its descendants contain no task; otherwise, 1. + * The file also generates kernfs notification which can be + * monitored through poll and [di]notify when the value of the + * file changes. + * * - If mount is requested with sane_behavior but without any * subsystem, the default unified hierarchy is mounted. * diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 809dd903ceb8..0f986f7afee4 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -411,6 +411,43 @@ static struct css_set init_css_set = { static int css_set_count = 1; /* 1 for init_css_set */ +/** + * cgroup_update_populated - updated populated count of a cgroup + * @cgrp: the target cgroup + * @populated: inc or dec populated count + * + * @cgrp is either getting the first task (css_set) or losing the last. + * Update @cgrp->populated_cnt accordingly. The count is propagated + * towards root so that a given cgroup's populated_cnt is zero iff the + * cgroup and all its descendants are empty. + * + * @cgrp's interface file "cgroup.populated" is zero if + * @cgrp->populated_cnt is zero and 1 otherwise. When @cgrp->populated_cnt + * changes from or to zero, userland is notified that the content of the + * interface file has changed. This can be used to detect when @cgrp and + * its descendants become populated or empty. + */ +static void cgroup_update_populated(struct cgroup *cgrp, bool populated) +{ + lockdep_assert_held(&css_set_rwsem); + + do { + bool trigger; + + if (populated) + trigger = !cgrp->populated_cnt++; + else + trigger = !--cgrp->populated_cnt; + + if (!trigger) + break; + + if (cgrp->populated_kn) + kernfs_notify(cgrp->populated_kn); + cgrp = cgrp->parent; + } while (cgrp); +} + /* * hash table for cgroup groups. This improves the performance to find * an existing css_set. This hash doesn't (currently) take into @@ -456,10 +493,13 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit) list_del(&link->cgrp_link); /* @cgrp can't go away while we're holding css_set_rwsem */ - if (list_empty(&cgrp->cset_links) && notify_on_release(cgrp)) { - if (taskexit) - set_bit(CGRP_RELEASABLE, &cgrp->flags); - check_for_release(cgrp); + if (list_empty(&cgrp->cset_links)) { + cgroup_update_populated(cgrp, false); + if (notify_on_release(cgrp)) { + if (taskexit) + set_bit(CGRP_RELEASABLE, &cgrp->flags); + check_for_release(cgrp); + } } kfree(link); @@ -668,7 +708,11 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset, link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link); link->cset = cset; link->cgrp = cgrp; + + if (list_empty(&cgrp->cset_links)) + cgroup_update_populated(cgrp, true); list_move(&link->cset_link, &cgrp->cset_links); + /* * Always add links to the tail of the list so that the list * is sorted by order of hierarchy creation @@ -2643,6 +2687,12 @@ err_undo_css: goto out_unlock; } +static int cgroup_populated_show(struct seq_file *seq, void *v) +{ + seq_printf(seq, "%d\n", (bool)seq_css(seq)->cgroup->populated_cnt); + return 0; +} + static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { @@ -2809,6 +2859,8 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft) if (cft->seq_show == cgroup_subtree_control_show) cgrp->control_kn = kn; + else if (cft->seq_show == cgroup_populated_show) + cgrp->populated_kn = kn; return 0; } @@ -3918,6 +3970,11 @@ static struct cftype cgroup_base_files[] = { .seq_show = cgroup_subtree_control_show, .write_string = cgroup_subtree_control_write, }, + { + .name = "cgroup.populated", + .flags = CFTYPE_ONLY_ON_DFL | CFTYPE_NOT_ON_ROOT, + .seq_show = cgroup_populated_show, + }, /* * Historical crazy stuff. These don't have "cgroup." prefix and -- cgit v1.2.3-71-gd317 From a89778d8baf19cd7e728d81121a294a06cedaad1 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 24 Apr 2014 16:26:46 +0200 Subject: tipc: add support for link state subscriptions When links are established over a bearer plane, we create a node local publication containing information about the peer node and bearer plane. This allows TIPC applications to use the standard TIPC topology server subscription mechanism to get notifications when a link goes up or down. Signed-off-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- include/uapi/linux/tipc.h | 1 + net/tipc/node.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 852373d27dbb..53cd7902d34e 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -87,6 +87,7 @@ static inline unsigned int tipc_node(__u32 addr) #define TIPC_CFG_SRV 0 /* configuration service name type */ #define TIPC_TOP_SRV 1 /* topology service name type */ +#define TIPC_LINK_STATE 2 /* link state name type */ #define TIPC_RESERVED_TYPES 64 /* lowest user-publishable name type */ /* diff --git a/net/tipc/node.c b/net/tipc/node.c index be90115cda1a..3b86a74cb31f 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -144,9 +144,11 @@ void tipc_node_stop(void) void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { struct tipc_link **active = &n_ptr->active_links[0]; + u32 addr = n_ptr->addr; n_ptr->working_links++; - + tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE, + l_ptr->bearer_id, addr); pr_info("Established link <%s> on network plane %c\n", l_ptr->name, l_ptr->net_plane); @@ -203,8 +205,10 @@ static void node_select_active_links(struct tipc_node *n_ptr) void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { struct tipc_link **active; + u32 addr = n_ptr->addr; n_ptr->working_links--; + tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, l_ptr->bearer_id, addr); if (!tipc_link_is_active(l_ptr)) { pr_info("Lost standby link <%s> on network plane %c\n", -- cgit v1.2.3-71-gd317 From 78acb1f9b898e85fa2c1e28e700b54b66b288e8d Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 24 Apr 2014 16:26:47 +0200 Subject: tipc: add ioctl to fetch link names We add a new ioctl for AF_TIPC that can be used to fetch the logical name for a link to a remote node on a given bearer. This should be used in combination with link state subscriptions. The logical name size limit definitions are moved to tipc.h, as they are now also needed by the new ioctl. Signed-off-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- include/uapi/linux/tipc.h | 22 ++++++++++++++++++++++ include/uapi/linux/tipc_config.h | 10 +--------- net/tipc/node.c | 27 +++++++++++++++++++++++++++ net/tipc/node.h | 1 + net/tipc/socket.c | 29 ++++++++++++++++++++++++++--- 5 files changed, 77 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 53cd7902d34e..6f71b9b41595 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -38,6 +38,7 @@ #define _LINUX_TIPC_H_ #include +#include /* * TIPC addressing primitives @@ -207,4 +208,25 @@ struct sockaddr_tipc { #define TIPC_NODE_RECVQ_DEPTH 131 /* Default: none (read only) */ #define TIPC_SOCK_RECVQ_DEPTH 132 /* Default: none (read only) */ +/* + * Maximum sizes of TIPC bearer-related names (including terminating NULL) + * The string formatting for each name element is: + * media: media + * interface: media:interface name + * link: Z.C.N:interface-Z.C.N:interface + * + */ + +#define TIPC_MAX_MEDIA_NAME 16 +#define TIPC_MAX_IF_NAME 16 +#define TIPC_MAX_BEARER_NAME 32 +#define TIPC_MAX_LINK_NAME 60 + +#define SIOCGETLINKNAME SIOCPROTOPRIVATE + +struct tipc_sioc_ln_req { + __u32 peer; + __u32 bearer_id; + char linkname[TIPC_MAX_LINK_NAME]; +}; #endif diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index 6b0bff09b3a7..41a76acbb305 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -39,6 +39,7 @@ #include #include +#include #include #ifndef __KERNEL__ @@ -154,15 +155,6 @@ #define TIPC_TLV_NAME_TBL_QUERY 25 /* struct tipc_name_table_query */ #define TIPC_TLV_PORT_REF 26 /* 32-bit port reference */ -/* - * Maximum sizes of TIPC bearer-related names (including terminating NUL) - */ - -#define TIPC_MAX_MEDIA_NAME 16 /* format = media */ -#define TIPC_MAX_IF_NAME 16 /* format = interface */ -#define TIPC_MAX_BEARER_NAME 32 /* format = media:interface */ -#define TIPC_MAX_LINK_NAME 60 /* format = Z.C.N:interface-Z.C.N:interface */ - /* * Link priority limits (min, default, max, media default) */ diff --git a/net/tipc/node.c b/net/tipc/node.c index 3b86a74cb31f..1f938f3dba4b 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -438,3 +438,30 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) rcu_read_unlock(); return buf; } + +/** + * tipc_node_get_linkname - get the name of a link + * + * @bearer_id: id of the bearer + * @node: peer node address + * @linkname: link name output buffer + * + * Returns 0 on success + */ +int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) +{ + struct tipc_link *link; + struct tipc_node *node = tipc_node_find(addr); + + if ((bearer_id > MAX_BEARERS) || !node) + return -EINVAL; + tipc_node_lock(node); + link = node->links[bearer_id]; + if (link) { + strncpy(linkname, link->name, len); + tipc_node_unlock(node); + return 0; + } + tipc_node_unlock(node); + return -EINVAL; +} diff --git a/net/tipc/node.h b/net/tipc/node.h index 7cbb8cec1a93..411b19114064 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -118,6 +118,7 @@ int tipc_node_active_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); +int tipc_node_get_linkname(u32 bearer_id, u32 node, char *linkname, size_t len); static inline void tipc_node_lock(struct tipc_node *n_ptr) { diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3c0256962f7d..3f9912f87d0d 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -36,6 +36,7 @@ #include "core.h" #include "port.h" +#include "node.h" #include @@ -1905,6 +1906,28 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, return put_user(sizeof(value), ol); } +int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) +{ + struct tipc_sioc_ln_req lnr; + void __user *argp = (void __user *)arg; + + switch (cmd) { + case SIOCGETLINKNAME: + if (copy_from_user(&lnr, argp, sizeof(lnr))) + return -EFAULT; + if (!tipc_node_get_linkname(lnr.bearer_id, lnr.peer, + lnr.linkname, TIPC_MAX_LINK_NAME)) { + if (copy_to_user(argp, &lnr, sizeof(lnr))) + return -EFAULT; + return 0; + } + return -EADDRNOTAVAIL; + break; + default: + return -ENOIOCTLCMD; + } +} + /* Protocol switches for the various types of TIPC sockets */ static const struct proto_ops msg_ops = { @@ -1917,7 +1940,7 @@ static const struct proto_ops msg_ops = { .accept = sock_no_accept, .getname = tipc_getname, .poll = tipc_poll, - .ioctl = sock_no_ioctl, + .ioctl = tipc_ioctl, .listen = sock_no_listen, .shutdown = tipc_shutdown, .setsockopt = tipc_setsockopt, @@ -1938,7 +1961,7 @@ static const struct proto_ops packet_ops = { .accept = tipc_accept, .getname = tipc_getname, .poll = tipc_poll, - .ioctl = sock_no_ioctl, + .ioctl = tipc_ioctl, .listen = tipc_listen, .shutdown = tipc_shutdown, .setsockopt = tipc_setsockopt, @@ -1959,7 +1982,7 @@ static const struct proto_ops stream_ops = { .accept = tipc_accept, .getname = tipc_getname, .poll = tipc_poll, - .ioctl = sock_no_ioctl, + .ioctl = tipc_ioctl, .listen = tipc_listen, .shutdown = tipc_shutdown, .setsockopt = tipc_setsockopt, -- cgit v1.2.3-71-gd317 From 4af619ae2ccf47a2b3a108e1926736484721370f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 24 Apr 2014 19:09:05 +0200 Subject: at86rf230: use irq_get_trigger_type This patch removes the platform data for the irq_type. We use instead the irq_get_trigger_type function to get these flags which should already configured by the interrupt controller. Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- drivers/net/ieee802154/at86rf230.c | 28 ++++++++-------------------- include/linux/spi/at86rf230.h | 14 -------------- 2 files changed, 8 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index e36f194673a4..17b9c9aea9be 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -970,8 +971,7 @@ static int at86rf230_irq_polarity(struct at86rf230_local *lp, int pol) static int at86rf230_hw_init(struct at86rf230_local *lp) { - struct at86rf230_platform_data *pdata = lp->spi->dev.platform_data; - int rc, irq_pol; + int rc, irq_pol, irq_type; u8 status; u8 csma_seed[2]; @@ -983,8 +983,9 @@ static int at86rf230_hw_init(struct at86rf230_local *lp) if (rc) return rc; + irq_type = irq_get_trigger_type(lp->spi->irq); /* configure irq polarity, defaults to high active */ - if (pdata->irq_type & (IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW)) + if (irq_type & (IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW)) irq_pol = IRQ_ACTIVE_LOW; else irq_pol = IRQ_ACTIVE_HIGH; @@ -1032,7 +1033,6 @@ static struct at86rf230_platform_data * at86rf230_get_pdata(struct spi_device *spi) { struct at86rf230_platform_data *pdata; - const char *irq_type; if (!IS_ENABLED(CONFIG_OF) || !spi->dev.of_node) return spi->dev.platform_data; @@ -1044,19 +1044,6 @@ at86rf230_get_pdata(struct spi_device *spi) pdata->rstn = of_get_named_gpio(spi->dev.of_node, "reset-gpio", 0); pdata->slp_tr = of_get_named_gpio(spi->dev.of_node, "sleep-gpio", 0); - pdata->irq_type = IRQF_TRIGGER_RISING; - of_property_read_string(spi->dev.of_node, "irq-type", &irq_type); - if (!strcmp(irq_type, "level-high")) - pdata->irq_type = IRQF_TRIGGER_HIGH; - else if (!strcmp(irq_type, "level-low")) - pdata->irq_type = IRQF_TRIGGER_LOW; - else if (!strcmp(irq_type, "edge-rising")) - pdata->irq_type = IRQF_TRIGGER_RISING; - else if (!strcmp(irq_type, "edge-falling")) - pdata->irq_type = IRQF_TRIGGER_FALLING; - else - dev_warn(&spi->dev, "wrong irq-type specified using edge-rising\n"); - spi->dev.platform_data = pdata; done: return pdata; @@ -1071,7 +1058,7 @@ static int at86rf230_probe(struct spi_device *spi) u8 part = 0, version = 0, status; irq_handler_t irq_handler; work_func_t irq_worker; - int rc; + int rc, irq_type; const char *chip; struct ieee802154_ops *ops = NULL; @@ -1176,7 +1163,8 @@ static int at86rf230_probe(struct spi_device *spi) dev->extra_tx_headroom = 0; dev->flags = IEEE802154_HW_OMIT_CKSUM | IEEE802154_HW_AACK; - if (pdata->irq_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING)) { + irq_type = irq_get_trigger_type(spi->irq); + if (irq_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING)) { irq_worker = at86rf230_irqwork; irq_handler = at86rf230_isr; } else { @@ -1203,7 +1191,7 @@ static int at86rf230_probe(struct spi_device *spi) goto err_hw_init; rc = request_irq(spi->irq, irq_handler, - IRQF_SHARED | pdata->irq_type, + IRQF_SHARED | irq_type, dev_name(&spi->dev), lp); if (rc) goto err_hw_init; diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h index aa327a8105ad..b2b1afbb3202 100644 --- a/include/linux/spi/at86rf230.h +++ b/include/linux/spi/at86rf230.h @@ -26,20 +26,6 @@ struct at86rf230_platform_data { int rstn; int slp_tr; int dig2; - - /* Setting the irq_type will configure the driver to request - * the platform irq trigger type according to the given value - * and configure the interrupt polarity of the device to the - * corresponding polarity. - * - * Allowed values are: IRQF_TRIGGER_RISING, IRQF_TRIGGER_FALLING, - * IRQF_TRIGGER_HIGH and IRQF_TRIGGER_LOW - * - * Setting it to 0, the driver does not touch the trigger type - * configuration of the interrupt and sets the interrupt polarity - * of the device to high active (the default value). - */ - int irq_type; }; #endif -- cgit v1.2.3-71-gd317 From eb11022dca4eb22def72d5d4e3140caa357b34e1 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 25 Apr 2014 10:35:07 +0100 Subject: FDDI: Reformat for 8-character tabs Some of our FDDI support code has been apparently written with an assumption that tabs are 4-character wide. In preparation to the next change this update reformats so that it stays within 79 columns and otherwise renders correctly with 8-character tabs. No functional change. Signed-off-by: Maciej W. Rozycki Signed-off-by: David S. Miller --- include/uapi/linux/if_fddi.h | 90 ++++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_fddi.h b/include/uapi/linux/if_fddi.h index 0d36909c3aef..1086cd9f6754 100644 --- a/include/uapi/linux/if_fddi.h +++ b/include/uapi/linux/if_fddi.h @@ -30,74 +30,76 @@ * Define max and min legal sizes. The frame sizes do not include * 4 byte FCS/CRC (frame check sequence). */ -#define FDDI_K_ALEN 6 /* Octets in one FDDI address */ -#define FDDI_K_8022_HLEN 16 /* Total octets in 802.2 header */ -#define FDDI_K_SNAP_HLEN 21 /* Total octets in 802.2 SNAP header */ -#define FDDI_K_8022_ZLEN 16 /* Min octets in 802.2 frame sans FCS */ -#define FDDI_K_SNAP_ZLEN 21 /* Min octets in 802.2 SNAP frame sans FCS */ +#define FDDI_K_ALEN 6 /* Octets in one FDDI address */ +#define FDDI_K_8022_HLEN 16 /* Total octets in 802.2 header */ +#define FDDI_K_SNAP_HLEN 21 /* Total octets in 802.2 SNAP header */ +#define FDDI_K_8022_ZLEN 16 /* Min octets in 802.2 frame sans + FCS */ +#define FDDI_K_SNAP_ZLEN 21 /* Min octets in 802.2 SNAP frame sans + FCS */ #define FDDI_K_8022_DLEN 4475 /* Max octets in 802.2 payload */ #define FDDI_K_SNAP_DLEN 4470 /* Max octets in 802.2 SNAP payload */ -#define FDDI_K_LLC_ZLEN 13 /* Min octets in LLC frame sans FCS */ +#define FDDI_K_LLC_ZLEN 13 /* Min octets in LLC frame sans FCS */ #define FDDI_K_LLC_LEN 4491 /* Max octets in LLC frame sans FCS */ +#define FDDI_K_OUI_LEN 3 /* Octets in OUI in 802.2 SNAP + header */ /* Define FDDI Frame Control (FC) Byte values */ -#define FDDI_FC_K_VOID 0x00 -#define FDDI_FC_K_NON_RESTRICTED_TOKEN 0x80 -#define FDDI_FC_K_RESTRICTED_TOKEN 0xC0 -#define FDDI_FC_K_SMT_MIN 0x41 -#define FDDI_FC_K_SMT_MAX 0x4F -#define FDDI_FC_K_MAC_MIN 0xC1 -#define FDDI_FC_K_MAC_MAX 0xCF -#define FDDI_FC_K_ASYNC_LLC_MIN 0x50 -#define FDDI_FC_K_ASYNC_LLC_DEF 0x54 -#define FDDI_FC_K_ASYNC_LLC_MAX 0x5F -#define FDDI_FC_K_SYNC_LLC_MIN 0xD0 -#define FDDI_FC_K_SYNC_LLC_MAX 0xD7 -#define FDDI_FC_K_IMPLEMENTOR_MIN 0x60 -#define FDDI_FC_K_IMPLEMENTOR_MAX 0x6F -#define FDDI_FC_K_RESERVED_MIN 0x70 -#define FDDI_FC_K_RESERVED_MAX 0x7F +#define FDDI_FC_K_VOID 0x00 +#define FDDI_FC_K_NON_RESTRICTED_TOKEN 0x80 +#define FDDI_FC_K_RESTRICTED_TOKEN 0xC0 +#define FDDI_FC_K_SMT_MIN 0x41 +#define FDDI_FC_K_SMT_MAX 0x4F +#define FDDI_FC_K_MAC_MIN 0xC1 +#define FDDI_FC_K_MAC_MAX 0xCF +#define FDDI_FC_K_ASYNC_LLC_MIN 0x50 +#define FDDI_FC_K_ASYNC_LLC_DEF 0x54 +#define FDDI_FC_K_ASYNC_LLC_MAX 0x5F +#define FDDI_FC_K_SYNC_LLC_MIN 0xD0 +#define FDDI_FC_K_SYNC_LLC_MAX 0xD7 +#define FDDI_FC_K_IMPLEMENTOR_MIN 0x60 +#define FDDI_FC_K_IMPLEMENTOR_MAX 0x6F +#define FDDI_FC_K_RESERVED_MIN 0x70 +#define FDDI_FC_K_RESERVED_MAX 0x7F /* Define LLC and SNAP constants */ -#define FDDI_EXTENDED_SAP 0xAA +#define FDDI_EXTENDED_SAP 0xAA #define FDDI_UI_CMD 0x03 /* Define 802.2 Type 1 header */ struct fddi_8022_1_hdr { - __u8 dsap; /* destination service access point */ - __u8 ssap; /* source service access point */ - __u8 ctrl; /* control byte #1 */ + __u8 dsap; /* destination service access point */ + __u8 ssap; /* source service access point */ + __u8 ctrl; /* control byte #1 */ } __attribute__((packed)); /* Define 802.2 Type 2 header */ struct fddi_8022_2_hdr { - __u8 dsap; /* destination service access point */ - __u8 ssap; /* source service access point */ - __u8 ctrl_1; /* control byte #1 */ - __u8 ctrl_2; /* control byte #2 */ + __u8 dsap; /* destination service access point */ + __u8 ssap; /* source service access point */ + __u8 ctrl_1; /* control byte #1 */ + __u8 ctrl_2; /* control byte #2 */ } __attribute__((packed)); /* Define 802.2 SNAP header */ -#define FDDI_K_OUI_LEN 3 struct fddi_snap_hdr { - __u8 dsap; /* always 0xAA */ - __u8 ssap; /* always 0xAA */ - __u8 ctrl; /* always 0x03 */ + __u8 dsap; /* always 0xAA */ + __u8 ssap; /* always 0xAA */ + __u8 ctrl; /* always 0x03 */ __u8 oui[FDDI_K_OUI_LEN]; /* organizational universal id */ - __be16 ethertype; /* packet type ID field */ + __be16 ethertype; /* packet type ID field */ } __attribute__((packed)); /* Define FDDI LLC frame header */ struct fddihdr { - __u8 fc; /* frame control */ - __u8 daddr[FDDI_K_ALEN]; /* destination address */ - __u8 saddr[FDDI_K_ALEN]; /* source address */ - union - { - struct fddi_8022_1_hdr llc_8022_1; - struct fddi_8022_2_hdr llc_8022_2; - struct fddi_snap_hdr llc_snap; - } hdr; + __u8 fc; /* frame control */ + __u8 daddr[FDDI_K_ALEN]; /* destination address */ + __u8 saddr[FDDI_K_ALEN]; /* source address */ + union { + struct fddi_8022_1_hdr llc_8022_1; + struct fddi_8022_2_hdr llc_8022_2; + struct fddi_snap_hdr llc_snap; + } hdr; } __attribute__((packed)); -- cgit v1.2.3-71-gd317 From e2dcdfe95c0bd67e37db6057edd9c4ee1f1c7b17 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 28 Apr 2014 11:15:08 +0930 Subject: virtio: virtio_break_device() to mark all virtqueues broken. Good for post-apocalyptic scenarios, like S/390 hotplug. Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 15 +++++++++++++++ include/linux/virtio.h | 2 ++ 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 1e443629f76d..4d08f45a9c29 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -865,4 +865,19 @@ bool virtqueue_is_broken(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_is_broken); +/* + * This should prevent the device from being used, allowing drivers to + * recover. You may need to grab appropriate locks to flush. + */ +void virtio_break_device(struct virtio_device *dev) +{ + struct virtqueue *_vq; + + list_for_each_entry(_vq, &dev->vqs, list) { + struct vring_virtqueue *vq = to_vvq(_vq); + vq->broken = true; + } +} +EXPORT_SYMBOL_GPL(virtio_break_device); + MODULE_LICENSE("GPL"); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index e4abb84199be..b46671e28de2 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -106,6 +106,8 @@ static inline struct virtio_device *dev_to_virtio(struct device *_dev) int register_virtio_device(struct virtio_device *dev); void unregister_virtio_device(struct virtio_device *dev); +void virtio_break_device(struct virtio_device *dev); + /** * virtio_driver - operations for a virtio I/O driver * @driver: underlying device driver (populate name and owner). -- cgit v1.2.3-71-gd317 From 51e158c12aca3c9ac63988611a97c05109b14dc9 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 28 Apr 2014 11:34:33 +0930 Subject: param: hand arguments after -- straight to init The kernel passes any args it doesn't need through to init, except it assumes anything containing '.' belongs to the kernel (for a module). This change means all users can clearly distinguish which arguments are for init. For example, the kernel uses debug ("dee-bug") to mean log everything to the console, where systemd uses the debug from the Scandinavian "day-boog" meaning "fail to boot". If a future versions uses argv[] instead of reading /proc/cmdline, this confusion will be avoided. eg: test 'FOO="this is --foo"' -- 'systemd.debug="true true true"' Gives: argv[0] = '/debug-init' argv[1] = 'test' argv[2] = 'systemd.debug=true true true' envp[0] = 'HOME=/' envp[1] = 'TERM=linux' envp[2] = 'FOO=this is --foo' Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 2 +- init/main.c | 33 +++++++++++++++++++++++++++++---- kernel/module.c | 12 +++++++++--- kernel/params.c | 25 ++++++++++++++----------- 4 files changed, 53 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 204a67743804..b1990c5524e1 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -321,7 +321,7 @@ extern bool parameq(const char *name1, const char *name2); extern bool parameqn(const char *name1, const char *name2, size_t n); /* Called on module insert or kernel boot */ -extern int parse_args(const char *name, +extern char *parse_args(const char *name, char *args, const struct kernel_param *params, unsigned num, diff --git a/init/main.c b/init/main.c index 9c7fd4c9249f..e9d458b5d77b 100644 --- a/init/main.c +++ b/init/main.c @@ -252,6 +252,27 @@ static int __init repair_env_string(char *param, char *val, const char *unused) return 0; } +/* Anything after -- gets handed straight to init. */ +static int __init set_init_arg(char *param, char *val, const char *unused) +{ + unsigned int i; + + if (panic_later) + return 0; + + repair_env_string(param, val, unused); + + for (i = 0; argv_init[i]; i++) { + if (i == MAX_INIT_ARGS) { + panic_later = "init"; + panic_param = param; + return 0; + } + } + argv_init[i] = param; + return 0; +} + /* * Unknown boot options get handed to init, unless they look like * unused parameters (modprobe will find them in /proc/cmdline). @@ -478,7 +499,7 @@ static void __init mm_init(void) asmlinkage void __init start_kernel(void) { - char * command_line; + char * command_line, *after_dashes; extern const struct kernel_param __start___param[], __stop___param[]; /* @@ -519,9 +540,13 @@ asmlinkage void __init start_kernel(void) pr_notice("Kernel command line: %s\n", boot_command_line); parse_early_param(); - parse_args("Booting kernel", static_command_line, __start___param, - __stop___param - __start___param, - -1, -1, &unknown_bootoption); + after_dashes = parse_args("Booting kernel", + static_command_line, __start___param, + __stop___param - __start___param, + -1, -1, &unknown_bootoption); + if (after_dashes) + parse_args("Setting init args", after_dashes, NULL, 0, -1, -1, + set_init_arg); jump_label_init(); diff --git a/kernel/module.c b/kernel/module.c index 11869408f79b..66e4e0d260a9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3193,6 +3193,7 @@ static int load_module(struct load_info *info, const char __user *uargs, { struct module *mod; long err; + char *after_dashes; err = module_sig_check(info); if (err) @@ -3277,10 +3278,15 @@ static int load_module(struct load_info *info, const char __user *uargs, goto ddebug_cleanup; /* Module is ready to execute: parsing args may do that. */ - err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, - -32768, 32767, unknown_module_param_cb); - if (err < 0) + after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, + -32768, 32767, unknown_module_param_cb); + if (IS_ERR(after_dashes)) { + err = PTR_ERR(after_dashes); goto bug_cleanup; + } else if (after_dashes) { + pr_warn("%s: parameters '%s' after `--' ignored\n", + mod->name, after_dashes); + } /* Link in to syfs. */ err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp); diff --git a/kernel/params.c b/kernel/params.c index b00142e7f3ba..1e52ca233fd9 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -177,13 +177,13 @@ static char *next_arg(char *args, char **param, char **val) } /* Args looks like "foo=bar,bar2 baz=fuz wiz". */ -int parse_args(const char *doing, - char *args, - const struct kernel_param *params, - unsigned num, - s16 min_level, - s16 max_level, - int (*unknown)(char *param, char *val, const char *doing)) +char *parse_args(const char *doing, + char *args, + const struct kernel_param *params, + unsigned num, + s16 min_level, + s16 max_level, + int (*unknown)(char *param, char *val, const char *doing)) { char *param, *val; @@ -198,6 +198,9 @@ int parse_args(const char *doing, int irq_was_disabled; args = next_arg(args, ¶m, &val); + /* Stop at -- */ + if (!val && strcmp(param, "--") == 0) + return args; irq_was_disabled = irqs_disabled(); ret = parse_one(param, val, doing, params, num, min_level, max_level, unknown); @@ -208,22 +211,22 @@ int parse_args(const char *doing, switch (ret) { case -ENOENT: pr_err("%s: Unknown parameter `%s'\n", doing, param); - return ret; + return ERR_PTR(ret); case -ENOSPC: pr_err("%s: `%s' too large for parameter `%s'\n", doing, val ?: "", param); - return ret; + return ERR_PTR(ret); case 0: break; default: pr_err("%s: `%s' invalid for parameter `%s'\n", doing, val ?: "", param); - return ret; + return ERR_PTR(ret); } } /* All parsed OK. */ - return 0; + return NULL; } /* Lazy bastard, eh? */ -- cgit v1.2.3-71-gd317 From 2f7ef2f8790f5bf53db4fc6b2310943139285827 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 25 Apr 2014 13:54:06 -0700 Subject: sched, cls: check if we could overwrite actions when changing a filter When actions are attached to a filter, they are a part of the filter itself, so when changing a filter we should allow to overwrite the actions inside as well. In my specific case, when I tried to _append_ a new action to an existing filter which already has an action, I got EEXIST since kernel refused to overwrite the existing one in kernel. This patch checks if we are changing the filter checking NLM_F_CREATE flag (Sigh, filters don't use NLM_F_REPLACE...) and then passes the boolean down to actions. This fixes the problem above. Cc: Jamal Hadi Salim Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 +- include/net/sch_generic.h | 2 +- net/sched/cls_api.c | 9 +++++---- net/sched/cls_basic.c | 10 +++++----- net/sched/cls_bpf.c | 10 +++++----- net/sched/cls_cgroup.c | 4 ++-- net/sched/cls_flow.c | 4 ++-- net/sched/cls_fw.c | 10 +++++----- net/sched/cls_route.c | 11 ++++++----- net/sched/cls_rsvp.h | 4 ++-- net/sched/cls_tcindex.c | 8 ++++---- net/sched/cls_u32.c | 10 +++++----- 12 files changed, 43 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index a2441fb1428f..6da46dcf1049 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -136,7 +136,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, - struct tcf_exts *exts); + struct tcf_exts *exts, bool ovr); void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts); void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, struct tcf_exts *src); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d062f81c692f..624f9857c83e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -199,7 +199,7 @@ struct tcf_proto_ops { int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, - unsigned long *); + unsigned long *, bool); int (*delete)(struct tcf_proto*, unsigned long); void (*walk)(struct tcf_proto*, struct tcf_walker *arg); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 29a30a14c315..6786130a4f59 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -317,7 +317,8 @@ replay: } } - err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh); + err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, + n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE); if (err == 0) { if (tp_created) { spin_lock_bh(root_lock); @@ -504,7 +505,7 @@ void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts) EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts) + struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr) { #ifdef CONFIG_NET_CLS_ACT { @@ -513,7 +514,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, INIT_LIST_HEAD(&exts->actions); if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tb[exts->police], rate_tlv, - "police", TCA_ACT_NOREPLACE, + "police", ovr, TCA_ACT_BIND); if (IS_ERR(act)) return PTR_ERR(act); @@ -523,7 +524,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, } else if (exts->action && tb[exts->action]) { int err; err = tcf_action_init(net, tb[exts->action], rate_tlv, - NULL, TCA_ACT_NOREPLACE, + NULL, ovr, TCA_ACT_BIND, &exts->actions); if (err) return err; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index e98ca99c202b..0ae1813e3e90 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -130,14 +130,14 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct basic_filter *f, unsigned long base, struct nlattr **tb, - struct nlattr *est) + struct nlattr *est, bool ovr) { int err; struct tcf_exts e; struct tcf_ematch_tree t; tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); if (err < 0) return err; @@ -161,7 +161,7 @@ errout: static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg) + struct nlattr **tca, unsigned long *arg, bool ovr) { int err; struct basic_head *head = tp->root; @@ -179,7 +179,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (f != NULL) { if (handle && f->handle != handle) return -EINVAL; - return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]); + return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr); } err = -ENOBUFS; @@ -206,7 +206,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, f->handle = head->hgenerator; } - err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]); + err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr); if (err < 0) goto errout; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 8e3cf49118e3..16186965af97 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -156,7 +156,7 @@ static void cls_bpf_put(struct tcf_proto *tp, unsigned long f) static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, struct cls_bpf_prog *prog, unsigned long base, struct nlattr **tb, - struct nlattr *est) + struct nlattr *est, bool ovr) { struct sock_filter *bpf_ops, *bpf_old; struct tcf_exts exts; @@ -170,7 +170,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, return -EINVAL; tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); - ret = tcf_exts_validate(net, tp, tb, est, &exts); + ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); if (ret < 0) return ret; @@ -242,7 +242,7 @@ static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct cls_bpf_head *head = tp->root; struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg; @@ -260,7 +260,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (handle && prog->handle != handle) return -EINVAL; return cls_bpf_modify_existing(net, tp, prog, base, tb, - tca[TCA_RATE]); + tca[TCA_RATE], ovr); } prog = kzalloc(sizeof(*prog), GFP_KERNEL); @@ -277,7 +277,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout; } - ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE]); + ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr); if (ret < 0) goto errout; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 8e2158ab551c..cacf01bd04f0 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -83,7 +83,7 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; struct cls_cgroup_head *head = tp->root; @@ -119,7 +119,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, return err; tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); if (err < 0) return err; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 257029c54332..35be16f7c192 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -349,7 +349,7 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct flow_head *head = tp->root; struct flow_filter *f; @@ -393,7 +393,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, } tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); if (err < 0) return err; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 63a3ce75c02e..861b03ccfed0 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -169,7 +169,7 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { static int fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, - struct nlattr **tb, struct nlattr **tca, unsigned long base) + struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr) { struct fw_head *head = tp->root; struct tcf_exts e; @@ -177,7 +177,7 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, int err; tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); if (err < 0) return err; @@ -218,7 +218,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct fw_head *head = tp->root; struct fw_filter *f = (struct fw_filter *) *arg; @@ -236,7 +236,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (f != NULL) { if (f->id != handle && handle) return -EINVAL; - return fw_change_attrs(net, tp, f, tb, tca, base); + return fw_change_attrs(net, tp, f, tb, tca, base, ovr); } if (!handle) @@ -264,7 +264,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE); f->id = handle; - err = fw_change_attrs(net, tp, f, tb, tca, base); + err = fw_change_attrs(net, tp, f, tb, tca, base, ovr); if (err < 0) goto errout; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 1ad3068f2ce1..dd9fc2523c76 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -333,7 +333,8 @@ static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = { static int route4_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct route4_filter *f, u32 handle, struct route4_head *head, - struct nlattr **tb, struct nlattr *est, int new) + struct nlattr **tb, struct nlattr *est, int new, + bool ovr) { int err; u32 id = 0, to = 0, nhandle = 0x8000; @@ -343,7 +344,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct tcf_exts e; tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); if (err < 0) return err; @@ -428,7 +429,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct route4_head *head = tp->root; struct route4_filter *f, *f1, **fp; @@ -455,7 +456,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, old_handle = f->handle; err = route4_set_parms(net, tp, base, f, handle, head, tb, - tca[TCA_RATE], 0); + tca[TCA_RATE], 0, ovr); if (err < 0) return err; @@ -479,7 +480,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); err = route4_set_parms(net, tp, base, f, handle, head, tb, - tca[TCA_RATE], 1); + tca[TCA_RATE], 1, ovr); if (err < 0) goto errout; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 19f8e5dfa8bd..1020e233a5d6 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -415,7 +415,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct rsvp_head *data = tp->root; struct rsvp_filter *f, **fp; @@ -436,7 +436,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, return err; tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); if (err < 0) return err; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index eed8404443d8..d11d0a4fbe34 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -192,7 +192,7 @@ static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, struct tcindex_filter_result *r, struct nlattr **tb, - struct nlattr *est) + struct nlattr *est, bool ovr) { int err, balloc = 0; struct tcindex_filter_result new_filter_result, *old_r = r; @@ -202,7 +202,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcf_exts e; tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); if (err < 0) return err; @@ -331,7 +331,7 @@ errout: static int tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg) + struct nlattr **tca, unsigned long *arg, bool ovr) { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; @@ -351,7 +351,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, return err; return tcindex_set_parms(net, tp, base, handle, p, r, tb, - tca[TCA_RATE]); + tca[TCA_RATE], ovr); } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 84c28daff848..c39b583ace32 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -486,13 +486,13 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { static int u32_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tc_u_hnode *ht, struct tc_u_knode *n, struct nlattr **tb, - struct nlattr *est) + struct nlattr *est, bool ovr) { int err; struct tcf_exts e; tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); if (err < 0) return err; @@ -545,7 +545,7 @@ errout: static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -569,7 +569,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; return u32_set_parms(net, tp, base, n->ht_up, n, tb, - tca[TCA_RATE]); + tca[TCA_RATE], ovr); } if (tb[TCA_U32_DIVISOR]) { @@ -656,7 +656,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE]); + err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr); if (err == 0) { struct tc_u_knode **ins; for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next) -- cgit v1.2.3-71-gd317 From e16821bcfb364b0c41142db275dc74b39fa42c30 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 28 Apr 2014 11:22:08 +0300 Subject: cfg80211: Dynamic channel bandwidth changes in AP mode This extends NL80211_CMD_SET_CHANNEL to allow dynamic channel bandwidth changes in AP mode (including P2P GO) during a lifetime of the BSS. This can be used to implement, e.g., HT 20/40 MHz co-existence rules on the 2.4 GHz band. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++++++ include/uapi/linux/nl80211.h | 4 ++++ net/wireless/nl80211.c | 40 ++++++++++++++++++++++++++++------------ net/wireless/rdev-ops.h | 13 +++++++++++++ net/wireless/trace.h | 18 ++++++++++++++++++ 5 files changed, 71 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c98cf08538b9..7eae46ccec01 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2290,6 +2290,10 @@ struct cfg80211_qos_map { * @channel_switch: initiate channel-switch procedure (with CSA) * * @set_qos_map: Set QoS mapping information to the driver + * + * @set_ap_chanwidth: Set the AP (including P2P GO) mode channel width for the + * given interface This is used e.g. for dynamic HT 20/40 MHz channel width + * changes during the lifetime of the BSS. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2533,9 +2537,13 @@ struct cfg80211_ops { int (*channel_switch)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params); + int (*set_qos_map)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_qos_map *qos_map); + + int (*set_ap_chanwidth)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_chan_def *chandef); }; /* diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 0592032ff160..406010d4def0 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3929,6 +3929,9 @@ enum nl80211_ap_sme_features { * interface. An active monitor interface behaves like a normal monitor * interface, but gets added to the driver. It ensures that incoming * unicast packets directed at the configured interface address get ACKed. + * @NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE: This driver supports dynamic + * channel bandwidth change (e.g., HT 20 <-> 40 MHz channel) during the + * lifetime of a BSS. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3949,6 +3952,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 15, NL80211_FEATURE_USERSPACE_MPM = 1 << 16, NL80211_FEATURE_ACTIVE_MONITOR = 1 << 17, + NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE = 1 << 18, }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ca75f60041d2..0f1b18f209d6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1928,18 +1928,20 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, } static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, + struct net_device *dev, struct genl_info *info) { struct cfg80211_chan_def chandef; int result; enum nl80211_iftype iftype = NL80211_IFTYPE_MONITOR; + struct wireless_dev *wdev = NULL; - if (wdev) - iftype = wdev->iftype; - + if (dev) + wdev = dev->ieee80211_ptr; if (!nl80211_can_set_dev_channel(wdev)) return -EOPNOTSUPP; + if (wdev) + iftype = wdev->iftype; result = nl80211_parse_chandef(rdev, info, &chandef); if (result) @@ -1948,14 +1950,27 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - if (wdev->beacon_interval) { - result = -EBUSY; - break; - } if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, iftype)) { result = -EINVAL; break; } + if (wdev->beacon_interval) { + if (!dev || !rdev->ops->set_ap_chanwidth || + !(rdev->wiphy.features & + NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE)) { + result = -EBUSY; + break; + } + + /* Only allow dynamic channel width changes */ + if (chandef.chan != wdev->preset_chandef.chan) { + result = -EBUSY; + break; + } + result = rdev_set_ap_chanwidth(rdev, dev, &chandef); + if (result) + break; + } wdev->preset_chandef = chandef; result = 0; break; @@ -1977,7 +1992,7 @@ static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *netdev = info->user_ptr[1]; - return __nl80211_set_channel(rdev, netdev->ieee80211_ptr, info); + return __nl80211_set_channel(rdev, netdev, info); } static int nl80211_set_wds_peer(struct sk_buff *skb, struct genl_info *info) @@ -2099,9 +2114,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - result = __nl80211_set_channel(rdev, - nl80211_can_set_dev_channel(wdev) ? wdev : NULL, - info); + result = __nl80211_set_channel( + rdev, + nl80211_can_set_dev_channel(wdev) ? netdev : NULL, + info); if (result) return result; } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 74d97d33c938..00cdf73ba6c4 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -950,4 +950,17 @@ static inline int rdev_set_qos_map(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev, + struct net_device *dev, struct cfg80211_chan_def *chandef) +{ + int ret; + + trace_rdev_set_ap_chanwidth(&rdev->wiphy, dev, chandef); + ret = rdev->ops->set_ap_chanwidth(&rdev->wiphy, dev, chandef); + trace_rdev_return_int(&rdev->wiphy, ret); + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 47b499f8f54d..f3c13ff4d04c 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1919,6 +1919,24 @@ TRACE_EVENT(rdev_set_qos_map, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->num_des) ); +TRACE_EVENT(rdev_set_ap_chanwidth, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, netdev, chandef), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ -- cgit v1.2.3-71-gd317 From 73dbd77d88e6974d2e30c239cef67e5370c2b7c5 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 29 Apr 2014 11:44:36 +0200 Subject: drm: Fixup flip-work kerneldoc Describe the fifo parameter. It seems like kerneldoc doesn't properly handle fields defined using a macro, so it will end up complaining about this anyway and not generate the documentation for it either. At least the kerneldoc is now complete. Signed-off-by: Thierry Reding Signed-off-by: Daniel Vetter --- include/drm/drm_flip_work.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_flip_work.h b/include/drm/drm_flip_work.h index 35c776ae7d3b..9eed34dcd6af 100644 --- a/include/drm/drm_flip_work.h +++ b/include/drm/drm_flip_work.h @@ -57,6 +57,7 @@ typedef void (*drm_flip_func_t)(struct drm_flip_work *work, void *val); * @count: number of committed items * @func: callback fxn called for each committed item * @worker: worker which calls @func + * @fifo: queue of committed items */ struct drm_flip_work { const char *name; -- cgit v1.2.3-71-gd317 From 683399eddb9fff742b1a14c5a5d03e12bfc0afff Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Sun, 20 Apr 2014 18:57:36 -0600 Subject: netfilter: nfnetlink_acct: Adding quota support to accounting framework nfacct objects already support accounting at the byte and packet level. As such it is a natural extension to add the possiblity to define a ceiling limit for both metrics. All the support for quotas itself is added to nfnetlink acctounting framework to stay coherent with current accounting object management. Quota limit checks are implemented in xt_nfacct filter where statistic collection is already done. Pablo Neira Ayuso has also contributed to this feature. Signed-off-by: Mathieu Poirier Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink_acct.h | 8 ++- include/uapi/linux/netfilter/nfnetlink.h | 2 + include/uapi/linux/netfilter/nfnetlink_acct.h | 9 +++ net/netfilter/nfnetlink_acct.c | 85 +++++++++++++++++++++++++++ net/netfilter/xt_nfacct.c | 5 +- 5 files changed, 107 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h index b2e85e59f760..6ec975748742 100644 --- a/include/linux/netfilter/nfnetlink_acct.h +++ b/include/linux/netfilter/nfnetlink_acct.h @@ -3,11 +3,17 @@ #include +enum { + NFACCT_NO_QUOTA = -1, + NFACCT_UNDERQUOTA, + NFACCT_OVERQUOTA, +}; struct nf_acct; struct nf_acct *nfnl_acct_find_get(const char *filter_name); void nfnl_acct_put(struct nf_acct *acct); void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct); - +extern int nfnl_acct_overquota(const struct sk_buff *skb, + struct nf_acct *nfacct); #endif /* _NFNL_ACCT_H */ diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 596ddd45253c..354a7e5e50f2 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -20,6 +20,8 @@ enum nfnetlink_groups { #define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_NFTABLES, #define NFNLGRP_NFTABLES NFNLGRP_NFTABLES + NFNLGRP_ACCT_QUOTA, +#define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA __NFNLGRP_MAX, }; #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h index c7b6269e760b..51404ec19022 100644 --- a/include/uapi/linux/netfilter/nfnetlink_acct.h +++ b/include/uapi/linux/netfilter/nfnetlink_acct.h @@ -10,15 +10,24 @@ enum nfnl_acct_msg_types { NFNL_MSG_ACCT_GET, NFNL_MSG_ACCT_GET_CTRZERO, NFNL_MSG_ACCT_DEL, + NFNL_MSG_ACCT_OVERQUOTA, NFNL_MSG_ACCT_MAX }; +enum nfnl_acct_flags { + NFACCT_F_QUOTA_PKTS = (1 << 0), + NFACCT_F_QUOTA_BYTES = (1 << 1), + NFACCT_F_OVERQUOTA = (1 << 2), /* can't be set from userspace */ +}; + enum nfnl_acct_type { NFACCT_UNSPEC, NFACCT_NAME, NFACCT_PKTS, NFACCT_BYTES, NFACCT_USE, + NFACCT_FLAGS, + NFACCT_QUOTA, __NFACCT_MAX }; #define NFACCT_MAX (__NFACCT_MAX - 1) diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index c7b6d466a662..70e86bbb3637 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -32,18 +32,24 @@ static LIST_HEAD(nfnl_acct_list); struct nf_acct { atomic64_t pkts; atomic64_t bytes; + unsigned long flags; struct list_head head; atomic_t refcnt; char name[NFACCT_NAME_MAX]; struct rcu_head rcu_head; + char data[0]; }; +#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES) + static int nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { struct nf_acct *nfacct, *matching = NULL; char *acct_name; + unsigned int size = 0; + u32 flags = 0; if (!tb[NFACCT_NAME]) return -EINVAL; @@ -68,15 +74,39 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, /* reset counters if you request a replacement. */ atomic64_set(&matching->pkts, 0); atomic64_set(&matching->bytes, 0); + smp_mb__before_clear_bit(); + /* reset overquota flag if quota is enabled. */ + if ((matching->flags & NFACCT_F_QUOTA)) + clear_bit(NFACCT_F_OVERQUOTA, &matching->flags); return 0; } return -EBUSY; } nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL); + if (tb[NFACCT_FLAGS]) { + flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS])); + if (flags & ~NFACCT_F_QUOTA) + return -EOPNOTSUPP; + if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA) + return -EINVAL; + if (flags & NFACCT_F_OVERQUOTA) + return -EINVAL; + + size += sizeof(u64); + } + + nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL); if (nfacct == NULL) return -ENOMEM; + if (flags & NFACCT_F_QUOTA) { + u64 *quota = (u64 *)nfacct->data; + + *quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA])); + nfacct->flags = flags; + } + strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX); if (tb[NFACCT_BYTES]) { @@ -117,6 +147,9 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, if (type == NFNL_MSG_ACCT_GET_CTRZERO) { pkts = atomic64_xchg(&acct->pkts, 0); bytes = atomic64_xchg(&acct->bytes, 0); + smp_mb__before_clear_bit(); + if (acct->flags & NFACCT_F_QUOTA) + clear_bit(NFACCT_F_OVERQUOTA, &acct->flags); } else { pkts = atomic64_read(&acct->pkts); bytes = atomic64_read(&acct->bytes); @@ -125,7 +158,13 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) || nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)))) goto nla_put_failure; + if (acct->flags & NFACCT_F_QUOTA) { + u64 *quota = (u64 *)acct->data; + if (nla_put_be32(skb, NFACCT_FLAGS, htonl(acct->flags)) || + nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota))) + goto nla_put_failure; + } nlmsg_end(skb, nlh); return skb->len; @@ -270,6 +309,8 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = { [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 }, [NFACCT_BYTES] = { .type = NLA_U64 }, [NFACCT_PKTS] = { .type = NLA_U64 }, + [NFACCT_FLAGS] = { .type = NLA_U32 }, + [NFACCT_QUOTA] = { .type = NLA_U64 }, }; static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = { @@ -336,6 +377,50 @@ void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct) } EXPORT_SYMBOL_GPL(nfnl_acct_update); +static void nfnl_overquota_report(struct nf_acct *nfacct) +{ + int ret; + struct sk_buff *skb; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (skb == NULL) + return; + + ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0, + nfacct); + if (ret <= 0) { + kfree_skb(skb); + return; + } + netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA, + GFP_ATOMIC); +} + +int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct) +{ + u64 now; + u64 *quota; + int ret = NFACCT_UNDERQUOTA; + + /* no place here if we don't have a quota */ + if (!(nfacct->flags & NFACCT_F_QUOTA)) + return NFACCT_NO_QUOTA; + + quota = (u64 *)nfacct->data; + now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ? + atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes); + + ret = now > *quota; + + if (now >= *quota && + !test_and_set_bit(NFACCT_F_OVERQUOTA, &nfacct->flags)) { + nfnl_overquota_report(nfacct); + } + + return ret; +} +EXPORT_SYMBOL_GPL(nfnl_acct_overquota); + static int __init nfnl_acct_init(void) { int ret; diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index b3be0ef21f19..8c646ed9c921 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -21,11 +21,14 @@ MODULE_ALIAS("ip6t_nfacct"); static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) { + int overquota; const struct xt_nfacct_match_info *info = par->targinfo; nfnl_acct_update(skb, info->nfacct); - return true; + overquota = nfnl_acct_overquota(skb, info->nfacct); + + return overquota == NFACCT_UNDERQUOTA ? false : true; } static int -- cgit v1.2.3-71-gd317 From f768e5bdefe1ec9adbf7a116dfb156b73cacb582 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 28 Apr 2014 21:09:50 +0200 Subject: netfilter: add helper for adding nat extension Reduce copy-past a bit by adding a common helper. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_nat.h | 2 ++ net/ipv4/netfilter/iptable_nat.c | 14 +++----------- net/ipv4/netfilter/nft_chain_nat_ipv4.c | 12 +++--------- net/ipv6/netfilter/ip6table_nat.c | 14 +++----------- net/ipv6/netfilter/nft_chain_nat_ipv6.c | 12 +++--------- net/netfilter/nf_nat_core.c | 24 ++++++++++++++++-------- 6 files changed, 30 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 07eaaf604092..a71dd333ac68 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -48,6 +48,8 @@ unsigned int nf_nat_setup_info(struct nf_conn *ct, extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum); +struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct); + /* Is this tuple already taken? (not by us)*/ int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index ee2886126e3d..f1787c04a4dd 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -91,17 +91,9 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, if (nf_ct_is_untracked(ct)) return NF_ACCEPT; - nat = nfct_nat(ct); - if (!nat) { - /* NAT module was loaded late. */ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) { - pr_debug("failed to add NAT extension\n"); - return NF_ACCEPT; - } - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index b5b256d45e67..3964157d826c 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -48,15 +48,9 @@ static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); - nat = nfct_nat(ct); - if (nat == NULL) { - /* Conntrack module was loaded late, can't add extension. */ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) - return NF_ACCEPT; - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 84c7f33d0cf8..387d8b8fc18d 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -90,17 +90,9 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, if (nf_ct_is_untracked(ct)) return NF_ACCEPT; - nat = nfct_nat(ct); - if (!nat) { - /* NAT module was loaded late. */ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) { - pr_debug("failed to add NAT extension\n"); - return NF_ACCEPT; - } - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index 9c3297a768fd..d189fcb437fe 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -47,15 +47,9 @@ static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, if (ct == NULL || nf_ct_is_untracked(ct)) return NF_ACCEPT; - nat = nfct_nat(ct); - if (nat == NULL) { - /* Conntrack module was loaded late, can't add extension. */ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) - return NF_ACCEPT; - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 52ca952b802c..09096a670c45 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -358,6 +358,19 @@ out: rcu_read_unlock(); } +struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct) +{ + struct nf_conn_nat *nat = nfct_nat(ct); + if (nat) + return nat; + + if (!nf_ct_is_confirmed(ct)) + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + + return nat; +} +EXPORT_SYMBOL_GPL(nf_ct_nat_ext_add); + unsigned int nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, @@ -368,14 +381,9 @@ nf_nat_setup_info(struct nf_conn *ct, struct nf_conn_nat *nat; /* nat helper or nfctnetlink also setup binding */ - nat = nfct_nat(ct); - if (!nat) { - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) { - pr_debug("failed to add NAT extension\n"); - return NF_ACCEPT; - } - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || maniptype == NF_NAT_MANIP_DST); -- cgit v1.2.3-71-gd317 From 9cd4360de6090a6daf7fbe024e34953f2ae60ef2 Mon Sep 17 00:00:00 2001 From: Srikanth Thokala Date: Wed, 23 Apr 2014 20:23:26 +0530 Subject: dma: Add Xilinx AXI Video Direct Memory Access Engine driver support This is the driver for the AXI Video Direct Memory Access (AXI VDMA) core, which is a soft Xilinx IP core that provides high- bandwidth direct memory access between memory and AXI4-Stream type video target peripherals. The core provides efficient two dimensional DMA operations with independent asynchronous read and write channel operation. This module works on Zynq (ARM Based SoC) and Microblaze platforms. Signed-off-by: Srikanth Thokala Acked-by: Jassi Brar Reviewed-by: Levente Kurusa Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 14 + drivers/dma/Makefile | 1 + drivers/dma/xilinx/Makefile | 1 + drivers/dma/xilinx/xilinx_vdma.c | 1379 ++++++++++++++++++++++++++++++++++++++ include/linux/amba/xilinx_dma.h | 47 ++ 5 files changed, 1442 insertions(+) create mode 100644 drivers/dma/xilinx/Makefile create mode 100644 drivers/dma/xilinx/xilinx_vdma.c create mode 100644 include/linux/amba/xilinx_dma.h (limited to 'include') diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 5c5863842de9..b30b7ed89fb2 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -361,6 +361,20 @@ config FSL_EDMA multiplexing capability for DMA request sources(slot). This module can be found on Freescale Vybrid and LS-1 SoCs. +config XILINX_VDMA + tristate "Xilinx AXI VDMA Engine" + depends on (ARCH_ZYNQ || MICROBLAZE) + select DMA_ENGINE + help + Enable support for Xilinx AXI VDMA Soft IP. + + This engine provides high-bandwidth direct memory access + between memory and AXI4-Stream video type target + peripherals including peripherals which support AXI4- + Stream Video Protocol. It has two stream interfaces/ + channels, Memory Mapped to Stream (MM2S) and Stream to + Memory Mapped (S2MM) for the data transfers. + config DMA_ENGINE bool diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 5150c82c9caf..c779e1eb2db2 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -46,3 +46,4 @@ obj-$(CONFIG_K3_DMA) += k3dma.o obj-$(CONFIG_MOXART_DMA) += moxart-dma.o obj-$(CONFIG_FSL_EDMA) += fsl-edma.o obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o +obj-y += xilinx/ diff --git a/drivers/dma/xilinx/Makefile b/drivers/dma/xilinx/Makefile new file mode 100644 index 000000000000..3c4e9f2fea28 --- /dev/null +++ b/drivers/dma/xilinx/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_XILINX_VDMA) += xilinx_vdma.o diff --git a/drivers/dma/xilinx/xilinx_vdma.c b/drivers/dma/xilinx/xilinx_vdma.c new file mode 100644 index 000000000000..42a13e8d4607 --- /dev/null +++ b/drivers/dma/xilinx/xilinx_vdma.c @@ -0,0 +1,1379 @@ +/* + * DMA driver for Xilinx Video DMA Engine + * + * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved. + * + * Based on the Freescale DMA driver. + * + * Description: + * The AXI Video Direct Memory Access (AXI VDMA) core is a soft Xilinx IP + * core that provides high-bandwidth direct memory access between memory + * and AXI4-Stream type video target peripherals. The core provides efficient + * two dimensional DMA operations with independent asynchronous read (S2MM) + * and write (MM2S) channel operation. It can be configured to have either + * one channel or two channels. If configured as two channels, one is to + * transmit to the video device (MM2S) and another is to receive from the + * video device (S2MM). Initialization, status, interrupt and management + * registers are accessed through an AXI4-Lite slave interface. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../dmaengine.h" + +/* Register/Descriptor Offsets */ +#define XILINX_VDMA_MM2S_CTRL_OFFSET 0x0000 +#define XILINX_VDMA_S2MM_CTRL_OFFSET 0x0030 +#define XILINX_VDMA_MM2S_DESC_OFFSET 0x0050 +#define XILINX_VDMA_S2MM_DESC_OFFSET 0x00a0 + +/* Control Registers */ +#define XILINX_VDMA_REG_DMACR 0x0000 +#define XILINX_VDMA_DMACR_DELAY_MAX 0xff +#define XILINX_VDMA_DMACR_DELAY_SHIFT 24 +#define XILINX_VDMA_DMACR_FRAME_COUNT_MAX 0xff +#define XILINX_VDMA_DMACR_FRAME_COUNT_SHIFT 16 +#define XILINX_VDMA_DMACR_ERR_IRQ BIT(14) +#define XILINX_VDMA_DMACR_DLY_CNT_IRQ BIT(13) +#define XILINX_VDMA_DMACR_FRM_CNT_IRQ BIT(12) +#define XILINX_VDMA_DMACR_MASTER_SHIFT 8 +#define XILINX_VDMA_DMACR_FSYNCSRC_SHIFT 5 +#define XILINX_VDMA_DMACR_FRAMECNT_EN BIT(4) +#define XILINX_VDMA_DMACR_GENLOCK_EN BIT(3) +#define XILINX_VDMA_DMACR_RESET BIT(2) +#define XILINX_VDMA_DMACR_CIRC_EN BIT(1) +#define XILINX_VDMA_DMACR_RUNSTOP BIT(0) +#define XILINX_VDMA_DMACR_FSYNCSRC_MASK GENMASK(6, 5) + +#define XILINX_VDMA_REG_DMASR 0x0004 +#define XILINX_VDMA_DMASR_EOL_LATE_ERR BIT(15) +#define XILINX_VDMA_DMASR_ERR_IRQ BIT(14) +#define XILINX_VDMA_DMASR_DLY_CNT_IRQ BIT(13) +#define XILINX_VDMA_DMASR_FRM_CNT_IRQ BIT(12) +#define XILINX_VDMA_DMASR_SOF_LATE_ERR BIT(11) +#define XILINX_VDMA_DMASR_SG_DEC_ERR BIT(10) +#define XILINX_VDMA_DMASR_SG_SLV_ERR BIT(9) +#define XILINX_VDMA_DMASR_EOF_EARLY_ERR BIT(8) +#define XILINX_VDMA_DMASR_SOF_EARLY_ERR BIT(7) +#define XILINX_VDMA_DMASR_DMA_DEC_ERR BIT(6) +#define XILINX_VDMA_DMASR_DMA_SLAVE_ERR BIT(5) +#define XILINX_VDMA_DMASR_DMA_INT_ERR BIT(4) +#define XILINX_VDMA_DMASR_IDLE BIT(1) +#define XILINX_VDMA_DMASR_HALTED BIT(0) +#define XILINX_VDMA_DMASR_DELAY_MASK GENMASK(31, 24) +#define XILINX_VDMA_DMASR_FRAME_COUNT_MASK GENMASK(23, 16) + +#define XILINX_VDMA_REG_CURDESC 0x0008 +#define XILINX_VDMA_REG_TAILDESC 0x0010 +#define XILINX_VDMA_REG_REG_INDEX 0x0014 +#define XILINX_VDMA_REG_FRMSTORE 0x0018 +#define XILINX_VDMA_REG_THRESHOLD 0x001c +#define XILINX_VDMA_REG_FRMPTR_STS 0x0024 +#define XILINX_VDMA_REG_PARK_PTR 0x0028 +#define XILINX_VDMA_PARK_PTR_WR_REF_SHIFT 8 +#define XILINX_VDMA_PARK_PTR_RD_REF_SHIFT 0 +#define XILINX_VDMA_REG_VDMA_VERSION 0x002c + +/* Register Direct Mode Registers */ +#define XILINX_VDMA_REG_VSIZE 0x0000 +#define XILINX_VDMA_REG_HSIZE 0x0004 + +#define XILINX_VDMA_REG_FRMDLY_STRIDE 0x0008 +#define XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT 24 +#define XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT 0 + +#define XILINX_VDMA_REG_START_ADDRESS(n) (0x000c + 4 * (n)) + +/* HW specific definitions */ +#define XILINX_VDMA_MAX_CHANS_PER_DEVICE 0x2 + +#define XILINX_VDMA_DMAXR_ALL_IRQ_MASK \ + (XILINX_VDMA_DMASR_FRM_CNT_IRQ | \ + XILINX_VDMA_DMASR_DLY_CNT_IRQ | \ + XILINX_VDMA_DMASR_ERR_IRQ) + +#define XILINX_VDMA_DMASR_ALL_ERR_MASK \ + (XILINX_VDMA_DMASR_EOL_LATE_ERR | \ + XILINX_VDMA_DMASR_SOF_LATE_ERR | \ + XILINX_VDMA_DMASR_SG_DEC_ERR | \ + XILINX_VDMA_DMASR_SG_SLV_ERR | \ + XILINX_VDMA_DMASR_EOF_EARLY_ERR | \ + XILINX_VDMA_DMASR_SOF_EARLY_ERR | \ + XILINX_VDMA_DMASR_DMA_DEC_ERR | \ + XILINX_VDMA_DMASR_DMA_SLAVE_ERR | \ + XILINX_VDMA_DMASR_DMA_INT_ERR) + +/* + * Recoverable errors are DMA Internal error, SOF Early, EOF Early + * and SOF Late. They are only recoverable when C_FLUSH_ON_FSYNC + * is enabled in the h/w system. + */ +#define XILINX_VDMA_DMASR_ERR_RECOVER_MASK \ + (XILINX_VDMA_DMASR_SOF_LATE_ERR | \ + XILINX_VDMA_DMASR_EOF_EARLY_ERR | \ + XILINX_VDMA_DMASR_SOF_EARLY_ERR | \ + XILINX_VDMA_DMASR_DMA_INT_ERR) + +/* Axi VDMA Flush on Fsync bits */ +#define XILINX_VDMA_FLUSH_S2MM 3 +#define XILINX_VDMA_FLUSH_MM2S 2 +#define XILINX_VDMA_FLUSH_BOTH 1 + +/* Delay loop counter to prevent hardware failure */ +#define XILINX_VDMA_LOOP_COUNT 1000000 + +/** + * struct xilinx_vdma_desc_hw - Hardware Descriptor + * @next_desc: Next Descriptor Pointer @0x00 + * @pad1: Reserved @0x04 + * @buf_addr: Buffer address @0x08 + * @pad2: Reserved @0x0C + * @vsize: Vertical Size @0x10 + * @hsize: Horizontal Size @0x14 + * @stride: Number of bytes between the first + * pixels of each horizontal line @0x18 + */ +struct xilinx_vdma_desc_hw { + u32 next_desc; + u32 pad1; + u32 buf_addr; + u32 pad2; + u32 vsize; + u32 hsize; + u32 stride; +} __aligned(64); + +/** + * struct xilinx_vdma_tx_segment - Descriptor segment + * @hw: Hardware descriptor + * @node: Node in the descriptor segments list + * @phys: Physical address of segment + */ +struct xilinx_vdma_tx_segment { + struct xilinx_vdma_desc_hw hw; + struct list_head node; + dma_addr_t phys; +} __aligned(64); + +/** + * struct xilinx_vdma_tx_descriptor - Per Transaction structure + * @async_tx: Async transaction descriptor + * @segments: TX segments list + * @node: Node in the channel descriptors list + */ +struct xilinx_vdma_tx_descriptor { + struct dma_async_tx_descriptor async_tx; + struct list_head segments; + struct list_head node; +}; + +/** + * struct xilinx_vdma_chan - Driver specific VDMA channel structure + * @xdev: Driver specific device structure + * @ctrl_offset: Control registers offset + * @desc_offset: TX descriptor registers offset + * @lock: Descriptor operation lock + * @pending_list: Descriptors waiting + * @active_desc: Active descriptor + * @allocated_desc: Allocated descriptor + * @done_list: Complete descriptors + * @common: DMA common channel + * @desc_pool: Descriptors pool + * @dev: The dma device + * @irq: Channel IRQ + * @id: Channel ID + * @direction: Transfer direction + * @num_frms: Number of frames + * @has_sg: Support scatter transfers + * @genlock: Support genlock mode + * @err: Channel has errors + * @tasklet: Cleanup work after irq + * @config: Device configuration info + * @flush_on_fsync: Flush on Frame sync + */ +struct xilinx_vdma_chan { + struct xilinx_vdma_device *xdev; + u32 ctrl_offset; + u32 desc_offset; + spinlock_t lock; + struct list_head pending_list; + struct xilinx_vdma_tx_descriptor *active_desc; + struct xilinx_vdma_tx_descriptor *allocated_desc; + struct list_head done_list; + struct dma_chan common; + struct dma_pool *desc_pool; + struct device *dev; + int irq; + int id; + enum dma_transfer_direction direction; + int num_frms; + bool has_sg; + bool genlock; + bool err; + struct tasklet_struct tasklet; + struct xilinx_vdma_config config; + bool flush_on_fsync; +}; + +/** + * struct xilinx_vdma_device - VDMA device structure + * @regs: I/O mapped base address + * @dev: Device Structure + * @common: DMA device structure + * @chan: Driver specific VDMA channel + * @has_sg: Specifies whether Scatter-Gather is present or not + * @flush_on_fsync: Flush on frame sync + */ +struct xilinx_vdma_device { + void __iomem *regs; + struct device *dev; + struct dma_device common; + struct xilinx_vdma_chan *chan[XILINX_VDMA_MAX_CHANS_PER_DEVICE]; + bool has_sg; + u32 flush_on_fsync; +}; + +/* Macros */ +#define to_xilinx_chan(chan) \ + container_of(chan, struct xilinx_vdma_chan, common) +#define to_vdma_tx_descriptor(tx) \ + container_of(tx, struct xilinx_vdma_tx_descriptor, async_tx) + +/* IO accessors */ +static inline u32 vdma_read(struct xilinx_vdma_chan *chan, u32 reg) +{ + return ioread32(chan->xdev->regs + reg); +} + +static inline void vdma_write(struct xilinx_vdma_chan *chan, u32 reg, u32 value) +{ + iowrite32(value, chan->xdev->regs + reg); +} + +static inline void vdma_desc_write(struct xilinx_vdma_chan *chan, u32 reg, + u32 value) +{ + vdma_write(chan, chan->desc_offset + reg, value); +} + +static inline u32 vdma_ctrl_read(struct xilinx_vdma_chan *chan, u32 reg) +{ + return vdma_read(chan, chan->ctrl_offset + reg); +} + +static inline void vdma_ctrl_write(struct xilinx_vdma_chan *chan, u32 reg, + u32 value) +{ + vdma_write(chan, chan->ctrl_offset + reg, value); +} + +static inline void vdma_ctrl_clr(struct xilinx_vdma_chan *chan, u32 reg, + u32 clr) +{ + vdma_ctrl_write(chan, reg, vdma_ctrl_read(chan, reg) & ~clr); +} + +static inline void vdma_ctrl_set(struct xilinx_vdma_chan *chan, u32 reg, + u32 set) +{ + vdma_ctrl_write(chan, reg, vdma_ctrl_read(chan, reg) | set); +} + +/* ----------------------------------------------------------------------------- + * Descriptors and segments alloc and free + */ + +/** + * xilinx_vdma_alloc_tx_segment - Allocate transaction segment + * @chan: Driver specific VDMA channel + * + * Return: The allocated segment on success and NULL on failure. + */ +static struct xilinx_vdma_tx_segment * +xilinx_vdma_alloc_tx_segment(struct xilinx_vdma_chan *chan) +{ + struct xilinx_vdma_tx_segment *segment; + dma_addr_t phys; + + segment = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &phys); + if (!segment) + return NULL; + + memset(segment, 0, sizeof(*segment)); + segment->phys = phys; + + return segment; +} + +/** + * xilinx_vdma_free_tx_segment - Free transaction segment + * @chan: Driver specific VDMA channel + * @segment: VDMA transaction segment + */ +static void xilinx_vdma_free_tx_segment(struct xilinx_vdma_chan *chan, + struct xilinx_vdma_tx_segment *segment) +{ + dma_pool_free(chan->desc_pool, segment, segment->phys); +} + +/** + * xilinx_vdma_tx_descriptor - Allocate transaction descriptor + * @chan: Driver specific VDMA channel + * + * Return: The allocated descriptor on success and NULL on failure. + */ +static struct xilinx_vdma_tx_descriptor * +xilinx_vdma_alloc_tx_descriptor(struct xilinx_vdma_chan *chan) +{ + struct xilinx_vdma_tx_descriptor *desc; + unsigned long flags; + + if (chan->allocated_desc) + return chan->allocated_desc; + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) + return NULL; + + spin_lock_irqsave(&chan->lock, flags); + chan->allocated_desc = desc; + spin_unlock_irqrestore(&chan->lock, flags); + + INIT_LIST_HEAD(&desc->segments); + + return desc; +} + +/** + * xilinx_vdma_free_tx_descriptor - Free transaction descriptor + * @chan: Driver specific VDMA channel + * @desc: VDMA transaction descriptor + */ +static void +xilinx_vdma_free_tx_descriptor(struct xilinx_vdma_chan *chan, + struct xilinx_vdma_tx_descriptor *desc) +{ + struct xilinx_vdma_tx_segment *segment, *next; + + if (!desc) + return; + + list_for_each_entry_safe(segment, next, &desc->segments, node) { + list_del(&segment->node); + xilinx_vdma_free_tx_segment(chan, segment); + } + + kfree(desc); +} + +/* Required functions */ + +/** + * xilinx_vdma_free_desc_list - Free descriptors list + * @chan: Driver specific VDMA channel + * @list: List to parse and delete the descriptor + */ +static void xilinx_vdma_free_desc_list(struct xilinx_vdma_chan *chan, + struct list_head *list) +{ + struct xilinx_vdma_tx_descriptor *desc, *next; + + list_for_each_entry_safe(desc, next, list, node) { + list_del(&desc->node); + xilinx_vdma_free_tx_descriptor(chan, desc); + } +} + +/** + * xilinx_vdma_free_descriptors - Free channel descriptors + * @chan: Driver specific VDMA channel + */ +static void xilinx_vdma_free_descriptors(struct xilinx_vdma_chan *chan) +{ + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + + xilinx_vdma_free_desc_list(chan, &chan->pending_list); + xilinx_vdma_free_desc_list(chan, &chan->done_list); + + xilinx_vdma_free_tx_descriptor(chan, chan->active_desc); + chan->active_desc = NULL; + + spin_unlock_irqrestore(&chan->lock, flags); +} + +/** + * xilinx_vdma_free_chan_resources - Free channel resources + * @dchan: DMA channel + */ +static void xilinx_vdma_free_chan_resources(struct dma_chan *dchan) +{ + struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); + + dev_dbg(chan->dev, "Free all channel resources.\n"); + + xilinx_vdma_free_descriptors(chan); + dma_pool_destroy(chan->desc_pool); + chan->desc_pool = NULL; +} + +/** + * xilinx_vdma_chan_desc_cleanup - Clean channel descriptors + * @chan: Driver specific VDMA channel + */ +static void xilinx_vdma_chan_desc_cleanup(struct xilinx_vdma_chan *chan) +{ + struct xilinx_vdma_tx_descriptor *desc, *next; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + + list_for_each_entry_safe(desc, next, &chan->done_list, node) { + dma_async_tx_callback callback; + void *callback_param; + + /* Remove from the list of running transactions */ + list_del(&desc->node); + + /* Run the link descriptor callback function */ + callback = desc->async_tx.callback; + callback_param = desc->async_tx.callback_param; + if (callback) { + spin_unlock_irqrestore(&chan->lock, flags); + callback(callback_param); + spin_lock_irqsave(&chan->lock, flags); + } + + /* Run any dependencies, then free the descriptor */ + dma_run_dependencies(&desc->async_tx); + xilinx_vdma_free_tx_descriptor(chan, desc); + } + + spin_unlock_irqrestore(&chan->lock, flags); +} + +/** + * xilinx_vdma_do_tasklet - Schedule completion tasklet + * @data: Pointer to the Xilinx VDMA channel structure + */ +static void xilinx_vdma_do_tasklet(unsigned long data) +{ + struct xilinx_vdma_chan *chan = (struct xilinx_vdma_chan *)data; + + xilinx_vdma_chan_desc_cleanup(chan); +} + +/** + * xilinx_vdma_alloc_chan_resources - Allocate channel resources + * @dchan: DMA channel + * + * Return: '0' on success and failure value on error + */ +static int xilinx_vdma_alloc_chan_resources(struct dma_chan *dchan) +{ + struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); + + /* Has this channel already been allocated? */ + if (chan->desc_pool) + return 0; + + /* + * We need the descriptor to be aligned to 64bytes + * for meeting Xilinx VDMA specification requirement. + */ + chan->desc_pool = dma_pool_create("xilinx_vdma_desc_pool", + chan->dev, + sizeof(struct xilinx_vdma_tx_segment), + __alignof__(struct xilinx_vdma_tx_segment), 0); + if (!chan->desc_pool) { + dev_err(chan->dev, + "unable to allocate channel %d descriptor pool\n", + chan->id); + return -ENOMEM; + } + + dma_cookie_init(dchan); + return 0; +} + +/** + * xilinx_vdma_tx_status - Get VDMA transaction status + * @dchan: DMA channel + * @cookie: Transaction identifier + * @txstate: Transaction state + * + * Return: DMA transaction status + */ +static enum dma_status xilinx_vdma_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + return dma_cookie_status(dchan, cookie, txstate); +} + +/** + * xilinx_vdma_is_running - Check if VDMA channel is running + * @chan: Driver specific VDMA channel + * + * Return: '1' if running, '0' if not. + */ +static bool xilinx_vdma_is_running(struct xilinx_vdma_chan *chan) +{ + return !(vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) & + XILINX_VDMA_DMASR_HALTED) && + (vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR) & + XILINX_VDMA_DMACR_RUNSTOP); +} + +/** + * xilinx_vdma_is_idle - Check if VDMA channel is idle + * @chan: Driver specific VDMA channel + * + * Return: '1' if idle, '0' if not. + */ +static bool xilinx_vdma_is_idle(struct xilinx_vdma_chan *chan) +{ + return vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) & + XILINX_VDMA_DMASR_IDLE; +} + +/** + * xilinx_vdma_halt - Halt VDMA channel + * @chan: Driver specific VDMA channel + */ +static void xilinx_vdma_halt(struct xilinx_vdma_chan *chan) +{ + int loop = XILINX_VDMA_LOOP_COUNT; + + vdma_ctrl_clr(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RUNSTOP); + + /* Wait for the hardware to halt */ + do { + if (vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) & + XILINX_VDMA_DMASR_HALTED) + break; + } while (loop--); + + if (!loop) { + dev_err(chan->dev, "Cannot stop channel %p: %x\n", + chan, vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR)); + chan->err = true; + } + + return; +} + +/** + * xilinx_vdma_start - Start VDMA channel + * @chan: Driver specific VDMA channel + */ +static void xilinx_vdma_start(struct xilinx_vdma_chan *chan) +{ + int loop = XILINX_VDMA_LOOP_COUNT; + + vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RUNSTOP); + + /* Wait for the hardware to start */ + do { + if (!(vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) & + XILINX_VDMA_DMASR_HALTED)) + break; + } while (loop--); + + if (!loop) { + dev_err(chan->dev, "Cannot start channel %p: %x\n", + chan, vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR)); + + chan->err = true; + } + + return; +} + +/** + * xilinx_vdma_start_transfer - Starts VDMA transfer + * @chan: Driver specific channel struct pointer + */ +static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan) +{ + struct xilinx_vdma_config *config = &chan->config; + struct xilinx_vdma_tx_descriptor *desc; + unsigned long flags; + u32 reg; + struct xilinx_vdma_tx_segment *head, *tail = NULL; + + if (chan->err) + return; + + spin_lock_irqsave(&chan->lock, flags); + + /* There's already an active descriptor, bail out. */ + if (chan->active_desc) + goto out_unlock; + + if (list_empty(&chan->pending_list)) + goto out_unlock; + + desc = list_first_entry(&chan->pending_list, + struct xilinx_vdma_tx_descriptor, node); + + /* If it is SG mode and hardware is busy, cannot submit */ + if (chan->has_sg && xilinx_vdma_is_running(chan) && + !xilinx_vdma_is_idle(chan)) { + dev_dbg(chan->dev, "DMA controller still busy\n"); + goto out_unlock; + } + + /* + * If hardware is idle, then all descriptors on the running lists are + * done, start new transfers + */ + if (chan->has_sg) { + head = list_first_entry(&desc->segments, + struct xilinx_vdma_tx_segment, node); + tail = list_entry(desc->segments.prev, + struct xilinx_vdma_tx_segment, node); + + vdma_ctrl_write(chan, XILINX_VDMA_REG_CURDESC, head->phys); + } + + /* Configure the hardware using info in the config structure */ + reg = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR); + + if (config->frm_cnt_en) + reg |= XILINX_VDMA_DMACR_FRAMECNT_EN; + else + reg &= ~XILINX_VDMA_DMACR_FRAMECNT_EN; + + /* + * With SG, start with circular mode, so that BDs can be fetched. + * In direct register mode, if not parking, enable circular mode + */ + if (chan->has_sg || !config->park) + reg |= XILINX_VDMA_DMACR_CIRC_EN; + + if (config->park) + reg &= ~XILINX_VDMA_DMACR_CIRC_EN; + + vdma_ctrl_write(chan, XILINX_VDMA_REG_DMACR, reg); + + if (config->park && (config->park_frm >= 0) && + (config->park_frm < chan->num_frms)) { + if (chan->direction == DMA_MEM_TO_DEV) + vdma_write(chan, XILINX_VDMA_REG_PARK_PTR, + config->park_frm << + XILINX_VDMA_PARK_PTR_RD_REF_SHIFT); + else + vdma_write(chan, XILINX_VDMA_REG_PARK_PTR, + config->park_frm << + XILINX_VDMA_PARK_PTR_WR_REF_SHIFT); + } + + /* Start the hardware */ + xilinx_vdma_start(chan); + + if (chan->err) + goto out_unlock; + + /* Start the transfer */ + if (chan->has_sg) { + vdma_ctrl_write(chan, XILINX_VDMA_REG_TAILDESC, tail->phys); + } else { + struct xilinx_vdma_tx_segment *segment, *last = NULL; + int i = 0; + + list_for_each_entry(segment, &desc->segments, node) { + vdma_desc_write(chan, + XILINX_VDMA_REG_START_ADDRESS(i++), + segment->hw.buf_addr); + last = segment; + } + + if (!last) + goto out_unlock; + + /* HW expects these parameters to be same for one transaction */ + vdma_desc_write(chan, XILINX_VDMA_REG_HSIZE, last->hw.hsize); + vdma_desc_write(chan, XILINX_VDMA_REG_FRMDLY_STRIDE, + last->hw.stride); + vdma_desc_write(chan, XILINX_VDMA_REG_VSIZE, last->hw.vsize); + } + + list_del(&desc->node); + chan->active_desc = desc; + +out_unlock: + spin_unlock_irqrestore(&chan->lock, flags); +} + +/** + * xilinx_vdma_issue_pending - Issue pending transactions + * @dchan: DMA channel + */ +static void xilinx_vdma_issue_pending(struct dma_chan *dchan) +{ + struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); + + xilinx_vdma_start_transfer(chan); +} + +/** + * xilinx_vdma_complete_descriptor - Mark the active descriptor as complete + * @chan : xilinx DMA channel + * + * CONTEXT: hardirq + */ +static void xilinx_vdma_complete_descriptor(struct xilinx_vdma_chan *chan) +{ + struct xilinx_vdma_tx_descriptor *desc; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + + desc = chan->active_desc; + if (!desc) { + dev_dbg(chan->dev, "no running descriptors\n"); + goto out_unlock; + } + + dma_cookie_complete(&desc->async_tx); + list_add_tail(&desc->node, &chan->done_list); + + chan->active_desc = NULL; + +out_unlock: + spin_unlock_irqrestore(&chan->lock, flags); +} + +/** + * xilinx_vdma_reset - Reset VDMA channel + * @chan: Driver specific VDMA channel + * + * Return: '0' on success and failure value on error + */ +static int xilinx_vdma_reset(struct xilinx_vdma_chan *chan) +{ + int loop = XILINX_VDMA_LOOP_COUNT; + u32 tmp; + + vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RESET); + + tmp = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR) & + XILINX_VDMA_DMACR_RESET; + + /* Wait for the hardware to finish reset */ + do { + tmp = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR) & + XILINX_VDMA_DMACR_RESET; + } while (loop-- && tmp); + + if (!loop) { + dev_err(chan->dev, "reset timeout, cr %x, sr %x\n", + vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR), + vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR)); + return -ETIMEDOUT; + } + + chan->err = false; + + return 0; +} + +/** + * xilinx_vdma_chan_reset - Reset VDMA channel and enable interrupts + * @chan: Driver specific VDMA channel + * + * Return: '0' on success and failure value on error + */ +static int xilinx_vdma_chan_reset(struct xilinx_vdma_chan *chan) +{ + int err; + + /* Reset VDMA */ + err = xilinx_vdma_reset(chan); + if (err) + return err; + + /* Enable interrupts */ + vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, + XILINX_VDMA_DMAXR_ALL_IRQ_MASK); + + return 0; +} + +/** + * xilinx_vdma_irq_handler - VDMA Interrupt handler + * @irq: IRQ number + * @data: Pointer to the Xilinx VDMA channel structure + * + * Return: IRQ_HANDLED/IRQ_NONE + */ +static irqreturn_t xilinx_vdma_irq_handler(int irq, void *data) +{ + struct xilinx_vdma_chan *chan = data; + u32 status; + + /* Read the status and ack the interrupts. */ + status = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR); + if (!(status & XILINX_VDMA_DMAXR_ALL_IRQ_MASK)) + return IRQ_NONE; + + vdma_ctrl_write(chan, XILINX_VDMA_REG_DMASR, + status & XILINX_VDMA_DMAXR_ALL_IRQ_MASK); + + if (status & XILINX_VDMA_DMASR_ERR_IRQ) { + /* + * An error occurred. If C_FLUSH_ON_FSYNC is enabled and the + * error is recoverable, ignore it. Otherwise flag the error. + * + * Only recoverable errors can be cleared in the DMASR register, + * make sure not to write to other error bits to 1. + */ + u32 errors = status & XILINX_VDMA_DMASR_ALL_ERR_MASK; + vdma_ctrl_write(chan, XILINX_VDMA_REG_DMASR, + errors & XILINX_VDMA_DMASR_ERR_RECOVER_MASK); + + if (!chan->flush_on_fsync || + (errors & ~XILINX_VDMA_DMASR_ERR_RECOVER_MASK)) { + dev_err(chan->dev, + "Channel %p has errors %x, cdr %x tdr %x\n", + chan, errors, + vdma_ctrl_read(chan, XILINX_VDMA_REG_CURDESC), + vdma_ctrl_read(chan, XILINX_VDMA_REG_TAILDESC)); + chan->err = true; + } + } + + if (status & XILINX_VDMA_DMASR_DLY_CNT_IRQ) { + /* + * Device takes too long to do the transfer when user requires + * responsiveness. + */ + dev_dbg(chan->dev, "Inter-packet latency too long\n"); + } + + if (status & XILINX_VDMA_DMASR_FRM_CNT_IRQ) { + xilinx_vdma_complete_descriptor(chan); + xilinx_vdma_start_transfer(chan); + } + + tasklet_schedule(&chan->tasklet); + return IRQ_HANDLED; +} + +/** + * xilinx_vdma_tx_submit - Submit DMA transaction + * @tx: Async transaction descriptor + * + * Return: cookie value on success and failure value on error + */ +static dma_cookie_t xilinx_vdma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct xilinx_vdma_tx_descriptor *desc = to_vdma_tx_descriptor(tx); + struct xilinx_vdma_chan *chan = to_xilinx_chan(tx->chan); + dma_cookie_t cookie; + unsigned long flags; + int err; + + if (chan->err) { + /* + * If reset fails, need to hard reset the system. + * Channel is no longer functional + */ + err = xilinx_vdma_chan_reset(chan); + if (err < 0) + return err; + } + + spin_lock_irqsave(&chan->lock, flags); + + cookie = dma_cookie_assign(tx); + + /* Append the transaction to the pending transactions queue. */ + list_add_tail(&desc->node, &chan->pending_list); + + /* Free the allocated desc */ + chan->allocated_desc = NULL; + + spin_unlock_irqrestore(&chan->lock, flags); + + return cookie; +} + +/** + * xilinx_vdma_dma_prep_interleaved - prepare a descriptor for a + * DMA_SLAVE transaction + * @dchan: DMA channel + * @xt: Interleaved template pointer + * @flags: transfer ack flags + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor * +xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan, + struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_vdma_tx_descriptor *desc; + struct xilinx_vdma_tx_segment *segment, *prev = NULL; + struct xilinx_vdma_desc_hw *hw; + + if (!is_slave_direction(xt->dir)) + return NULL; + + if (!xt->numf || !xt->sgl[0].size) + return NULL; + + /* Allocate a transaction descriptor. */ + desc = xilinx_vdma_alloc_tx_descriptor(chan); + if (!desc) + return NULL; + + dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); + desc->async_tx.tx_submit = xilinx_vdma_tx_submit; + async_tx_ack(&desc->async_tx); + + /* Allocate the link descriptor from DMA pool */ + segment = xilinx_vdma_alloc_tx_segment(chan); + if (!segment) + goto error; + + /* Fill in the hardware descriptor */ + hw = &segment->hw; + hw->vsize = xt->numf; + hw->hsize = xt->sgl[0].size; + hw->stride = xt->sgl[0].icg << + XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT; + hw->stride |= chan->config.frm_dly << + XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT; + + if (xt->dir != DMA_MEM_TO_DEV) + hw->buf_addr = xt->dst_start; + else + hw->buf_addr = xt->src_start; + + /* Link the previous next descriptor to current */ + prev = list_last_entry(&desc->segments, + struct xilinx_vdma_tx_segment, node); + prev->hw.next_desc = segment->phys; + + /* Insert the segment into the descriptor segments list. */ + list_add_tail(&segment->node, &desc->segments); + + prev = segment; + + /* Link the last hardware descriptor with the first. */ + segment = list_first_entry(&desc->segments, + struct xilinx_vdma_tx_segment, node); + prev->hw.next_desc = segment->phys; + + return &desc->async_tx; + +error: + xilinx_vdma_free_tx_descriptor(chan, desc); + return NULL; +} + +/** + * xilinx_vdma_terminate_all - Halt the channel and free descriptors + * @chan: Driver specific VDMA Channel pointer + */ +static void xilinx_vdma_terminate_all(struct xilinx_vdma_chan *chan) +{ + /* Halt the DMA engine */ + xilinx_vdma_halt(chan); + + /* Remove and free all of the descriptors in the lists */ + xilinx_vdma_free_descriptors(chan); +} + +/** + * xilinx_vdma_channel_set_config - Configure VDMA channel + * Run-time configuration for Axi VDMA, supports: + * . halt the channel + * . configure interrupt coalescing and inter-packet delay threshold + * . start/stop parking + * . enable genlock + * + * @dchan: DMA channel + * @cfg: VDMA device configuration pointer + * + * Return: '0' on success and failure value on error + */ +int xilinx_vdma_channel_set_config(struct dma_chan *dchan, + struct xilinx_vdma_config *cfg) +{ + struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); + u32 dmacr; + + if (cfg->reset) + return xilinx_vdma_chan_reset(chan); + + dmacr = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR); + + chan->config.frm_dly = cfg->frm_dly; + chan->config.park = cfg->park; + + /* genlock settings */ + chan->config.gen_lock = cfg->gen_lock; + chan->config.master = cfg->master; + + if (cfg->gen_lock && chan->genlock) { + dmacr |= XILINX_VDMA_DMACR_GENLOCK_EN; + dmacr |= cfg->master << XILINX_VDMA_DMACR_MASTER_SHIFT; + } + + chan->config.frm_cnt_en = cfg->frm_cnt_en; + if (cfg->park) + chan->config.park_frm = cfg->park_frm; + else + chan->config.park_frm = -1; + + chan->config.coalesc = cfg->coalesc; + chan->config.delay = cfg->delay; + + if (cfg->coalesc <= XILINX_VDMA_DMACR_FRAME_COUNT_MAX) { + dmacr |= cfg->coalesc << XILINX_VDMA_DMACR_FRAME_COUNT_SHIFT; + chan->config.coalesc = cfg->coalesc; + } + + if (cfg->delay <= XILINX_VDMA_DMACR_DELAY_MAX) { + dmacr |= cfg->delay << XILINX_VDMA_DMACR_DELAY_SHIFT; + chan->config.delay = cfg->delay; + } + + /* FSync Source selection */ + dmacr &= ~XILINX_VDMA_DMACR_FSYNCSRC_MASK; + dmacr |= cfg->ext_fsync << XILINX_VDMA_DMACR_FSYNCSRC_SHIFT; + + vdma_ctrl_write(chan, XILINX_VDMA_REG_DMACR, dmacr); + + return 0; +} +EXPORT_SYMBOL(xilinx_vdma_channel_set_config); + +/** + * xilinx_vdma_device_control - Configure DMA channel of the device + * @dchan: DMA Channel pointer + * @cmd: DMA control command + * @arg: Channel configuration + * + * Return: '0' on success and failure value on error + */ +static int xilinx_vdma_device_control(struct dma_chan *dchan, + enum dma_ctrl_cmd cmd, unsigned long arg) +{ + struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); + + if (cmd != DMA_TERMINATE_ALL) + return -ENXIO; + + xilinx_vdma_terminate_all(chan); + + return 0; +} + +/* ----------------------------------------------------------------------------- + * Probe and remove + */ + +/** + * xilinx_vdma_chan_remove - Per Channel remove function + * @chan: Driver specific VDMA channel + */ +static void xilinx_vdma_chan_remove(struct xilinx_vdma_chan *chan) +{ + /* Disable all interrupts */ + vdma_ctrl_clr(chan, XILINX_VDMA_REG_DMACR, + XILINX_VDMA_DMAXR_ALL_IRQ_MASK); + + if (chan->irq > 0) + free_irq(chan->irq, chan); + + tasklet_kill(&chan->tasklet); + + list_del(&chan->common.device_node); +} + +/** + * xilinx_vdma_chan_probe - Per Channel Probing + * It get channel features from the device tree entry and + * initialize special channel handling routines + * + * @xdev: Driver specific device structure + * @node: Device node + * + * Return: '0' on success and failure value on error + */ +static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev, + struct device_node *node) +{ + struct xilinx_vdma_chan *chan; + bool has_dre = false; + u32 value, width; + int err; + + /* Allocate and initialize the channel structure */ + chan = devm_kzalloc(xdev->dev, sizeof(*chan), GFP_KERNEL); + if (!chan) + return -ENOMEM; + + chan->dev = xdev->dev; + chan->xdev = xdev; + chan->has_sg = xdev->has_sg; + + spin_lock_init(&chan->lock); + INIT_LIST_HEAD(&chan->pending_list); + INIT_LIST_HEAD(&chan->done_list); + + /* Retrieve the channel properties from the device tree */ + has_dre = of_property_read_bool(node, "xlnx,include-dre"); + + chan->genlock = of_property_read_bool(node, "xlnx,genlock-mode"); + + err = of_property_read_u32(node, "xlnx,datawidth", &value); + if (err) { + dev_err(xdev->dev, "missing xlnx,datawidth property\n"); + return err; + } + width = value >> 3; /* Convert bits to bytes */ + + /* If data width is greater than 8 bytes, DRE is not in hw */ + if (width > 8) + has_dre = false; + + if (!has_dre) + xdev->common.copy_align = fls(width - 1); + + if (of_device_is_compatible(node, "xlnx,axi-vdma-mm2s-channel")) { + chan->direction = DMA_MEM_TO_DEV; + chan->id = 0; + + chan->ctrl_offset = XILINX_VDMA_MM2S_CTRL_OFFSET; + chan->desc_offset = XILINX_VDMA_MM2S_DESC_OFFSET; + + if (xdev->flush_on_fsync == XILINX_VDMA_FLUSH_BOTH || + xdev->flush_on_fsync == XILINX_VDMA_FLUSH_MM2S) + chan->flush_on_fsync = true; + } else if (of_device_is_compatible(node, + "xlnx,axi-vdma-s2mm-channel")) { + chan->direction = DMA_DEV_TO_MEM; + chan->id = 1; + + chan->ctrl_offset = XILINX_VDMA_S2MM_CTRL_OFFSET; + chan->desc_offset = XILINX_VDMA_S2MM_DESC_OFFSET; + + if (xdev->flush_on_fsync == XILINX_VDMA_FLUSH_BOTH || + xdev->flush_on_fsync == XILINX_VDMA_FLUSH_S2MM) + chan->flush_on_fsync = true; + } else { + dev_err(xdev->dev, "Invalid channel compatible node\n"); + return -EINVAL; + } + + /* Request the interrupt */ + chan->irq = irq_of_parse_and_map(node, 0); + err = request_irq(chan->irq, xilinx_vdma_irq_handler, IRQF_SHARED, + "xilinx-vdma-controller", chan); + if (err) { + dev_err(xdev->dev, "unable to request IRQ %d\n", chan->irq); + return err; + } + + /* Initialize the tasklet */ + tasklet_init(&chan->tasklet, xilinx_vdma_do_tasklet, + (unsigned long)chan); + + /* + * Initialize the DMA channel and add it to the DMA engine channels + * list. + */ + chan->common.device = &xdev->common; + + list_add_tail(&chan->common.device_node, &xdev->common.channels); + xdev->chan[chan->id] = chan; + + /* Reset the channel */ + err = xilinx_vdma_chan_reset(chan); + if (err < 0) { + dev_err(xdev->dev, "Reset channel failed\n"); + return err; + } + + return 0; +} + +/** + * of_dma_xilinx_xlate - Translation function + * @dma_spec: Pointer to DMA specifier as found in the device tree + * @ofdma: Pointer to DMA controller data + * + * Return: DMA channel pointer on success and NULL on error + */ +static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct xilinx_vdma_device *xdev = ofdma->of_dma_data; + int chan_id = dma_spec->args[0]; + + if (chan_id >= XILINX_VDMA_MAX_CHANS_PER_DEVICE) + return NULL; + + return dma_get_slave_channel(&xdev->chan[chan_id]->common); +} + +/** + * xilinx_vdma_probe - Driver probe function + * @pdev: Pointer to the platform_device structure + * + * Return: '0' on success and failure value on error + */ +static int xilinx_vdma_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + struct xilinx_vdma_device *xdev; + struct device_node *child; + struct resource *io; + u32 num_frames; + int i, err; + + /* Allocate and initialize the DMA engine structure */ + xdev = devm_kzalloc(&pdev->dev, sizeof(*xdev), GFP_KERNEL); + if (!xdev) + return -ENOMEM; + + xdev->dev = &pdev->dev; + + /* Request and map I/O memory */ + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + xdev->regs = devm_ioremap_resource(&pdev->dev, io); + if (IS_ERR(xdev->regs)) + return PTR_ERR(xdev->regs); + + /* Retrieve the DMA engine properties from the device tree */ + xdev->has_sg = of_property_read_bool(node, "xlnx,include-sg"); + + err = of_property_read_u32(node, "xlnx,num-fstores", &num_frames); + if (err < 0) { + dev_err(xdev->dev, "missing xlnx,num-fstores property\n"); + return err; + } + + err = of_property_read_u32(node, "xlnx,flush-fsync", + &xdev->flush_on_fsync); + if (err < 0) + dev_warn(xdev->dev, "missing xlnx,flush-fsync property\n"); + + /* Initialize the DMA engine */ + xdev->common.dev = &pdev->dev; + + INIT_LIST_HEAD(&xdev->common.channels); + dma_cap_set(DMA_SLAVE, xdev->common.cap_mask); + dma_cap_set(DMA_PRIVATE, xdev->common.cap_mask); + + xdev->common.device_alloc_chan_resources = + xilinx_vdma_alloc_chan_resources; + xdev->common.device_free_chan_resources = + xilinx_vdma_free_chan_resources; + xdev->common.device_prep_interleaved_dma = + xilinx_vdma_dma_prep_interleaved; + xdev->common.device_control = xilinx_vdma_device_control; + xdev->common.device_tx_status = xilinx_vdma_tx_status; + xdev->common.device_issue_pending = xilinx_vdma_issue_pending; + + platform_set_drvdata(pdev, xdev); + + /* Initialize the channels */ + for_each_child_of_node(node, child) { + err = xilinx_vdma_chan_probe(xdev, child); + if (err < 0) + goto error; + } + + for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++) + if (xdev->chan[i]) + xdev->chan[i]->num_frms = num_frames; + + /* Register the DMA engine with the core */ + dma_async_device_register(&xdev->common); + + err = of_dma_controller_register(node, of_dma_xilinx_xlate, + xdev); + if (err < 0) { + dev_err(&pdev->dev, "Unable to register DMA to DT\n"); + dma_async_device_unregister(&xdev->common); + goto error; + } + + dev_info(&pdev->dev, "Xilinx AXI VDMA Engine Driver Probed!!\n"); + + return 0; + +error: + for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++) + if (xdev->chan[i]) + xilinx_vdma_chan_remove(xdev->chan[i]); + + return err; +} + +/** + * xilinx_vdma_remove - Driver remove function + * @pdev: Pointer to the platform_device structure + * + * Return: Always '0' + */ +static int xilinx_vdma_remove(struct platform_device *pdev) +{ + struct xilinx_vdma_device *xdev = platform_get_drvdata(pdev); + int i; + + of_dma_controller_free(pdev->dev.of_node); + + dma_async_device_unregister(&xdev->common); + + for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++) + if (xdev->chan[i]) + xilinx_vdma_chan_remove(xdev->chan[i]); + + return 0; +} + +static const struct of_device_id xilinx_vdma_of_ids[] = { + { .compatible = "xlnx,axi-vdma-1.00.a",}, + {} +}; + +static struct platform_driver xilinx_vdma_driver = { + .driver = { + .name = "xilinx-vdma", + .owner = THIS_MODULE, + .of_match_table = xilinx_vdma_of_ids, + }, + .probe = xilinx_vdma_probe, + .remove = xilinx_vdma_remove, +}; + +module_platform_driver(xilinx_vdma_driver); + +MODULE_AUTHOR("Xilinx, Inc."); +MODULE_DESCRIPTION("Xilinx VDMA driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/amba/xilinx_dma.h b/include/linux/amba/xilinx_dma.h new file mode 100644 index 000000000000..34b98f276ed0 --- /dev/null +++ b/include/linux/amba/xilinx_dma.h @@ -0,0 +1,47 @@ +/* + * Xilinx DMA Engine drivers support header file + * + * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __DMA_XILINX_DMA_H +#define __DMA_XILINX_DMA_H + +#include +#include + +/** + * struct xilinx_vdma_config - VDMA Configuration structure + * @frm_dly: Frame delay + * @gen_lock: Whether in gen-lock mode + * @master: Master that it syncs to + * @frm_cnt_en: Enable frame count enable + * @park: Whether wants to park + * @park_frm: Frame to park on + * @coalesc: Interrupt coalescing threshold + * @delay: Delay counter + * @reset: Reset Channel + * @ext_fsync: External Frame Sync source + */ +struct xilinx_vdma_config { + int frm_dly; + int gen_lock; + int master; + int frm_cnt_en; + int park; + int park_frm; + int coalesc; + int delay; + int reset; + int ext_fsync; +}; + +int xilinx_vdma_channel_set_config(struct dma_chan *dchan, + struct xilinx_vdma_config *cfg); + +#endif -- cgit v1.2.3-71-gd317 From 7fa857ed041537ee6cbc7ee4ab0204a1231cfcb9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 28 Apr 2014 11:14:27 -0700 Subject: net: dsa: add ds_to_priv DSA drivers have a trick which consists in allocating "priv_size" more bytes to account for the DSA driver private context. Add a helper function to access that private context instead of open-coding it in drivers with (void *)(ds + 1). Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 7828ebf99ee1..6efce384451e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -181,6 +181,11 @@ struct dsa_switch_driver { void register_switch_driver(struct dsa_switch_driver *type); void unregister_switch_driver(struct dsa_switch_driver *type); +static inline void *ds_to_priv(struct dsa_switch *ds) +{ + return (void *)(ds + 1); +} + /* * The original DSA tag format and some other tag formats have no * ethertype, which means that we need to add a little hack to the -- cgit v1.2.3-71-gd317 From 5c98631cca574ac6255885cf372f6bcf9dcfd483 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Tue, 29 Apr 2014 11:57:34 +0900 Subject: net: ipv6: Introduce ip6_sk_dst_hoplimit. This replaces 6 identical code snippets with a call to a new static inline function. Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- include/net/addrconf.h | 5 ----- include/net/ipv6.h | 19 +++++++++++++++++++ net/ipv6/icmp.c | 14 ++------------ net/ipv6/ip6_flowlabel.c | 1 - net/ipv6/ping.c | 7 +------ net/ipv6/raw.c | 10 ++-------- net/ipv6/udp.c | 10 ++-------- net/l2tp/l2tp_ip6.c | 10 ++-------- 8 files changed, 28 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 933a9f22a05f..f679877bb601 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -306,11 +306,6 @@ static inline void addrconf_addr_solict_mult(const struct in6_addr *addr, htonl(0xFF000000) | addr->s6_addr32[3]); } -static inline bool ipv6_addr_is_multicast(const struct in6_addr *addr) -{ - return (addr->s6_addr32[0] & htonl(0xFF000000)) == htonl(0xFF000000); -} - static inline bool ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d640925bc454..5b40ad297b8c 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -583,6 +583,11 @@ static inline bool ipv6_addr_orchid(const struct in6_addr *a) return (a->s6_addr32[0] & htonl(0xfffffff0)) == htonl(0x20010010); } +static inline bool ipv6_addr_is_multicast(const struct in6_addr *addr) +{ + return (addr->s6_addr32[0] & htonl(0xFF000000)) == htonl(0xFF000000); +} + static inline void ipv6_addr_set_v4mapped(const __be32 addr, struct in6_addr *v4mapped) { @@ -664,6 +669,20 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt); int ip6_dst_hoplimit(struct dst_entry *dst); +static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, + struct dst_entry *dst) +{ + int hlimit; + + if (ipv6_addr_is_multicast(&fl6->daddr)) + hlimit = np->mcast_hops; + else + hlimit = np->hop_limit; + if (hlimit < 0) + hlimit = ip6_dst_hoplimit(dst); + return hlimit; +} + /* * Header manipulation */ diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 7b326529e6a2..3b0905b77127 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -493,12 +493,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (IS_ERR(dst)) goto out; - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); msg.skb = skb; msg.offset = skb_network_offset(skb); @@ -593,12 +588,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (IS_ERR(dst)) goto out; - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); idev = __in6_dev_get(skb->dev); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 0961b5ef866d..4052694c6f2c 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -26,7 +26,6 @@ #include #include -#include #include #include diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index bda74291c3e0..a2a1d80dfe0c 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -168,12 +168,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pfh.wcheck = 0; pfh.family = AF_INET6; - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); lock_sock(sk); err = ip6_append_data(sk, ping_getfrag, &pfh, len, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 1f29996e368a..dddfb5fa2b7a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -873,14 +873,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, err = PTR_ERR(dst); goto out; } - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); - } + if (hlimit < 0) + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (tclass < 0) tclass = np->tclass; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1e586d92260e..d8d6ca04bb9d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1232,14 +1232,8 @@ do_udp_sendmsg: goto out; } - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); - } + if (hlimit < 0) + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (tclass < 0) tclass = np->tclass; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 7704ea9502fd..e472d44a3b91 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -605,14 +605,8 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, goto out; } - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); - } + if (hlimit < 0) + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (tclass < 0) tclass = np->tclass; -- cgit v1.2.3-71-gd317 From b87577b7c768683736eea28f70779e8c75b4df62 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 1 May 2014 09:26:53 +1000 Subject: drm: try harder to avoid regression when merging mode bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For QXL hw we really want the bits to be replaced as we change the preferred mode on the fly, and the same goes for virgl when I get to it, however the original fix for this seems to have caused a wierd regression on Intel G33 that in a stunning display of failure at opposition to his normal self, Daniel failed to diagnose. So we are left doing this, ugly ugly ugly ugly, Daniel you fixed that G33 yet?, ugly, ugly. Tested-by: Marc-André Lureau Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_modes.c | 9 ++++-- drivers/gpu/drm/drm_probe_helper.c | 64 +++++++++++++++++++++++++------------ drivers/gpu/drm/qxl/qxl_display.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 2 +- include/drm/drm_crtc_helper.h | 4 +++ include/drm/drm_modes.h | 2 +- 6 files changed, 57 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 8b410576fce4..bedf1894e17e 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1013,6 +1013,7 @@ EXPORT_SYMBOL(drm_mode_sort); /** * drm_mode_connector_list_update - update the mode list for the connector * @connector: the connector to update + * @merge_type_bits: whether to merge or overright type bits. * * This moves the modes from the @connector probed_modes list * to the actual mode list. It compares the probed mode against the current @@ -1021,7 +1022,8 @@ EXPORT_SYMBOL(drm_mode_sort); * This is just a helper functions doesn't validate any modes itself and also * doesn't prune any invalid modes. Callers need to do that themselves. */ -void drm_mode_connector_list_update(struct drm_connector *connector) +void drm_mode_connector_list_update(struct drm_connector *connector, + bool merge_type_bits) { struct drm_display_mode *mode; struct drm_display_mode *pmode, *pt; @@ -1039,7 +1041,10 @@ void drm_mode_connector_list_update(struct drm_connector *connector) /* if equal delete the probed mode */ mode->status = pmode->status; /* Merge type bits together */ - mode->type |= pmode->type; + if (merge_type_bits) + mode->type |= pmode->type; + else + mode->type = pmode->type; list_del(&pmode->head); drm_mode_destroy(connector->dev, pmode); break; diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index e70f54d4a581..8afdd0998a8c 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -82,26 +82,8 @@ static void drm_mode_validate_flag(struct drm_connector *connector, return; } -/** - * drm_helper_probe_single_connector_modes - get complete set of display modes - * @connector: connector to probe - * @maxX: max width for modes - * @maxY: max height for modes - * - * Based on the helper callbacks implemented by @connector try to detect all - * valid modes. Modes will first be added to the connector's probed_modes list, - * then culled (based on validity and the @maxX, @maxY parameters) and put into - * the normal modes list. - * - * Intended to be use as a generic implementation of the ->fill_modes() - * @connector vfunc for drivers that use the crtc helpers for output mode - * filtering and detection. - * - * Returns: - * The number of modes found on @connector. - */ -int drm_helper_probe_single_connector_modes(struct drm_connector *connector, - uint32_t maxX, uint32_t maxY) +static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connector *connector, + uint32_t maxX, uint32_t maxY, bool merge_type_bits) { struct drm_device *dev = connector->dev; struct drm_display_mode *mode; @@ -155,7 +137,7 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector, if (count == 0) goto prune; - drm_mode_connector_list_update(connector); + drm_mode_connector_list_update(connector, merge_type_bits); if (maxX && maxY) drm_mode_validate_size(dev, &connector->modes, maxX, maxY); @@ -194,8 +176,48 @@ prune: return count; } + +/** + * drm_helper_probe_single_connector_modes - get complete set of display modes + * @connector: connector to probe + * @maxX: max width for modes + * @maxY: max height for modes + * + * Based on the helper callbacks implemented by @connector try to detect all + * valid modes. Modes will first be added to the connector's probed_modes list, + * then culled (based on validity and the @maxX, @maxY parameters) and put into + * the normal modes list. + * + * Intended to be use as a generic implementation of the ->fill_modes() + * @connector vfunc for drivers that use the crtc helpers for output mode + * filtering and detection. + * + * Returns: + * The number of modes found on @connector. + */ +int drm_helper_probe_single_connector_modes(struct drm_connector *connector, + uint32_t maxX, uint32_t maxY) +{ + return drm_helper_probe_single_connector_modes_merge_bits(connector, maxX, maxY, true); +} EXPORT_SYMBOL(drm_helper_probe_single_connector_modes); +/** + * drm_helper_probe_single_connector_modes_nomerge - get complete set of display modes + * @connector: connector to probe + * @maxX: max width for modes + * @maxY: max height for modes + * + * This operates like drm_hehlper_probe_single_connector_modes except it + * replaces the mode bits instead of merging them for preferred modes. + */ +int drm_helper_probe_single_connector_modes_nomerge(struct drm_connector *connector, + uint32_t maxX, uint32_t maxY) +{ + return drm_helper_probe_single_connector_modes_merge_bits(connector, maxX, maxY, false); +} +EXPORT_SYMBOL(drm_helper_probe_single_connector_modes_nomerge); + /** * drm_kms_helper_hotplug_event - fire off KMS hotplug events * @dev: drm_device whose connector state changed diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 41bdd174657e..3ab9072d3623 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -841,7 +841,7 @@ static const struct drm_connector_funcs qxl_connector_funcs = { .save = qxl_conn_save, .restore = qxl_conn_restore, .detect = qxl_conn_detect, - .fill_modes = drm_helper_probe_single_connector_modes, + .fill_modes = drm_helper_probe_single_connector_modes_nomerge, .set_property = qxl_conn_set_property, .destroy = qxl_conn_destroy, }; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index a2dde5ad8138..e7199b454ca0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2001,7 +2001,7 @@ int vmw_du_connector_fill_modes(struct drm_connector *connector, if (du->pref_mode) list_move(&du->pref_mode->head, &connector->probed_modes); - drm_mode_connector_list_update(connector); + drm_mode_connector_list_update(connector, true); return 1; } diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index 36a5febac2a6..f51c8393e9a8 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -165,6 +165,10 @@ extern void drm_helper_resume_force_mode(struct drm_device *dev); extern int drm_helper_probe_single_connector_modes(struct drm_connector *connector, uint32_t maxX, uint32_t maxY); +extern int drm_helper_probe_single_connector_modes_nomerge(struct drm_connector + *connector, + uint32_t maxX, + uint32_t maxY); extern void drm_kms_helper_poll_init(struct drm_device *dev); extern void drm_kms_helper_poll_fini(struct drm_device *dev); extern bool drm_helper_hpd_irq_event(struct drm_device *dev); diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h index 2dbbf9976669..91d0582f924e 100644 --- a/include/drm/drm_modes.h +++ b/include/drm/drm_modes.h @@ -223,7 +223,7 @@ void drm_mode_validate_size(struct drm_device *dev, void drm_mode_prune_invalid(struct drm_device *dev, struct list_head *mode_list, bool verbose); void drm_mode_sort(struct list_head *mode_list); -void drm_mode_connector_list_update(struct drm_connector *connector); +void drm_mode_connector_list_update(struct drm_connector *connector, bool merge_type_bits); /* parsing cmdline modes */ bool -- cgit v1.2.3-71-gd317 From dfbb85cab5f0819d0424a3637b03e7892704fa42 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 2 Apr 2014 20:17:00 -0700 Subject: DMA: shdma: add cyclic transfer support This patch add cyclic transfer support and enables dmaengine_prep_dma_cyclic() Signed-off-by: Kuninori Morimoto [reflown changelog for readablity] Signed-off-by: Vinod Koul --- drivers/dma/sh/shdma-base.c | 72 ++++++++++++++++++++++++++++++++++++++++----- include/linux/shdma-base.h | 1 + 2 files changed, 66 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c index 6786ecbd5ed4..974794cdb6ed 100644 --- a/drivers/dma/sh/shdma-base.c +++ b/drivers/dma/sh/shdma-base.c @@ -304,6 +304,7 @@ static dma_async_tx_callback __ld_cleanup(struct shdma_chan *schan, bool all) dma_async_tx_callback callback = NULL; void *param = NULL; unsigned long flags; + LIST_HEAD(cyclic_list); spin_lock_irqsave(&schan->chan_lock, flags); list_for_each_entry_safe(desc, _desc, &schan->ld_queue, node) { @@ -369,10 +370,16 @@ static dma_async_tx_callback __ld_cleanup(struct shdma_chan *schan, bool all) if (((desc->mark == DESC_COMPLETED || desc->mark == DESC_WAITING) && async_tx_test_ack(&desc->async_tx)) || all) { - /* Remove from ld_queue list */ - desc->mark = DESC_IDLE; - list_move(&desc->node, &schan->ld_free); + if (all || !desc->cyclic) { + /* Remove from ld_queue list */ + desc->mark = DESC_IDLE; + list_move(&desc->node, &schan->ld_free); + } else { + /* reuse as cyclic */ + desc->mark = DESC_SUBMITTED; + list_move_tail(&desc->node, &cyclic_list); + } if (list_empty(&schan->ld_queue)) { dev_dbg(schan->dev, "Bring down channel %d\n", schan->id); @@ -389,6 +396,8 @@ static dma_async_tx_callback __ld_cleanup(struct shdma_chan *schan, bool all) */ schan->dma_chan.completed_cookie = schan->dma_chan.cookie; + list_splice_tail(&cyclic_list, &schan->ld_queue); + spin_unlock_irqrestore(&schan->chan_lock, flags); if (callback) @@ -521,7 +530,7 @@ static struct shdma_desc *shdma_add_desc(struct shdma_chan *schan, */ static struct dma_async_tx_descriptor *shdma_prep_sg(struct shdma_chan *schan, struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr, - enum dma_transfer_direction direction, unsigned long flags) + enum dma_transfer_direction direction, unsigned long flags, bool cyclic) { struct scatterlist *sg; struct shdma_desc *first = NULL, *new = NULL /* compiler... */; @@ -569,7 +578,11 @@ static struct dma_async_tx_descriptor *shdma_prep_sg(struct shdma_chan *schan, if (!new) goto err_get_desc; - new->chunks = chunks--; + new->cyclic = cyclic; + if (cyclic) + new->chunks = 1; + else + new->chunks = chunks--; list_add_tail(&new->node, &tx_list); } while (len); } @@ -612,7 +625,8 @@ static struct dma_async_tx_descriptor *shdma_prep_memcpy( sg_dma_address(&sg) = dma_src; sg_dma_len(&sg) = len; - return shdma_prep_sg(schan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM, flags); + return shdma_prep_sg(schan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM, + flags, false); } static struct dma_async_tx_descriptor *shdma_prep_slave_sg( @@ -640,7 +654,50 @@ static struct dma_async_tx_descriptor *shdma_prep_slave_sg( slave_addr = ops->slave_addr(schan); return shdma_prep_sg(schan, sgl, sg_len, &slave_addr, - direction, flags); + direction, flags, false); +} + +struct dma_async_tx_descriptor *shdma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct shdma_chan *schan = to_shdma_chan(chan); + struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device); + const struct shdma_ops *ops = sdev->ops; + unsigned int sg_len = buf_len / period_len; + int slave_id = schan->slave_id; + dma_addr_t slave_addr; + struct scatterlist sgl[sg_len]; + int i; + + if (!chan) + return NULL; + + BUG_ON(!schan->desc_num); + + /* Someone calling slave DMA on a generic channel? */ + if (slave_id < 0 || (buf_len < period_len)) { + dev_warn(schan->dev, + "%s: bad parameter: buf_len=%d, period_len=%d, id=%d\n", + __func__, buf_len, period_len, slave_id); + return NULL; + } + + slave_addr = ops->slave_addr(schan); + + sg_init_table(sgl, sg_len); + for (i = 0; i < sg_len; i++) { + dma_addr_t src = buf_addr + (period_len * i); + + sg_set_page(&sgl[i], pfn_to_page(PFN_DOWN(src)), period_len, + offset_in_page(src)); + sg_dma_address(&sgl[i]) = src; + sg_dma_len(&sgl[i]) = period_len; + } + + return shdma_prep_sg(schan, sgl, sg_len, &slave_addr, + direction, flags, true); } static int shdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, @@ -915,6 +972,7 @@ int shdma_init(struct device *dev, struct shdma_dev *sdev, /* Compulsory for DMA_SLAVE fields */ dma_dev->device_prep_slave_sg = shdma_prep_slave_sg; + dma_dev->device_prep_dma_cyclic = shdma_prep_dma_cyclic; dma_dev->device_control = shdma_control; dma_dev->dev = dev; diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h index f92c0a43c54c..abdf1f229dc3 100644 --- a/include/linux/shdma-base.h +++ b/include/linux/shdma-base.h @@ -54,6 +54,7 @@ struct shdma_desc { dma_cookie_t cookie; int chunks; int mark; + bool cyclic; /* used as cyclic transfer */ }; struct shdma_chan { -- cgit v1.2.3-71-gd317 From 6d48f44b7b2af67b33c1ae5994b8f642685c8bc8 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Wed, 30 Apr 2014 15:23:33 +0300 Subject: mdio_bus: implement devm_mdiobus_alloc/devm_mdiobus_free Add a resource managed devm_mdiobus_alloc[_size]()/devm_mdiobus_free() to automatically clean up MDIO bus alocations made by MDIO drivers, thus leading to simplified MDIO drivers code. Cc: Florian Fainelli Cc: Sergei Shtylyov Acked-and-tested-by: Lad, Prabhakar Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- Documentation/driver-model/devres.txt | 5 +++ drivers/net/phy/mdio_bus.c | 67 +++++++++++++++++++++++++++++++++++ include/linux/phy.h | 7 ++++ 3 files changed, 79 insertions(+) (limited to 'include') diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index 4f7897e99cba..c74e04494ade 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -308,3 +308,8 @@ SLAVE DMA ENGINE SPI devm_spi_register_master() + +MDIO + devm_mdiobus_alloc() + devm_mdiobus_alloc_size() + devm_mdiobus_free() diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 76f54b32a120..68a9a3867c0f 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -69,6 +69,73 @@ struct mii_bus *mdiobus_alloc_size(size_t size) } EXPORT_SYMBOL(mdiobus_alloc_size); +static void _devm_mdiobus_free(struct device *dev, void *res) +{ + mdiobus_free(*(struct mii_bus **)res); +} + +static int devm_mdiobus_match(struct device *dev, void *res, void *data) +{ + struct mii_bus **r = res; + + if (WARN_ON(!r || !*r)) + return 0; + + return *r == data; +} + +/** + * devm_mdiobus_alloc_size - Resource-managed mdiobus_alloc_size() + * @dev: Device to allocate mii_bus for + * @sizeof_priv: Space to allocate for private structure. + * + * Managed mdiobus_alloc_size. mii_bus allocated with this function is + * automatically freed on driver detach. + * + * If an mii_bus allocated with this function needs to be freed separately, + * devm_mdiobus_free() must be used. + * + * RETURNS: + * Pointer to allocated mii_bus on success, NULL on failure. + */ +struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv) +{ + struct mii_bus **ptr, *bus; + + ptr = devres_alloc(_devm_mdiobus_free, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; + + /* use raw alloc_dr for kmalloc caller tracing */ + bus = mdiobus_alloc_size(sizeof_priv); + if (bus) { + *ptr = bus; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return bus; +} +EXPORT_SYMBOL_GPL(devm_mdiobus_alloc); + +/** + * devm_mdiobus_free - Resource-managed mdiobus_free() + * @dev: Device this mii_bus belongs to + * @bus: the mii_bus associated with the device + * + * Free mii_bus allocated with devm_mdiobus_alloc_size(). + */ +void devm_mdiobus_free(struct device *dev, struct mii_bus *bus) +{ + int rc; + + rc = devres_release(dev, _devm_mdiobus_free, + devm_mdiobus_match, bus); + WARN_ON(rc); +} +EXPORT_SYMBOL_GPL(devm_mdiobus_free); + /** * mdiobus_release - mii_bus device release callback * @d: the target struct device that contains the mii_bus diff --git a/include/linux/phy.h b/include/linux/phy.h index 51d15f684e7e..864ddafad8cc 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -198,6 +198,13 @@ static inline struct mii_bus *mdiobus_alloc(void) int mdiobus_register(struct mii_bus *bus); void mdiobus_unregister(struct mii_bus *bus); void mdiobus_free(struct mii_bus *bus); +struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv); +static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev) +{ + return devm_mdiobus_alloc_size(dev, 0); +} + +void devm_mdiobus_free(struct device *dev, struct mii_bus *bus); struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); -- cgit v1.2.3-71-gd317 From e114a710aa5058c0ba4aa1dfb105132aefeb5e04 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Apr 2014 11:58:13 -0700 Subject: tcp: fix cwnd limited checking to improve congestion control Yuchung discovered tcp_is_cwnd_limited() was returning false in slow start phase even if the application filled the socket write queue. All congestion modules take into account tcp_is_cwnd_limited() before increasing cwnd, so this behavior limits slow start from probing the bandwidth at full speed. The problem is that even if write queue is full (aka we are _not_ application limited), cwnd can be under utilized if TSO should auto defer or TCP Small queues decided to hold packets. So the in_flight can be kept to smaller value, and we can get to the point tcp_is_cwnd_limited() returns false. With TCP Small Queues and FQ/pacing, this issue is more visible. We fix this by having tcp_cwnd_validate(), which is supposed to track such things, take into account unsent_segs, the number of segs that we are not sending at the moment due to TSO or TSQ, but intend to send real soon. Then when we are cwnd-limited, remember this fact while we are processing the window of ACKs that comes back. For example, suppose we have a brand new connection with cwnd=10; we are in slow start, and we send a flight of 9 packets. By the time we have received ACKs for all 9 packets we want our cwnd to be 18. We implement this by setting tp->lsnd_pending to 9, and considering ourselves to be cwnd-limited while cwnd is less than twice tp->lsnd_pending (2*9 -> 18). This makes tcp_is_cwnd_limited() more understandable, by removing the GSO/TSO kludge, that tried to work around the issue. Note the in_flight parameter can be removed in a followup cleanup patch. Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/net/tcp.h | 22 +++++++++++++++++++++- net/ipv4/tcp_cong.c | 20 -------------------- net/ipv4/tcp_output.c | 21 ++++++++++++++------- 4 files changed, 36 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 239946868142..4e37c71ecd74 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -230,6 +230,7 @@ struct tcp_sock { u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; u32 snd_cwnd_stamp; + u32 lsnd_pending; /* packets inflight or unsent since last xmit */ u32 prior_cwnd; /* Congestion window at start of Recovery. */ u32 prr_delivered; /* Number of newly delivered packets to * receiver in Recovery. */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 163d2b467d78..a9fe7bc4f4bb 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -974,7 +974,27 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp) { return tp->snd_una + tp->snd_wnd; } -bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight); + +/* We follow the spirit of RFC2861 to validate cwnd but implement a more + * flexible approach. The RFC suggests cwnd should not be raised unless + * it was fully used previously. But we allow cwnd to grow as long as the + * application has used half the cwnd. + * Example : + * cwnd is 10 (IW10), but application sends 9 frames. + * We allow cwnd to reach 18 when all frames are ACKed. + * This check is safe because it's as aggressive as slow start which already + * risks 100% overshoot. The advantage is that we discourage application to + * either send more filler packets or data to artificially blow up the cwnd + * usage, and allow application-limited process to probe bw more aggressively. + * + * TODO: remove in_flight once we can fix all callers, and their callers... + */ +static inline bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) +{ + const struct tcp_sock *tp = tcp_sk(sk); + + return tp->snd_cwnd < 2 * tp->lsnd_pending; +} static inline void tcp_check_probe_timer(struct sock *sk) { diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 2b9464c93b88..a93b41ba05ff 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -276,26 +276,6 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) return err; } -/* RFC2861 Check whether we are limited by application or congestion window - * This is the inverse of cwnd check in tcp_tso_should_defer - */ -bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) -{ - const struct tcp_sock *tp = tcp_sk(sk); - u32 left; - - if (in_flight >= tp->snd_cwnd) - return true; - - left = tp->snd_cwnd - in_flight; - if (sk_can_gso(sk) && - left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && - left < tp->xmit_size_goal_segs) - return true; - return left <= tcp_max_tso_deferred_mss(tp); -} -EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); - /* Slow start is used when congestion window is no greater than the slow start * threshold. We base on RFC2581 and also handle stretch ACKs properly. * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 20847de991ea..f9181a133462 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1402,12 +1402,13 @@ static void tcp_cwnd_application_limited(struct sock *sk) tp->snd_cwnd_stamp = tcp_time_stamp; } -/* Congestion window validation. (RFC2861) */ -static void tcp_cwnd_validate(struct sock *sk) +static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs) { struct tcp_sock *tp = tcp_sk(sk); - if (tp->packets_out >= tp->snd_cwnd) { + tp->lsnd_pending = tp->packets_out + unsent_segs; + + if (tcp_is_cwnd_limited(sk, 0)) { /* Network is feed fully. */ tp->snd_cwnd_used = 0; tp->snd_cwnd_stamp = tcp_time_stamp; @@ -1880,7 +1881,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - unsigned int tso_segs, sent_pkts; + unsigned int tso_segs, sent_pkts, unsent_segs = 0; int cwnd_quota; int result; @@ -1924,7 +1925,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, break; } else { if (!push_one && tcp_tso_should_defer(sk, skb)) - break; + goto compute_unsent_segs; } /* TCP Small Queues : @@ -1949,8 +1950,14 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, * there is no smp_mb__after_set_bit() yet */ smp_mb__after_clear_bit(); - if (atomic_read(&sk->sk_wmem_alloc) > limit) + if (atomic_read(&sk->sk_wmem_alloc) > limit) { + u32 unsent_bytes; + +compute_unsent_segs: + unsent_bytes = tp->write_seq - tp->snd_nxt; + unsent_segs = DIV_ROUND_UP(unsent_bytes, mss_now); break; + } } limit = mss_now; @@ -1990,7 +1997,7 @@ repair: /* Send one loss probe per tail loss episode. */ if (push_one != 2) tcp_schedule_loss_probe(sk); - tcp_cwnd_validate(sk); + tcp_cwnd_validate(sk, unsent_segs); return false; } return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); -- cgit v1.2.3-71-gd317 From 249015515fe3fc9818d86cb5c83bbc92505ad7dc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 May 2014 21:18:05 -0700 Subject: tcp: remove in_flight parameter from cong_avoid() methods Commit e114a710aa505 ("tcp: fix cwnd limited checking to improve congestion control") obsoleted in_flight parameter from tcp_is_cwnd_limited() and its callers. This patch does the removal as promised. Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 8 +++----- net/ipv4/tcp_bic.c | 5 ++--- net/ipv4/tcp_cong.c | 4 ++-- net/ipv4/tcp_cubic.c | 5 ++--- net/ipv4/tcp_highspeed.c | 4 ++-- net/ipv4/tcp_htcp.c | 4 ++-- net/ipv4/tcp_hybla.c | 7 +++---- net/ipv4/tcp_illinois.c | 5 ++--- net/ipv4/tcp_input.c | 9 ++++----- net/ipv4/tcp_lp.c | 5 ++--- net/ipv4/tcp_output.c | 2 +- net/ipv4/tcp_scalable.c | 5 ++--- net/ipv4/tcp_vegas.c | 7 +++---- net/ipv4/tcp_veno.c | 9 ++++----- net/ipv4/tcp_yeah.c | 5 ++--- 15 files changed, 36 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index a9fe7bc4f4bb..3c9418456640 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -796,7 +796,7 @@ struct tcp_congestion_ops { /* return slow start threshold (required) */ u32 (*ssthresh)(struct sock *sk); /* do new cwnd calculation (required) */ - void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked, u32 in_flight); + void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked); /* call before changing ca_state (optional) */ void (*set_state)(struct sock *sk, u8 new_state); /* call when cwnd event occurs (optional) */ @@ -828,7 +828,7 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w); extern struct tcp_congestion_ops tcp_init_congestion_ops; u32 tcp_reno_ssthresh(struct sock *sk); -void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight); +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); extern struct tcp_congestion_ops tcp_reno; static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state) @@ -986,10 +986,8 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp) * risks 100% overshoot. The advantage is that we discourage application to * either send more filler packets or data to artificially blow up the cwnd * usage, and allow application-limited process to probe bw more aggressively. - * - * TODO: remove in_flight once we can fix all callers, and their callers... */ -static inline bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) +static inline bool tcp_is_cwnd_limited(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 821846fb0a7e..d5de69bc04f5 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -140,13 +140,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->cnt = 1; } -static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index a93b41ba05ff..7b09d8b49fa5 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -317,11 +317,11 @@ EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ -void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; /* In "safe" area, increase. */ diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 8bf224516ba2..ba2a4f3a6a1e 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -304,13 +304,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->cnt = 1; } -static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) { diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 8b9e7bad77c0..1c4908280d92 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -109,12 +109,12 @@ static void hstcp_init(struct sock *sk) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) +static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct hstcp *ca = inet_csk_ca(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 4a194acfd923..031361311a8b 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -227,12 +227,12 @@ static u32 htcp_recalc_ssthresh(struct sock *sk) return max((tp->snd_cwnd * ca->beta) >> 7, 2U); } -static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) +static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index a15a799bf768..d8f8f05a4951 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -87,8 +87,7 @@ static inline u32 hybla_fraction(u32 odds) * o Give cwnd a new value based on the model proposed * o remember increments <1 */ -static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct hybla *ca = inet_csk_ca(sk); @@ -101,11 +100,11 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked, ca->minrtt_us = tp->srtt_us; } - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (!ca->hybla_en) { - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); return; } diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 863d105e3015..5999b3972e64 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -255,8 +255,7 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state) /* * Increase window in response to successful acknowledgment. */ -static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct illinois *ca = inet_csk_ca(sk); @@ -265,7 +264,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked, update_params(sk); /* RFC2861 only increase cwnd if fully utilized */ - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; /* In slow start */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6efed134ab63..350b2072f0ab 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2938,10 +2938,11 @@ static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp) tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L); } -static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) +static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { const struct inet_connection_sock *icsk = inet_csk(sk); - icsk->icsk_ca_ops->cong_avoid(sk, ack, acked, in_flight); + + icsk->icsk_ca_ops->cong_avoid(sk, ack, acked); tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; } @@ -3364,7 +3365,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; - u32 prior_in_flight; u32 prior_fackets; int prior_packets = tp->packets_out; const int prior_unsacked = tp->packets_out - tp->sacked_out; @@ -3397,7 +3397,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= FLAG_SND_UNA_ADVANCED; prior_fackets = tp->fackets_out; - prior_in_flight = tcp_packets_in_flight(tp); /* ts_recent update must be made after we are sure that the packet * is in window. @@ -3452,7 +3451,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* Advance cwnd if state allows */ if (tcp_may_raise_cwnd(sk, flag)) - tcp_cong_avoid(sk, ack, acked, prior_in_flight); + tcp_cong_avoid(sk, ack, acked); if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index c9aecae31327..1e70fa8fa793 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -115,13 +115,12 @@ static void tcp_lp_init(struct sock *sk) * Will only call newReno CA when away from inference. * From TCP-LP's paper, this will be handled in additive increasement. */ -static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct lp *lp = inet_csk_ca(sk); if (!(lp->flag & LP_WITHIN_INF)) - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); } /** diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f9181a133462..89277a34f2c9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1408,7 +1408,7 @@ static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs) tp->lsnd_pending = tp->packets_out + unsent_segs; - if (tcp_is_cwnd_limited(sk, 0)) { + if (tcp_is_cwnd_limited(sk)) { /* Network is feed fully. */ tp->snd_cwnd_used = 0; tp->snd_cwnd_stamp = tcp_time_stamp; diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 0ac50836da4d..8250949b8853 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -15,12 +15,11 @@ #define TCP_SCALABLE_AI_CNT 50U #define TCP_SCALABLE_MD_SCALE 3 -static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 48539fff6357..9a5e05f27f4f 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -163,14 +163,13 @@ static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) return min(tp->snd_ssthresh, tp->snd_cwnd-1); } -static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct vegas *vegas = inet_csk_ca(sk); if (!vegas->doing_vegas_now) { - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); return; } @@ -195,7 +194,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked, /* We don't have enough RTT samples to do the Vegas * calculation, so we'll behave like Reno. */ - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); } else { u32 rtt, diff; u64 target_cwnd; diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 1b8e28fcd7e1..27b9825753d1 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -114,19 +114,18 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event) tcp_veno_init(sk); } -static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct veno *veno = inet_csk_ca(sk); if (!veno->doing_veno_now) { - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); return; } /* limited by applications */ - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; /* We do the Veno calculations only if we got enough rtt samples */ @@ -134,7 +133,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked, /* We don't have enough rtt samples to do the Veno * calculation, so we'll behave like Reno. */ - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); } else { u64 target_cwnd; u32 rtt; diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 5ede0e727945..599b79b8eac0 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -69,13 +69,12 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us) tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us); } -static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct yeah *yeah = inet_csk_ca(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) -- cgit v1.2.3-71-gd317 From 69dfa00ccb72a37f3810687ca110e5a8154c6eed Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 4 May 2014 15:09:13 -0400 Subject: cgroup: make flags and subsys_masks unsigned int There's no reason to use atomic bitops for cgroup_subsys_state->flags, cgroup_root->flags and various subsys_masks. This patch updates those to use bitwise and/or operations instead and converts them form unsigned long to unsigned int. This makes the fields occupy (marginally) smaller space and makes it clear that they don't require atomicity. This patch doesn't cause any behavior difference. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 8 ++++---- kernel/cgroup.c | 37 ++++++++++++++++++------------------- 2 files changed, 22 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4b38e2d6110d..c6c703f2486b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -62,7 +62,7 @@ struct cgroup_subsys_state { /* the parent css */ struct cgroup_subsys_state *parent; - unsigned long flags; + unsigned int flags; /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; @@ -185,7 +185,7 @@ struct cgroup { u64 serial_nr; /* the bitmask of subsystems enabled on the child cgroups */ - unsigned long child_subsys_mask; + unsigned int child_subsys_mask; /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; @@ -312,7 +312,7 @@ struct cgroup_root { struct kernfs_root *kf_root; /* The bitmask of subsystems attached to this hierarchy */ - unsigned long subsys_mask; + unsigned int subsys_mask; /* Unique id for this hierarchy. */ int hierarchy_id; @@ -327,7 +327,7 @@ struct cgroup_root { struct list_head root_list; /* Hierarchy-specific flags */ - unsigned long flags; + unsigned int flags; /* IDs for cgroups in this hierarchy */ struct idr cgroup_idr; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3873267c9ee3..21667f396a1e 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -181,7 +181,7 @@ static struct cftype cgroup_base_files[]; static void cgroup_put(struct cgroup *cgrp); static int rebind_subsystems(struct cgroup_root *dst_root, - unsigned long ss_mask); + unsigned int ss_mask); static void cgroup_destroy_css_killed(struct cgroup *cgrp); static int cgroup_destroy_locked(struct cgroup *cgrp); static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss); @@ -963,7 +963,7 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task, * update of a tasks cgroup pointer by cgroup_attach_task() */ -static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask); +static int cgroup_populate_dir(struct cgroup *cgrp, unsigned int subsys_mask); static struct kernfs_syscall_ops cgroup_kf_syscall_ops; static const struct file_operations proc_cgroupstats_operations; @@ -1079,7 +1079,7 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) * @cgrp: target cgroup * @subsys_mask: mask of the subsystem ids whose files should be removed */ -static void cgroup_clear_dir(struct cgroup *cgrp, unsigned long subsys_mask) +static void cgroup_clear_dir(struct cgroup *cgrp, unsigned int subsys_mask) { struct cgroup_subsys *ss; int i; @@ -1087,15 +1087,14 @@ static void cgroup_clear_dir(struct cgroup *cgrp, unsigned long subsys_mask) for_each_subsys(ss, i) { struct cftype *cfts; - if (!test_bit(i, &subsys_mask)) + if (!(subsys_mask & (1 << i))) continue; list_for_each_entry(cfts, &ss->cfts, node) cgroup_addrm_files(cgrp, cfts, false); } } -static int rebind_subsystems(struct cgroup_root *dst_root, - unsigned long ss_mask) +static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask) { struct cgroup_subsys *ss; int ssid, i, ret; @@ -1128,7 +1127,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, * Just warn about it and continue. */ if (cgrp_dfl_root_visible) { - pr_warn("failed to create files (%d) while rebinding 0x%lx to default root\n", + pr_warn("failed to create files (%d) while rebinding 0x%x to default root\n", ret, ss_mask); pr_warn("you may retry by moving them to a different hierarchy and unbinding\n"); } @@ -1214,8 +1213,8 @@ static int cgroup_show_options(struct seq_file *seq, } struct cgroup_sb_opts { - unsigned long subsys_mask; - unsigned long flags; + unsigned int subsys_mask; + unsigned int flags; char *release_agent; bool cpuset_clone_children; char *name; @@ -1227,12 +1226,12 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) { char *token, *o = data; bool all_ss = false, one_ss = false; - unsigned long mask = (unsigned long)-1; + unsigned int mask = -1U; struct cgroup_subsys *ss; int i; #ifdef CONFIG_CPUSETS - mask = ~(1UL << cpuset_cgrp_id); + mask = ~(1U << cpuset_cgrp_id); #endif memset(opts, 0, sizeof(*opts)); @@ -1313,7 +1312,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) /* Mutually exclusive option 'all' + subsystem name */ if (all_ss) return -EINVAL; - set_bit(i, &opts->subsys_mask); + opts->subsys_mask |= (1 << i); one_ss = true; break; @@ -1342,7 +1341,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) if (all_ss || (!one_ss && !opts->none && !opts->name)) for_each_subsys(ss, i) if (!ss->disabled) - set_bit(i, &opts->subsys_mask); + opts->subsys_mask |= (1 << i); /* * We either have to specify by name or by subsystems. (So @@ -1373,7 +1372,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) int ret = 0; struct cgroup_root *root = cgroup_root_from_kf(kf_root); struct cgroup_sb_opts opts; - unsigned long added_mask, removed_mask; + unsigned int added_mask, removed_mask; if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) { pr_err("sane_behavior: remount is not allowed\n"); @@ -1398,7 +1397,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) /* Don't allow flags or name to change at remount */ if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) || (opts.name && strcmp(opts.name, root->name))) { - pr_err("option or name mismatch, new: 0x%lx \"%s\", old: 0x%lx \"%s\"\n", + pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n", opts.flags & CGRP_ROOT_OPTION_MASK, opts.name ?: "", root->flags & CGRP_ROOT_OPTION_MASK, root->name); ret = -EINVAL; @@ -1522,7 +1521,7 @@ static void init_cgroup_root(struct cgroup_root *root, set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } -static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask) +static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask) { LIST_HEAD(tmp_links); struct cgroup *root_cgrp = &root->cgrp; @@ -2507,7 +2506,7 @@ out_finish: static int cgroup_subtree_control_write(struct cgroup_subsys_state *dummy_css, struct cftype *cft, char *buffer) { - unsigned long enable_req = 0, disable_req = 0, enable, disable; + unsigned int enable_req = 0, disable_req = 0, enable, disable; struct cgroup *cgrp = dummy_css->cgroup, *child; struct cgroup_subsys *ss; char *tok, *p; @@ -3998,7 +3997,7 @@ static struct cftype cgroup_base_files[] = { * * On failure, no file is added. */ -static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask) +static int cgroup_populate_dir(struct cgroup *cgrp, unsigned int subsys_mask) { struct cgroup_subsys *ss; int i, ret = 0; @@ -4007,7 +4006,7 @@ static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask) for_each_subsys(ss, i) { struct cftype *cfts; - if (!test_bit(i, &subsys_mask)) + if (!(subsys_mask & (1 << i))) continue; list_for_each_entry(cfts, &ss->cfts, node) { -- cgit v1.2.3-71-gd317 From 7d699ddb2b181a2c76e5ea18b1bdf102c4bebe4b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 4 May 2014 15:09:13 -0400 Subject: cgroup, memcg: allocate cgroup ID from 1 Currently, cgroup->id is allocated from 0, which is always assigned to the root cgroup; unfortunately, memcg wants to use ID 0 to indicate invalid IDs and ends up incrementing all IDs by one. It's reasonable to reserve 0 for special purposes. This patch updates cgroup core so that ID 0 is not used and the root cgroups get ID 1. The ID incrementing is removed form memcg. Signed-off-by: Tejun Heo Acked-by: Michal Hocko Cc: Johannes Weiner Acked-by: Li Zefan --- include/linux/cgroup.h | 4 ++-- kernel/cgroup.c | 4 ++-- mm/memcontrol.c | 8 ++------ 3 files changed, 6 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c6c703f2486b..793f70a48820 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -144,8 +144,8 @@ struct cgroup { /* * idr allocated in-hierarchy ID. * - * The ID of the root cgroup is always 0, and a new cgroup - * will be assigned with a smallest available ID. + * ID 0 is not used, the ID of the root cgroup is always 1, and a + * new cgroup will be assigned with a smallest available ID. * * Allocating/Removing ID must be protected by cgroup_mutex. */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 21667f396a1e..3fa0463e74bb 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1531,7 +1531,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask) lockdep_assert_held(&cgroup_tree_mutex); lockdep_assert_held(&cgroup_mutex); - ret = idr_alloc(&root->cgroup_idr, root_cgrp, 0, 1, GFP_KERNEL); + ret = idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_KERNEL); if (ret < 0) goto out; root_cgrp->id = ret; @@ -4225,7 +4225,7 @@ static long cgroup_create(struct cgroup *parent, const char *name, * Temporarily set the pointer to NULL, so idr_find() won't return * a half-baked cgroup. */ - cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL); + cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL); if (cgrp->id < 0) { err = -ENOMEM; goto err_unlock; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 29501f040568..1d0b29715b73 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -527,18 +527,14 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) { - /* - * The ID of the root cgroup is 0, but memcg treat 0 as an - * invalid ID, so we return (cgroup_id + 1). - */ - return memcg->css.cgroup->id + 1; + return memcg->css.cgroup->id; } static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) { struct cgroup_subsys_state *css; - css = css_from_id(id - 1, &memory_cgrp_subsys); + css = css_from_id(id, &memory_cgrp_subsys); return mem_cgroup_from_css(css); } -- cgit v1.2.3-71-gd317 From 15a4c835e4ed3e60dd68727cd1907e3dd89563f4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 4 May 2014 15:09:14 -0400 Subject: cgroup, memcg: implement css->id and convert css_from_id() to use it Until now, cgroup->id has been used to identify all the associated csses and css_from_id() takes cgroup ID and returns the matching css by looking up the cgroup and then dereferencing the css associated with it; however, now that the lifetimes of cgroup and css are separate, this is incorrect and breaks on the unified hierarchy when a controller is disabled and enabled back again before the previous instance is released. This patch adds css->id which is a subsystem-unique ID and converts css_from_id() to look up by the new css->id instead. memcg is the only user of css_from_id() and also converted to use css->id instead. For traditional hierarchies, this shouldn't make any functional difference. Signed-off-by: Tejun Heo Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Jianyu Zhan Acked-by: Li Zefan --- include/linux/cgroup.h | 9 ++++++++ kernel/cgroup.c | 59 ++++++++++++++++++++++++++++++++------------------ mm/memcontrol.c | 4 ++-- 3 files changed, 49 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 793f70a48820..2dfabb3b749a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -62,6 +62,12 @@ struct cgroup_subsys_state { /* the parent css */ struct cgroup_subsys_state *parent; + /* + * Subsys-unique ID. 0 is unused and root is always 1. The + * matching css can be looked up using css_from_id(). + */ + int id; + unsigned int flags; /* percpu_ref killing and RCU release */ @@ -655,6 +661,9 @@ struct cgroup_subsys { /* link to parent, protected by cgroup_lock() */ struct cgroup_root *root; + /* idr for css->id */ + struct idr css_idr; + /* * List of cftypes. Each entry is the first entry of an array * terminated by zero length name. diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f1c98c527b2d..a1a20e8c973a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -100,8 +100,8 @@ static DECLARE_RWSEM(css_set_rwsem); #endif /* - * Protects cgroup_idr so that IDs can be released without grabbing - * cgroup_mutex. + * Protects cgroup_idr and css_idr so that IDs can be released without + * grabbing cgroup_mutex. */ static DEFINE_SPINLOCK(cgroup_idr_lock); @@ -1089,12 +1089,6 @@ static void cgroup_put(struct cgroup *cgrp) if (WARN_ON_ONCE(cgrp->parent && !cgroup_is_dead(cgrp))) return; - /* - * XXX: cgrp->id is only used to look up css's. As cgroup and - * css's lifetimes will be decoupled, it should be made - * per-subsystem and moved to css->id so that lookups are - * successful until the target css is released. - */ cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); cgrp->id = -1; @@ -4104,8 +4098,11 @@ static void css_release(struct percpu_ref *ref) { struct cgroup_subsys_state *css = container_of(ref, struct cgroup_subsys_state, refcnt); + struct cgroup_subsys *ss = css->ss; + + RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL); + cgroup_idr_remove(&ss->css_idr, css->id); - RCU_INIT_POINTER(css->cgroup->subsys[css->ss->id], NULL); call_rcu(&css->rcu_head, css_free_rcu_fn); } @@ -4195,9 +4192,17 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) if (err) goto err_free_css; + err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_NOWAIT); + if (err < 0) + goto err_free_percpu_ref; + css->id = err; + err = cgroup_populate_dir(cgrp, 1 << ss->id); if (err) - goto err_free_percpu_ref; + goto err_free_id; + + /* @css is ready to be brought online now, make it visible */ + cgroup_idr_replace(&ss->css_idr, css, css->id); err = online_css(css); if (err) @@ -4216,6 +4221,8 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) err_clear_dir: cgroup_clear_dir(css->cgroup, 1 << css->ss->id); +err_free_id: + cgroup_idr_remove(&ss->css_idr, css->id); err_free_percpu_ref: percpu_ref_cancel_init(&css->refcnt); err_free_css: @@ -4642,7 +4649,7 @@ static struct kernfs_syscall_ops cgroup_kf_syscall_ops = { .rename = cgroup_rename, }; -static void __init cgroup_init_subsys(struct cgroup_subsys *ss) +static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) { struct cgroup_subsys_state *css; @@ -4651,6 +4658,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) mutex_lock(&cgroup_tree_mutex); mutex_lock(&cgroup_mutex); + idr_init(&ss->css_idr); INIT_LIST_HEAD(&ss->cfts); /* Create the root cgroup state for this subsystem */ @@ -4659,6 +4667,13 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) /* We don't handle early failures gracefully */ BUG_ON(IS_ERR(css)); init_and_link_css(css, ss, &cgrp_dfl_root.cgrp); + if (early) { + /* idr_alloc() can't be called safely during early init */ + css->id = 1; + } else { + css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL); + BUG_ON(css->id < 0); + } /* Update the init_css_set to contain a subsys * pointer to this state - since the subsystem is @@ -4709,7 +4724,7 @@ int __init cgroup_init_early(void) ss->name = cgroup_subsys_name[i]; if (ss->early_init) - cgroup_init_subsys(ss); + cgroup_init_subsys(ss, true); } return 0; } @@ -4741,8 +4756,16 @@ int __init cgroup_init(void) mutex_unlock(&cgroup_tree_mutex); for_each_subsys(ss, ssid) { - if (!ss->early_init) - cgroup_init_subsys(ss); + if (ss->early_init) { + struct cgroup_subsys_state *css = + init_css_set.subsys[ss->id]; + + css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, + GFP_KERNEL); + BUG_ON(css->id < 0); + } else { + cgroup_init_subsys(ss, false); + } list_add_tail(&init_css_set.e_cset_node[ssid], &cgrp_dfl_root.cgrp.e_csets[ssid]); @@ -5196,14 +5219,8 @@ struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry, */ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) { - struct cgroup *cgrp; - WARN_ON_ONCE(!rcu_read_lock_held()); - - cgrp = idr_find(&ss->root->cgroup_idr, id); - if (cgrp) - return cgroup_css(cgrp, ss); - return NULL; + return idr_find(&ss->css_idr, id); } #ifdef CONFIG_CGROUP_DEBUG diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1d0b29715b73..c3f82f69ef58 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -527,7 +527,7 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) { - return memcg->css.cgroup->id; + return memcg->css.id; } static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) @@ -6401,7 +6401,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css)); - if (css->cgroup->id > MEM_CGROUP_ID_MAX) + if (css->id > MEM_CGROUP_ID_MAX) return -ENOSPC; if (!parent) -- cgit v1.2.3-71-gd317 From 5bcfedf06f7fdf9efcf65dc11198e9012f7530f4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 1 May 2014 18:34:18 +0200 Subject: net: filter: simplify label names from jump-table This patch simplifies label naming for the BPF jump-table. When we define labels via DL(), we just concatenate/textify the combination of instruction opcode which consists of the class, subclass, word size, target register and so on. Each time we leave BPF_ prefix intact, so that e.g. the preprocessor generates a label BPF_ALU_BPF_ADD_BPF_X for DL(BPF_ALU, BPF_ADD, BPF_X) whereas a label name of ALU_ADD_X is much more easy to grasp. Pure cleanup only. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 3 + net/core/filter.c | 308 ++++++++++++++++++++++++------------------------- 2 files changed, 157 insertions(+), 154 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 759abf78dd61..b042d1db127f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -37,6 +37,9 @@ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ +/* Placeholder/dummy for 0 */ +#define BPF_0 0 + /* BPF has 10 general purpose 64-bit registers and stack frame. */ #define MAX_BPF_REG 11 diff --git a/net/core/filter.c b/net/core/filter.c index 7c4db3dd3d1e..a1784e95b049 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -156,94 +156,94 @@ unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) static const void *jumptable[256] = { [0 ... 255] = &&default_label, /* Now overwrite non-defaults ... */ -#define DL(A, B, C) [A|B|C] = &&A##_##B##_##C - DL(BPF_ALU, BPF_ADD, BPF_X), - DL(BPF_ALU, BPF_ADD, BPF_K), - DL(BPF_ALU, BPF_SUB, BPF_X), - DL(BPF_ALU, BPF_SUB, BPF_K), - DL(BPF_ALU, BPF_AND, BPF_X), - DL(BPF_ALU, BPF_AND, BPF_K), - DL(BPF_ALU, BPF_OR, BPF_X), - DL(BPF_ALU, BPF_OR, BPF_K), - DL(BPF_ALU, BPF_LSH, BPF_X), - DL(BPF_ALU, BPF_LSH, BPF_K), - DL(BPF_ALU, BPF_RSH, BPF_X), - DL(BPF_ALU, BPF_RSH, BPF_K), - DL(BPF_ALU, BPF_XOR, BPF_X), - DL(BPF_ALU, BPF_XOR, BPF_K), - DL(BPF_ALU, BPF_MUL, BPF_X), - DL(BPF_ALU, BPF_MUL, BPF_K), - DL(BPF_ALU, BPF_MOV, BPF_X), - DL(BPF_ALU, BPF_MOV, BPF_K), - DL(BPF_ALU, BPF_DIV, BPF_X), - DL(BPF_ALU, BPF_DIV, BPF_K), - DL(BPF_ALU, BPF_MOD, BPF_X), - DL(BPF_ALU, BPF_MOD, BPF_K), - DL(BPF_ALU, BPF_NEG, 0), - DL(BPF_ALU, BPF_END, BPF_TO_BE), - DL(BPF_ALU, BPF_END, BPF_TO_LE), - DL(BPF_ALU64, BPF_ADD, BPF_X), - DL(BPF_ALU64, BPF_ADD, BPF_K), - DL(BPF_ALU64, BPF_SUB, BPF_X), - DL(BPF_ALU64, BPF_SUB, BPF_K), - DL(BPF_ALU64, BPF_AND, BPF_X), - DL(BPF_ALU64, BPF_AND, BPF_K), - DL(BPF_ALU64, BPF_OR, BPF_X), - DL(BPF_ALU64, BPF_OR, BPF_K), - DL(BPF_ALU64, BPF_LSH, BPF_X), - DL(BPF_ALU64, BPF_LSH, BPF_K), - DL(BPF_ALU64, BPF_RSH, BPF_X), - DL(BPF_ALU64, BPF_RSH, BPF_K), - DL(BPF_ALU64, BPF_XOR, BPF_X), - DL(BPF_ALU64, BPF_XOR, BPF_K), - DL(BPF_ALU64, BPF_MUL, BPF_X), - DL(BPF_ALU64, BPF_MUL, BPF_K), - DL(BPF_ALU64, BPF_MOV, BPF_X), - DL(BPF_ALU64, BPF_MOV, BPF_K), - DL(BPF_ALU64, BPF_ARSH, BPF_X), - DL(BPF_ALU64, BPF_ARSH, BPF_K), - DL(BPF_ALU64, BPF_DIV, BPF_X), - DL(BPF_ALU64, BPF_DIV, BPF_K), - DL(BPF_ALU64, BPF_MOD, BPF_X), - DL(BPF_ALU64, BPF_MOD, BPF_K), - DL(BPF_ALU64, BPF_NEG, 0), - DL(BPF_JMP, BPF_CALL, 0), - DL(BPF_JMP, BPF_JA, 0), - DL(BPF_JMP, BPF_JEQ, BPF_X), - DL(BPF_JMP, BPF_JEQ, BPF_K), - DL(BPF_JMP, BPF_JNE, BPF_X), - DL(BPF_JMP, BPF_JNE, BPF_K), - DL(BPF_JMP, BPF_JGT, BPF_X), - DL(BPF_JMP, BPF_JGT, BPF_K), - DL(BPF_JMP, BPF_JGE, BPF_X), - DL(BPF_JMP, BPF_JGE, BPF_K), - DL(BPF_JMP, BPF_JSGT, BPF_X), - DL(BPF_JMP, BPF_JSGT, BPF_K), - DL(BPF_JMP, BPF_JSGE, BPF_X), - DL(BPF_JMP, BPF_JSGE, BPF_K), - DL(BPF_JMP, BPF_JSET, BPF_X), - DL(BPF_JMP, BPF_JSET, BPF_K), - DL(BPF_JMP, BPF_EXIT, 0), - DL(BPF_STX, BPF_MEM, BPF_B), - DL(BPF_STX, BPF_MEM, BPF_H), - DL(BPF_STX, BPF_MEM, BPF_W), - DL(BPF_STX, BPF_MEM, BPF_DW), - DL(BPF_STX, BPF_XADD, BPF_W), - DL(BPF_STX, BPF_XADD, BPF_DW), - DL(BPF_ST, BPF_MEM, BPF_B), - DL(BPF_ST, BPF_MEM, BPF_H), - DL(BPF_ST, BPF_MEM, BPF_W), - DL(BPF_ST, BPF_MEM, BPF_DW), - DL(BPF_LDX, BPF_MEM, BPF_B), - DL(BPF_LDX, BPF_MEM, BPF_H), - DL(BPF_LDX, BPF_MEM, BPF_W), - DL(BPF_LDX, BPF_MEM, BPF_DW), - DL(BPF_LD, BPF_ABS, BPF_W), - DL(BPF_LD, BPF_ABS, BPF_H), - DL(BPF_LD, BPF_ABS, BPF_B), - DL(BPF_LD, BPF_IND, BPF_W), - DL(BPF_LD, BPF_IND, BPF_H), - DL(BPF_LD, BPF_IND, BPF_B), +#define DL(A, B, C) [BPF_##A|BPF_##B|BPF_##C] = &&A##_##B##_##C + DL(ALU, ADD, X), + DL(ALU, ADD, K), + DL(ALU, SUB, X), + DL(ALU, SUB, K), + DL(ALU, AND, X), + DL(ALU, AND, K), + DL(ALU, OR, X), + DL(ALU, OR, K), + DL(ALU, LSH, X), + DL(ALU, LSH, K), + DL(ALU, RSH, X), + DL(ALU, RSH, K), + DL(ALU, XOR, X), + DL(ALU, XOR, K), + DL(ALU, MUL, X), + DL(ALU, MUL, K), + DL(ALU, MOV, X), + DL(ALU, MOV, K), + DL(ALU, DIV, X), + DL(ALU, DIV, K), + DL(ALU, MOD, X), + DL(ALU, MOD, K), + DL(ALU, NEG, 0), + DL(ALU, END, TO_BE), + DL(ALU, END, TO_LE), + DL(ALU64, ADD, X), + DL(ALU64, ADD, K), + DL(ALU64, SUB, X), + DL(ALU64, SUB, K), + DL(ALU64, AND, X), + DL(ALU64, AND, K), + DL(ALU64, OR, X), + DL(ALU64, OR, K), + DL(ALU64, LSH, X), + DL(ALU64, LSH, K), + DL(ALU64, RSH, X), + DL(ALU64, RSH, K), + DL(ALU64, XOR, X), + DL(ALU64, XOR, K), + DL(ALU64, MUL, X), + DL(ALU64, MUL, K), + DL(ALU64, MOV, X), + DL(ALU64, MOV, K), + DL(ALU64, ARSH, X), + DL(ALU64, ARSH, K), + DL(ALU64, DIV, X), + DL(ALU64, DIV, K), + DL(ALU64, MOD, X), + DL(ALU64, MOD, K), + DL(ALU64, NEG, 0), + DL(JMP, CALL, 0), + DL(JMP, JA, 0), + DL(JMP, JEQ, X), + DL(JMP, JEQ, K), + DL(JMP, JNE, X), + DL(JMP, JNE, K), + DL(JMP, JGT, X), + DL(JMP, JGT, K), + DL(JMP, JGE, X), + DL(JMP, JGE, K), + DL(JMP, JSGT, X), + DL(JMP, JSGT, K), + DL(JMP, JSGE, X), + DL(JMP, JSGE, K), + DL(JMP, JSET, X), + DL(JMP, JSET, K), + DL(JMP, EXIT, 0), + DL(STX, MEM, B), + DL(STX, MEM, H), + DL(STX, MEM, W), + DL(STX, MEM, DW), + DL(STX, XADD, W), + DL(STX, XADD, DW), + DL(ST, MEM, B), + DL(ST, MEM, H), + DL(ST, MEM, W), + DL(ST, MEM, DW), + DL(LDX, MEM, B), + DL(LDX, MEM, H), + DL(LDX, MEM, W), + DL(LDX, MEM, DW), + DL(LD, ABS, W), + DL(LD, ABS, H), + DL(LD, ABS, B), + DL(LD, IND, W), + DL(LD, IND, H), + DL(LD, IND, B), #undef DL }; @@ -257,93 +257,93 @@ select_insn: /* ALU */ #define ALU(OPCODE, OP) \ - BPF_ALU64_##OPCODE##_BPF_X: \ + ALU64_##OPCODE##_X: \ A = A OP X; \ CONT; \ - BPF_ALU_##OPCODE##_BPF_X: \ + ALU_##OPCODE##_X: \ A = (u32) A OP (u32) X; \ CONT; \ - BPF_ALU64_##OPCODE##_BPF_K: \ + ALU64_##OPCODE##_K: \ A = A OP K; \ CONT; \ - BPF_ALU_##OPCODE##_BPF_K: \ + ALU_##OPCODE##_K: \ A = (u32) A OP (u32) K; \ CONT; - ALU(BPF_ADD, +) - ALU(BPF_SUB, -) - ALU(BPF_AND, &) - ALU(BPF_OR, |) - ALU(BPF_LSH, <<) - ALU(BPF_RSH, >>) - ALU(BPF_XOR, ^) - ALU(BPF_MUL, *) + ALU(ADD, +) + ALU(SUB, -) + ALU(AND, &) + ALU(OR, |) + ALU(LSH, <<) + ALU(RSH, >>) + ALU(XOR, ^) + ALU(MUL, *) #undef ALU - BPF_ALU_BPF_NEG_0: + ALU_NEG_0: A = (u32) -A; CONT; - BPF_ALU64_BPF_NEG_0: + ALU64_NEG_0: A = -A; CONT; - BPF_ALU_BPF_MOV_BPF_X: + ALU_MOV_X: A = (u32) X; CONT; - BPF_ALU_BPF_MOV_BPF_K: + ALU_MOV_K: A = (u32) K; CONT; - BPF_ALU64_BPF_MOV_BPF_X: + ALU64_MOV_X: A = X; CONT; - BPF_ALU64_BPF_MOV_BPF_K: + ALU64_MOV_K: A = K; CONT; - BPF_ALU64_BPF_ARSH_BPF_X: + ALU64_ARSH_X: (*(s64 *) &A) >>= X; CONT; - BPF_ALU64_BPF_ARSH_BPF_K: + ALU64_ARSH_K: (*(s64 *) &A) >>= K; CONT; - BPF_ALU64_BPF_MOD_BPF_X: + ALU64_MOD_X: if (unlikely(X == 0)) return 0; tmp = A; A = do_div(tmp, X); CONT; - BPF_ALU_BPF_MOD_BPF_X: + ALU_MOD_X: if (unlikely(X == 0)) return 0; tmp = (u32) A; A = do_div(tmp, (u32) X); CONT; - BPF_ALU64_BPF_MOD_BPF_K: + ALU64_MOD_K: tmp = A; A = do_div(tmp, K); CONT; - BPF_ALU_BPF_MOD_BPF_K: + ALU_MOD_K: tmp = (u32) A; A = do_div(tmp, (u32) K); CONT; - BPF_ALU64_BPF_DIV_BPF_X: + ALU64_DIV_X: if (unlikely(X == 0)) return 0; do_div(A, X); CONT; - BPF_ALU_BPF_DIV_BPF_X: + ALU_DIV_X: if (unlikely(X == 0)) return 0; tmp = (u32) A; do_div(tmp, (u32) X); A = (u32) tmp; CONT; - BPF_ALU64_BPF_DIV_BPF_K: + ALU64_DIV_K: do_div(A, K); CONT; - BPF_ALU_BPF_DIV_BPF_K: + ALU_DIV_K: tmp = (u32) A; do_div(tmp, (u32) K); A = (u32) tmp; CONT; - BPF_ALU_BPF_END_BPF_TO_BE: + ALU_END_TO_BE: switch (K) { case 16: A = (__force u16) cpu_to_be16(A); @@ -356,7 +356,7 @@ select_insn: break; } CONT; - BPF_ALU_BPF_END_BPF_TO_LE: + ALU_END_TO_LE: switch (K) { case 16: A = (__force u16) cpu_to_le16(A); @@ -371,7 +371,7 @@ select_insn: CONT; /* CALL */ - BPF_JMP_BPF_CALL_0: + JMP_CALL_0: /* Function call scratches R1-R5 registers, preserves R6-R9, * and stores return value into R0. */ @@ -380,122 +380,122 @@ select_insn: CONT; /* JMP */ - BPF_JMP_BPF_JA_0: + JMP_JA_0: insn += insn->off; CONT; - BPF_JMP_BPF_JEQ_BPF_X: + JMP_JEQ_X: if (A == X) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JEQ_BPF_K: + JMP_JEQ_K: if (A == K) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JNE_BPF_X: + JMP_JNE_X: if (A != X) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JNE_BPF_K: + JMP_JNE_K: if (A != K) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JGT_BPF_X: + JMP_JGT_X: if (A > X) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JGT_BPF_K: + JMP_JGT_K: if (A > K) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JGE_BPF_X: + JMP_JGE_X: if (A >= X) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JGE_BPF_K: + JMP_JGE_K: if (A >= K) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSGT_BPF_X: - if (((s64)A) > ((s64)X)) { + JMP_JSGT_X: + if (((s64) A) > ((s64) X)) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSGT_BPF_K: - if (((s64)A) > ((s64)K)) { + JMP_JSGT_K: + if (((s64) A) > ((s64) K)) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSGE_BPF_X: - if (((s64)A) >= ((s64)X)) { + JMP_JSGE_X: + if (((s64) A) >= ((s64) X)) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSGE_BPF_K: - if (((s64)A) >= ((s64)K)) { + JMP_JSGE_K: + if (((s64) A) >= ((s64) K)) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSET_BPF_X: + JMP_JSET_X: if (A & X) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_JSET_BPF_K: + JMP_JSET_K: if (A & K) { insn += insn->off; CONT_JMP; } CONT; - BPF_JMP_BPF_EXIT_0: + JMP_EXIT_0: return R0; /* STX and ST and LDX*/ #define LDST(SIZEOP, SIZE) \ - BPF_STX_BPF_MEM_##SIZEOP: \ + STX_MEM_##SIZEOP: \ *(SIZE *)(unsigned long) (A + insn->off) = X; \ CONT; \ - BPF_ST_BPF_MEM_##SIZEOP: \ + ST_MEM_##SIZEOP: \ *(SIZE *)(unsigned long) (A + insn->off) = K; \ CONT; \ - BPF_LDX_BPF_MEM_##SIZEOP: \ + LDX_MEM_##SIZEOP: \ A = *(SIZE *)(unsigned long) (X + insn->off); \ CONT; - LDST(BPF_B, u8) - LDST(BPF_H, u16) - LDST(BPF_W, u32) - LDST(BPF_DW, u64) + LDST(B, u8) + LDST(H, u16) + LDST(W, u32) + LDST(DW, u64) #undef LDST - BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */ + STX_XADD_W: /* lock xadd *(u32 *)(A + insn->off) += X */ atomic_add((u32) X, (atomic_t *)(unsigned long) (A + insn->off)); CONT; - BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */ + STX_XADD_DW: /* lock xadd *(u64 *)(A + insn->off) += X */ atomic64_add((u64) X, (atomic64_t *)(unsigned long) (A + insn->off)); CONT; - BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */ + LD_ABS_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */ off = K; load_word: /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only @@ -524,7 +524,7 @@ load_word: CONT; } return 0; - BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */ + LD_ABS_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */ off = K; load_half: ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp); @@ -533,7 +533,7 @@ load_half: CONT; } return 0; - BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */ + LD_ABS_B: /* R0 = *(u8 *) (ctx + K) */ off = K; load_byte: ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp); @@ -542,13 +542,13 @@ load_byte: CONT; } return 0; - BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */ + LD_IND_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */ off = K + X; goto load_word; - BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */ + LD_IND_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */ off = K + X; goto load_half; - BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */ + LD_IND_B: /* R0 = *(u8 *) (skb->data + X + K) */ off = K + X; goto load_byte; -- cgit v1.2.3-71-gd317 From 30743837dd204d2b04fd4e9d3db78cc7b118c81a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 1 May 2014 18:34:19 +0200 Subject: net: filter: make register naming more comprehensible The current code is a bit hard to parse on which registers can be used, how they are mapped and all play together. It makes much more sense to define this a bit more clearly so that the code is a bit more intuitive. This patch cleans this up, and makes naming a bit more consistent among the code. This also allows for moving some of the defines into the header file. Clearing of A and X registers in __sk_run_filter() do not get a particular register name assigned as they have not an 'official' function, but rather just result from the concrete initial mapping of old BPF programs. Since for BPF helper functions for BPF_CALL we already use small letters, so be consistent here as well. No functional changes. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 44 ++++++++-- net/core/filter.c | 215 +++++++++++++++++++++++++------------------------ 2 files changed, 145 insertions(+), 114 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index b042d1db127f..ed1efab10b8f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -40,16 +40,47 @@ /* Placeholder/dummy for 0 */ #define BPF_0 0 +/* Register numbers */ +enum { + BPF_REG_0 = 0, + BPF_REG_1, + BPF_REG_2, + BPF_REG_3, + BPF_REG_4, + BPF_REG_5, + BPF_REG_6, + BPF_REG_7, + BPF_REG_8, + BPF_REG_9, + BPF_REG_10, + __MAX_BPF_REG, +}; + /* BPF has 10 general purpose 64-bit registers and stack frame. */ -#define MAX_BPF_REG 11 +#define MAX_BPF_REG __MAX_BPF_REG + +/* ArgX, context and stack frame pointer register positions. Note, + * Arg1, Arg2, Arg3, etc are used as argument mappings of function + * calls in BPF_CALL instruction. + */ +#define BPF_REG_ARG1 BPF_REG_1 +#define BPF_REG_ARG2 BPF_REG_2 +#define BPF_REG_ARG3 BPF_REG_3 +#define BPF_REG_ARG4 BPF_REG_4 +#define BPF_REG_ARG5 BPF_REG_5 +#define BPF_REG_CTX BPF_REG_6 +#define BPF_REG_FP BPF_REG_10 + +/* Additional register mappings for converted user programs. */ +#define BPF_REG_A BPF_REG_0 +#define BPF_REG_X BPF_REG_7 +#define BPF_REG_TMP BPF_REG_8 /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 -/* Arg1, context and stack frame pointer register positions. */ -#define ARG1_REG 1 -#define CTX_REG 6 -#define FP_REG 10 +/* Macro to invoke filter function. */ +#define SK_RUN_FILTER(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) struct sock_filter_int { __u8 code; /* opcode */ @@ -100,9 +131,6 @@ static inline unsigned int sk_filter_size(unsigned int proglen) #define sk_filter_proglen(fprog) \ (fprog->len * sizeof(fprog->filter[0])) -#define SK_RUN_FILTER(filter, ctx) \ - (*filter->bpf_func)(ctx, filter->insnsi) - int sk_filter(struct sock *sk, struct sk_buff *skb); u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx, diff --git a/net/core/filter.c b/net/core/filter.c index a1784e95b049..c93ca8d66f37 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -45,6 +45,27 @@ #include #include +/* Registers */ +#define R0 regs[BPF_REG_0] +#define R1 regs[BPF_REG_1] +#define R2 regs[BPF_REG_2] +#define R3 regs[BPF_REG_3] +#define R4 regs[BPF_REG_4] +#define R5 regs[BPF_REG_5] +#define R6 regs[BPF_REG_6] +#define R7 regs[BPF_REG_7] +#define R8 regs[BPF_REG_8] +#define R9 regs[BPF_REG_9] +#define R10 regs[BPF_REG_10] + +/* Named registers */ +#define A regs[insn->a_reg] +#define X regs[insn->x_reg] +#define FP regs[BPF_REG_FP] +#define ARG1 regs[BPF_REG_ARG1] +#define CTX regs[BPF_REG_CTX] +#define K insn->imm + /* No hurry in this branch * * Exported for the bpf jit load helper. @@ -122,13 +143,6 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) return 0; } -/* Register mappings for user programs. */ -#define A_REG 0 -#define X_REG 7 -#define TMP_REG 8 -#define ARG2_REG 2 -#define ARG3_REG 3 - /** * __sk_run_filter - run a filter on a given context * @ctx: buffer to run the filter on @@ -142,17 +156,6 @@ unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) { u64 stack[MAX_BPF_STACK / sizeof(u64)]; u64 regs[MAX_BPF_REG], tmp; - void *ptr; - int off; - -#define K insn->imm -#define A regs[insn->a_reg] -#define X regs[insn->x_reg] -#define R0 regs[0] - -#define CONT ({insn++; goto select_insn; }) -#define CONT_JMP ({insn++; goto select_insn; }) - static const void *jumptable[256] = { [0 ... 255] = &&default_label, /* Now overwrite non-defaults ... */ @@ -246,11 +249,18 @@ unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) DL(LD, IND, B), #undef DL }; + void *ptr; + int off; - regs[FP_REG] = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; - regs[ARG1_REG] = (u64) (unsigned long) ctx; - regs[A_REG] = 0; - regs[X_REG] = 0; +#define CONT ({ insn++; goto select_insn; }) +#define CONT_JMP ({ insn++; goto select_insn; }) + + FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; + ARG1 = (u64) (unsigned long) ctx; + + /* Register for user BPF programs need to be reset first. */ + regs[BPF_REG_A] = 0; + regs[BPF_REG_X] = 0; select_insn: goto *jumptable[insn->code]; @@ -375,8 +385,7 @@ select_insn: /* Function call scratches R1-R5 registers, preserves R6-R9, * and stores return value into R0. */ - R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3], - regs[4], regs[5]); + R0 = (__bpf_call_base + insn->imm)(R1, R2, R3, R4, R5); CONT; /* JMP */ @@ -500,7 +509,7 @@ select_insn: load_word: /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only * appearing in the programs where ctx == skb. All programs - * keep 'ctx' in regs[CTX_REG] == R6, sk_convert_filter() + * keep 'ctx' in regs[BPF_REG_CTX] == R6, sk_convert_filter() * saves it in R6, internal BPF verifier will check that * R6 == ctx. * @@ -556,13 +565,6 @@ load_byte: /* If we ever reach this, we have a bug somewhere. */ WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code); return 0; -#undef CONT_JMP -#undef CONT - -#undef R0 -#undef X -#undef A -#undef K } u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx, @@ -594,14 +596,14 @@ static unsigned int pkt_type_offset(void) return -1; } -static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *)(long) ctx; return __skb_get_poff(skb); } -static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *)(long) ctx; struct nlattr *nla; @@ -612,17 +614,17 @@ static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) if (skb->len < sizeof(struct nlattr)) return 0; - if (A > skb->len - sizeof(struct nlattr)) + if (a > skb->len - sizeof(struct nlattr)) return 0; - nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X); + nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x); if (nla) return (void *) nla - (void *) skb->data; return 0; } -static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *)(long) ctx; struct nlattr *nla; @@ -633,27 +635,27 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) if (skb->len < sizeof(struct nlattr)) return 0; - if (A > skb->len - sizeof(struct nlattr)) + if (a > skb->len - sizeof(struct nlattr)) return 0; - nla = (struct nlattr *) &skb->data[A]; - if (nla->nla_len > skb->len - A) + nla = (struct nlattr *) &skb->data[a]; + if (nla->nla_len > skb->len - a) return 0; - nla = nla_find_nested(nla, X); + nla = nla_find_nested(nla, x); if (nla) return (void *) nla - (void *) skb->data; return 0; } -static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { return raw_smp_processor_id(); } /* note that this only generates 32-bit random numbers */ -static u64 __get_random_u32(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { return (u64)prandom_u32(); } @@ -668,28 +670,28 @@ static bool convert_bpf_extensions(struct sock_filter *fp, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_CTX; insn->off = offsetof(struct sk_buff, protocol); insn++; /* A = ntohs(A) [emitting a nop or swap16] */ insn->code = BPF_ALU | BPF_END | BPF_FROM_BE; - insn->a_reg = A_REG; + insn->a_reg = BPF_REG_A; insn->imm = 16; break; case SKF_AD_OFF + SKF_AD_PKTTYPE: insn->code = BPF_LDX | BPF_MEM | BPF_B; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_CTX; insn->off = pkt_type_offset(); if (insn->off < 0) return false; insn++; insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = A_REG; + insn->a_reg = BPF_REG_A; insn->imm = PKT_TYPE_MAX; break; @@ -699,13 +701,13 @@ static bool convert_bpf_extensions(struct sock_filter *fp, insn->code = BPF_LDX | BPF_MEM | BPF_DW; else insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = TMP_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_TMP; + insn->x_reg = BPF_REG_CTX; insn->off = offsetof(struct sk_buff, dev); insn++; insn->code = BPF_JMP | BPF_JNE | BPF_K; - insn->a_reg = TMP_REG; + insn->a_reg = BPF_REG_TMP; insn->imm = 0; insn->off = 1; insn++; @@ -716,8 +718,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); - insn->a_reg = A_REG; - insn->x_reg = TMP_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_TMP; if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) { insn->code = BPF_LDX | BPF_MEM | BPF_W; @@ -732,8 +734,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_CTX; insn->off = offsetof(struct sk_buff, mark); break; @@ -741,8 +743,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_CTX; insn->off = offsetof(struct sk_buff, hash); break; @@ -750,8 +752,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_CTX; insn->off = offsetof(struct sk_buff, queue_mapping); break; @@ -760,8 +762,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_CTX; insn->off = offsetof(struct sk_buff, vlan_tci); insn++; @@ -769,16 +771,16 @@ static bool convert_bpf_extensions(struct sock_filter *fp, if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = A_REG; + insn->a_reg = BPF_REG_A; insn->imm = ~VLAN_TAG_PRESENT; } else { insn->code = BPF_ALU | BPF_RSH | BPF_K; - insn->a_reg = A_REG; + insn->a_reg = BPF_REG_A; insn->imm = 12; insn++; insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = A_REG; + insn->a_reg = BPF_REG_A; insn->imm = 1; } break; @@ -790,20 +792,20 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_RANDOM: /* arg1 = ctx */ insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = ARG1_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_ARG1; + insn->x_reg = BPF_REG_CTX; insn++; /* arg2 = A */ insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = ARG2_REG; - insn->x_reg = A_REG; + insn->a_reg = BPF_REG_ARG2; + insn->x_reg = BPF_REG_A; insn++; /* arg3 = X */ insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = ARG3_REG; - insn->x_reg = X_REG; + insn->a_reg = BPF_REG_ARG3; + insn->x_reg = BPF_REG_X; insn++; /* Emit call(ctx, arg2=A, arg3=X) */ @@ -829,8 +831,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_ALU_XOR_X: insn->code = BPF_ALU | BPF_XOR | BPF_X; - insn->a_reg = A_REG; - insn->x_reg = X_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_X; break; default: @@ -880,7 +882,7 @@ int sk_convert_filter(struct sock_filter *prog, int len, u8 bpf_src; BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK); - BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG); + BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); if (len <= 0 || len >= BPF_MAXINSNS) return -EINVAL; @@ -897,8 +899,8 @@ do_pass: if (new_insn) { new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - new_insn->a_reg = CTX_REG; - new_insn->x_reg = ARG1_REG; + new_insn->a_reg = BPF_REG_CTX; + new_insn->x_reg = BPF_REG_ARG1; } new_insn++; @@ -948,8 +950,8 @@ do_pass: break; insn->code = fp->code; - insn->a_reg = A_REG; - insn->x_reg = X_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_X; insn->imm = fp->k; break; @@ -983,16 +985,16 @@ do_pass: * in compare insn. */ insn->code = BPF_ALU | BPF_MOV | BPF_K; - insn->a_reg = TMP_REG; + insn->a_reg = BPF_REG_TMP; insn->imm = fp->k; insn++; - insn->a_reg = A_REG; - insn->x_reg = TMP_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_TMP; bpf_src = BPF_X; } else { - insn->a_reg = A_REG; - insn->x_reg = X_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_X; insn->imm = fp->k; bpf_src = BPF_SRC(fp->code); } @@ -1027,33 +1029,33 @@ do_pass: /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */ case BPF_LDX | BPF_MSH | BPF_B: insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = TMP_REG; - insn->x_reg = A_REG; + insn->a_reg = BPF_REG_TMP; + insn->x_reg = BPF_REG_A; insn++; insn->code = BPF_LD | BPF_ABS | BPF_B; - insn->a_reg = A_REG; + insn->a_reg = BPF_REG_A; insn->imm = fp->k; insn++; insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = A_REG; + insn->a_reg = BPF_REG_A; insn->imm = 0xf; insn++; insn->code = BPF_ALU | BPF_LSH | BPF_K; - insn->a_reg = A_REG; + insn->a_reg = BPF_REG_A; insn->imm = 2; insn++; insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = X_REG; - insn->x_reg = A_REG; + insn->a_reg = BPF_REG_X; + insn->x_reg = BPF_REG_A; insn++; insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = A_REG; - insn->x_reg = TMP_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_TMP; break; /* RET_K, RET_A are remaped into 2 insns. */ @@ -1063,7 +1065,7 @@ do_pass: (BPF_RVAL(fp->code) == BPF_K ? BPF_K : BPF_X); insn->a_reg = 0; - insn->x_reg = A_REG; + insn->x_reg = BPF_REG_A; insn->imm = fp->k; insn++; @@ -1074,8 +1076,9 @@ do_pass: case BPF_ST: case BPF_STX: insn->code = BPF_STX | BPF_MEM | BPF_W; - insn->a_reg = FP_REG; - insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG; + insn->a_reg = BPF_REG_FP; + insn->x_reg = fp->code == BPF_ST ? + BPF_REG_A : BPF_REG_X; insn->off = -(BPF_MEMWORDS - fp->k) * 4; break; @@ -1084,8 +1087,8 @@ do_pass: case BPF_LDX | BPF_MEM: insn->code = BPF_LDX | BPF_MEM | BPF_W; insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - A_REG : X_REG; - insn->x_reg = FP_REG; + BPF_REG_A : BPF_REG_X; + insn->x_reg = BPF_REG_FP; insn->off = -(BPF_MEMWORDS - fp->k) * 4; break; @@ -1094,22 +1097,22 @@ do_pass: case BPF_LDX | BPF_IMM: insn->code = BPF_ALU | BPF_MOV | BPF_K; insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - A_REG : X_REG; + BPF_REG_A : BPF_REG_X; insn->imm = fp->k; break; /* X = A */ case BPF_MISC | BPF_TAX: insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = X_REG; - insn->x_reg = A_REG; + insn->a_reg = BPF_REG_X; + insn->x_reg = BPF_REG_A; break; /* A = X */ case BPF_MISC | BPF_TXA: insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = A_REG; - insn->x_reg = X_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_X; break; /* A = skb->len or X = skb->len */ @@ -1117,16 +1120,16 @@ do_pass: case BPF_LDX | BPF_W | BPF_LEN: insn->code = BPF_LDX | BPF_MEM | BPF_W; insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - A_REG : X_REG; - insn->x_reg = CTX_REG; + BPF_REG_A : BPF_REG_X; + insn->x_reg = BPF_REG_CTX; insn->off = offsetof(struct sk_buff, len); break; /* access seccomp_data fields */ case BPF_LDX | BPF_ABS | BPF_W: insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; + insn->a_reg = BPF_REG_A; + insn->x_reg = BPF_REG_CTX; insn->off = fp->k; break; -- cgit v1.2.3-71-gd317 From fd3c269f8ff940cc0fbb3b7f7e84c0572f6f759a Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 17 Apr 2014 10:37:35 +0800 Subject: drm/i915: Split the BDW device definition to prepare for dual BSD rings on BDW GT3 Based on the hardware spec, the BDW GT3 has the different configuration with the BDW GT1/GT2. So split the BDW device info definition. This is to do the preparation for adding the Dual BSD rings on BDW GT3 machine. V1->V2: Follow Daniel's comment to pay attention to the stolen check for BDW in kernel/early-quirks.c Reviewed-by: Imre Deak Signed-off-by: Zhao Yakui Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 26 ++++++++++++++++++++++++-- include/drm/i915_pciids.h | 22 +++++++++++++++++----- 2 files changed, 41 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f2ca3e929cd7..9f47aab7915f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -279,6 +279,26 @@ static const struct intel_device_info intel_broadwell_m_info = { GEN_DEFAULT_PIPEOFFSETS, }; +static const struct intel_device_info intel_broadwell_gt3d_info = { + .gen = 8, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, + .has_fbc = 1, + GEN_DEFAULT_PIPEOFFSETS, +}; + +static const struct intel_device_info intel_broadwell_gt3m_info = { + .gen = 8, .is_mobile = 1, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, + .has_fbc = 1, + GEN_DEFAULT_PIPEOFFSETS, +}; + /* * Make sure any device matches here are from most specific to most * general. For example, since the Quanta match is based on the subsystem @@ -311,8 +331,10 @@ static const struct intel_device_info intel_broadwell_m_info = { INTEL_HSW_M_IDS(&intel_haswell_m_info), \ INTEL_VLV_M_IDS(&intel_valleyview_m_info), \ INTEL_VLV_D_IDS(&intel_valleyview_d_info), \ - INTEL_BDW_M_IDS(&intel_broadwell_m_info), \ - INTEL_BDW_D_IDS(&intel_broadwell_d_info) + INTEL_BDW_GT12M_IDS(&intel_broadwell_m_info), \ + INTEL_BDW_GT12D_IDS(&intel_broadwell_d_info), \ + INTEL_BDW_GT3M_IDS(&intel_broadwell_gt3m_info), \ + INTEL_BDW_GT3D_IDS(&intel_broadwell_gt3d_info) static const struct pci_device_id pciidlist[] = { /* aka */ INTEL_PCI_IDS, diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 940ece4934ba..24f3cad045db 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -223,14 +223,26 @@ _INTEL_BDW_D(gt, 0x160A, info), /* Server */ \ _INTEL_BDW_D(gt, 0x160D, info) /* Workstation */ -#define INTEL_BDW_M_IDS(info) \ +#define INTEL_BDW_GT12M_IDS(info) \ _INTEL_BDW_M_IDS(1, info), \ - _INTEL_BDW_M_IDS(2, info), \ - _INTEL_BDW_M_IDS(3, info) + _INTEL_BDW_M_IDS(2, info) -#define INTEL_BDW_D_IDS(info) \ +#define INTEL_BDW_GT12D_IDS(info) \ _INTEL_BDW_D_IDS(1, info), \ - _INTEL_BDW_D_IDS(2, info), \ + _INTEL_BDW_D_IDS(2, info) + +#define INTEL_BDW_GT3M_IDS(info) \ + _INTEL_BDW_M_IDS(3, info) + +#define INTEL_BDW_GT3D_IDS(info) \ _INTEL_BDW_D_IDS(3, info) +#define INTEL_BDW_M_IDS(info) \ + INTEL_BDW_GT12M_IDS(info), \ + INTEL_BDW_GT3M_IDS(info) + +#define INTEL_BDW_D_IDS(info) \ + INTEL_BDW_GT12D_IDS(info), \ + INTEL_BDW_GT3D_IDS(info) + #endif /* _I915_PCIIDS_H */ -- cgit v1.2.3-71-gd317 From 9da93f9b7cdf8ab28da6b364cdc1fafc8670b4dc Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 5 May 2014 17:25:50 +1000 Subject: xfs: fix Q_XQUOTARM ioctl The Q_XQUOTARM quotactl was not working properly, because we weren't passing around proper flags. The xfs_fs_set_xstate() ioctl handler used the same flags for Q_XQUOTAON/OFF as well as for Q_XQUOTARM, but Q_XQUOTAON/OFF look for XFS_UQUOTA_ACCT, XFS_UQUOTA_ENFD, XFS_GQUOTA_ACCT etc, i.e. quota type + state, while Q_XQUOTARM looks only for the type of quota, i.e. XFS_DQ_USER, XFS_DQ_GROUP etc. Unfortunately these flag spaces overlap a bit, so we got semi-random results for Q_XQUOTARM; i.e. the value for XFS_DQ_USER == XFS_UQUOTA_ACCT, etc. yeargh. Add a new quotactl op vector specifically for the QUOTARM operation, since it operates with a different flag space. This has been broken more or less forever, AFAICT. Signed-off-by: Eric Sandeen Acked-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/quota/quota.c | 14 +++++++++++++- fs/xfs/xfs_quotaops.c | 29 +++++++++++++++++++++++++---- include/linux/quota.h | 1 + 3 files changed, 39 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 2b363e23f36e..ff3f0b3cfdb3 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -278,6 +278,17 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id, return ret; } +static int quota_rmxquota(struct super_block *sb, void __user *addr) +{ + __u32 flags; + + if (copy_from_user(&flags, addr, sizeof(flags))) + return -EFAULT; + if (!sb->s_qcop->rm_xquota) + return -ENOSYS; + return sb->s_qcop->rm_xquota(sb, flags); +} + /* Copy parameters and call proper function */ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void __user *addr, struct path *path) @@ -316,8 +327,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, return sb->s_qcop->quota_sync(sb, type); case Q_XQUOTAON: case Q_XQUOTAOFF: - case Q_XQUOTARM: return quota_setxstate(sb, cmd, addr); + case Q_XQUOTARM: + return quota_rmxquota(sb, addr); case Q_XGETQSTAT: return quota_getxstate(sb, addr); case Q_XGETQSTATV: diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index af33cafe69b6..2ad1b9822e92 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -100,15 +100,35 @@ xfs_fs_set_xstate( if (!XFS_IS_QUOTA_ON(mp)) return -EINVAL; return -xfs_qm_scall_quotaoff(mp, flags); - case Q_XQUOTARM: - if (XFS_IS_QUOTA_ON(mp)) - return -EINVAL; - return -xfs_qm_scall_trunc_qfiles(mp, flags); } return -EINVAL; } +STATIC int +xfs_fs_rm_xquota( + struct super_block *sb, + unsigned int uflags) +{ + struct xfs_mount *mp = XFS_M(sb); + unsigned int flags = 0; + + if (sb->s_flags & MS_RDONLY) + return -EROFS; + + if (XFS_IS_QUOTA_ON(mp)) + return -EINVAL; + + if (uflags & FS_USER_QUOTA) + flags |= XFS_DQ_USER; + if (uflags & FS_GROUP_QUOTA) + flags |= XFS_DQ_GROUP; + if (uflags & FS_USER_QUOTA) + flags |= XFS_DQ_PROJ; + + return -xfs_qm_scall_trunc_qfiles(mp, flags); +} + STATIC int xfs_fs_get_dqblk( struct super_block *sb, @@ -149,6 +169,7 @@ const struct quotactl_ops xfs_quotactl_operations = { .get_xstatev = xfs_fs_get_xstatev, .get_xstate = xfs_fs_get_xstate, .set_xstate = xfs_fs_set_xstate, + .rm_xquota = xfs_fs_rm_xquota, .get_dqblk = xfs_fs_get_dqblk, .set_dqblk = xfs_fs_set_dqblk, }; diff --git a/include/linux/quota.h b/include/linux/quota.h index cc7494a35429..0f3c5d38da1f 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -329,6 +329,7 @@ struct quotactl_ops { int (*get_xstate)(struct super_block *, struct fs_quota_stat *); int (*set_xstate)(struct super_block *, unsigned int, int); int (*get_xstatev)(struct super_block *, struct fs_quota_statv *); + int (*rm_xquota)(struct super_block *, unsigned int); }; struct quota_format_type { -- cgit v1.2.3-71-gd317 From 0c4972ccaa27620fe4281ac5c8c536978a563345 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Thu, 1 May 2014 10:17:27 +0300 Subject: mac80211: set an external flag for TDLS stations Expose a new tdls flag for the public ieee80211_sta struct. This can be used in some rate control decisions. Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ net/mac80211/cfg.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 451c1bf00df9..bdb4a7cbab31 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1374,6 +1374,7 @@ struct ieee80211_sta_rates { * the station moves to associated state. * @smps_mode: current SMPS mode (off, static or dynamic) * @rates: rate control selection table + * @tdls: indicates whether the STA is a TDLS peer */ struct ieee80211_sta { u32 supp_rates[IEEE80211_NUM_BANDS]; @@ -1388,6 +1389,7 @@ struct ieee80211_sta { enum ieee80211_sta_rx_bandwidth bandwidth; enum ieee80211_smps_mode smps_mode; struct ieee80211_sta_rates __rcu *rates; + bool tdls; /* must be last */ u8 drv_priv[0] __aligned(sizeof(void *)); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7b8d3cf89574..d8b236633ca3 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1459,6 +1459,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) { sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); + } else { + sta->sta.tdls = true; } err = sta_apply_parameters(local, sta, params); -- cgit v1.2.3-71-gd317 From 8757ad65d30f009fe0beeb2d70d3cd834cb998f2 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 11 Apr 2014 10:37:39 -0400 Subject: NVMe: Update copyright headers Make the copyright dates accurate and remove the final paragraph that includes the address of the FSF. Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 4 ---- drivers/block/nvme-scsi.c | 6 +----- include/linux/nvme.h | 6 +----- include/uapi/linux/nvme.h | 6 +----- 4 files changed, 3 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 7c64fa756cce..eacd64e36be3 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -10,10 +10,6 @@ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ #include diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 2c3f5be06da1..da3b252dea6f 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -1,6 +1,6 @@ /* * NVM Express device driver - * Copyright (c) 2011, Intel Corporation. + * Copyright (c) 2011-2014, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -10,10 +10,6 @@ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ /* diff --git a/include/linux/nvme.h b/include/linux/nvme.h index a50173ca1d72..cfd084cab22b 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1,6 +1,6 @@ /* * Definitions for the NVM Express interface - * Copyright (c) 2011-2013, Intel Corporation. + * Copyright (c) 2011-2014, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -10,10 +10,6 @@ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ #ifndef _LINUX_NVME_H diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index 096fe1c6f83d..ad9014e49693 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -1,6 +1,6 @@ /* * Definitions for the NVM Express interface - * Copyright (c) 2011-2013, Intel Corporation. + * Copyright (c) 2011-2014, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -10,10 +10,6 @@ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ #ifndef _UAPI_LINUX_NVME_H -- cgit v1.2.3-71-gd317 From a7d2ce2832d84e0182585f63bf96ca7323b3aee7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 29 Apr 2014 11:41:28 -0600 Subject: NVMe: Configure support for block flush This configures an nvme request_queue as flush capable if the device has a volatile write cache present. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 3 +++ include/linux/nvme.h | 1 + include/uapi/linux/nvme.h | 1 + 3 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 025dd4cad4a6..e7c4fdb6a651 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1897,6 +1897,8 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); if (dev->max_hw_sectors) blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); + if (dev->vwc & NVME_CTRL_VWC_PRESENT) + blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA); disk->major = nvme_major; disk->first_minor = 0; @@ -2201,6 +2203,7 @@ static int nvme_dev_add(struct nvme_dev *dev) nn = le32_to_cpup(&ctrl->nn); dev->oncs = le16_to_cpup(&ctrl->oncs); dev->abort_limit = ctrl->acl + 1; + dev->vwc = ctrl->vwc; memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index cfd084cab22b..6266373d3147 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -99,6 +99,7 @@ struct nvme_dev { u32 stripe_size; u16 oncs; u16 abort_limit; + u8 vwc; u8 initialized; }; diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index ad9014e49693..f090336d5bad 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -73,6 +73,7 @@ enum { NVME_CTRL_ONCS_COMPARE = 1 << 0, NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, NVME_CTRL_ONCS_DSM = 1 << 2, + NVME_CTRL_VWC_PRESENT = 1 << 0, }; struct nvme_lbaf { -- cgit v1.2.3-71-gd317 From 53562be74bd06bbe74d2acf3caca5398f8eeb160 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 29 Apr 2014 11:41:29 -0600 Subject: NVMe: Flush with data support It is possible a filesystem may send a flush flagged bio with write data. There is no such composite NVMe command, so the driver sends flush and write separately. The device is allowed to execute these commands in any order, so it was possible the driver ends the bio after the write completes, but while the flush is still active. We don't want to let a filesystem believe flush succeeded before it really has; this could cause data corruption on a power loss between these events. To fix, this patch splits the flush and write into chained bios. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 44 ++++++++++++++++++++++++-------------------- include/linux/nvme.h | 1 - 2 files changed, 24 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index e7c4fdb6a651..cd8a8bc711cc 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -197,16 +197,13 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx, #define CMD_CTX_CANCELLED (0x30C + CMD_CTX_BASE) #define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE) #define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE) -#define CMD_CTX_FLUSH (0x318 + CMD_CTX_BASE) -#define CMD_CTX_ABORT (0x31C + CMD_CTX_BASE) +#define CMD_CTX_ABORT (0x318 + CMD_CTX_BASE) static void special_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_completion *cqe) { if (ctx == CMD_CTX_CANCELLED) return; - if (ctx == CMD_CTX_FLUSH) - return; if (ctx == CMD_CTX_ABORT) { ++nvmeq->dev->abort_limit; return; @@ -629,16 +626,6 @@ static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns, return 0; } -int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns) -{ - int cmdid = alloc_cmdid(nvmeq, (void *)CMD_CTX_FLUSH, - special_completion, NVME_IO_TIMEOUT); - if (unlikely(cmdid < 0)) - return cmdid; - - return nvme_submit_flush(nvmeq, ns, cmdid); -} - static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod) { struct bio *bio = iod->private; @@ -654,7 +641,7 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod) if (bio->bi_rw & REQ_DISCARD) return nvme_submit_discard(nvmeq, ns, bio, iod, cmdid); - if ((bio->bi_rw & REQ_FLUSH) && !iod->nents) + if (bio->bi_rw & REQ_FLUSH) return nvme_submit_flush(nvmeq, ns, cmdid); control = 0; @@ -688,6 +675,26 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod) return 0; } +static int nvme_split_flush_data(struct nvme_queue *nvmeq, struct bio *bio) +{ + struct bio *split = bio_clone(bio, GFP_ATOMIC); + if (!split) + return -ENOMEM; + + split->bi_iter.bi_size = 0; + split->bi_phys_segments = 0; + bio->bi_rw &= ~REQ_FLUSH; + bio_chain(split, bio); + + if (!waitqueue_active(&nvmeq->sq_full)) + add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); + bio_list_add(&nvmeq->sq_cong, split); + bio_list_add(&nvmeq->sq_cong, bio); + wake_up_process(nvme_thread); + + return 0; +} + /* * Called with local interrupts disabled and the q_lock held. May not sleep. */ @@ -698,11 +705,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, int psegs = bio_phys_segments(ns->queue, bio); int result; - if ((bio->bi_rw & REQ_FLUSH) && psegs) { - result = nvme_submit_flush_data(nvmeq, ns); - if (result) - return result; - } + if ((bio->bi_rw & REQ_FLUSH) && psegs) + return nvme_split_flush_data(nvmeq, bio); iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC); if (!iod) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 6266373d3147..1813cfdb7e80 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -156,7 +156,6 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, void nvme_unmap_user_pages(struct nvme_dev *dev, int write, struct nvme_iod *iod); int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_command *, u32 *); -int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns); int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *, u32 *result); int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns, -- cgit v1.2.3-71-gd317 From 07064c6e022ba8dc0c86ce12f7851a1de24e04fc Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 2 May 2014 16:28:03 -0700 Subject: net: Allow csum_add to be provided in arch csum_add is really nothing more then add-with-carry which can be implemented efficiently in some architectures. Allow architecture to define this protected by HAVE_ARCH_CSUM_ADD. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/checksum.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/checksum.h b/include/net/checksum.h index a28f4e0f6251..87cb1903640d 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -57,12 +57,14 @@ static __inline__ __wsum csum_and_copy_to_user } #endif +#ifndef HAVE_ARCH_CSUM_ADD static inline __wsum csum_add(__wsum csum, __wsum addend) { u32 res = (__force u32)csum; res += (__force u32)addend; return (__force __wsum)(res + (res < (__force u32)addend)); } +#endif static inline __wsum csum_sub(__wsum csum, __wsum addend) { -- cgit v1.2.3-71-gd317 From 76ba0aae673075c77a8b775e9133c8e8b1a44563 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 2 May 2014 16:29:18 -0700 Subject: net: Generalize checksum_init functions Create a general __skb_checksum_validate function (actually a macro) to subsume the various checksum_init functions. This function can either init the checksum, or do the full validation (logically checksum_init+skb_check_complete)-- a flag specifies if full vaidation is performed. Also, there is a flag to the function to indicate that zero checksums are allowed (to support optional UDP checksums). Added several stub functions for calling __skb_checksum_validate. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 08074a810164..3ca0dda5a42e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2741,6 +2741,99 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb) 0 : __skb_checksum_complete(skb); } +/* Check if we need to perform checksum complete validation. + * + * Returns true if checksum complete is needed, false otherwise + * (either checksum is unnecessary or zero checksum is allowed). + */ +static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, + bool zero_okay, + __sum16 check) +{ + if (skb_csum_unnecessary(skb)) { + return false; + } else if (zero_okay && !check) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + return false; + } + + return true; +} + +/* For small packets <= CHECKSUM_BREAK peform checksum complete directly + * in checksum_init. + */ +#define CHECKSUM_BREAK 76 + +/* Validate (init) checksum based on checksum complete. + * + * Return values: + * 0: checksum is validated or try to in skb_checksum_complete. In the latter + * case the ip_summed will not be CHECKSUM_UNNECESSARY and the pseudo + * checksum is stored in skb->csum for use in __skb_checksum_complete + * non-zero: value of invalid checksum + * + */ +static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb, + bool complete, + __wsum psum) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + if (!csum_fold(csum_add(psum, skb->csum))) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + return 0; + } + } + + skb->csum = psum; + + if (complete || skb->len <= CHECKSUM_BREAK) + return __skb_checksum_complete(skb); + + return 0; +} + +static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto) +{ + return 0; +} + +/* Perform checksum validate (init). Note that this is a macro since we only + * want to calculate the pseudo header which is an input function if necessary. + * First we try to validate without any computation (checksum unnecessary) and + * then calculate based on checksum complete calling the function to compute + * pseudo header. + * + * Return values: + * 0: checksum is validated or try to in skb_checksum_complete + * non-zero: value of invalid checksum + */ +#define __skb_checksum_validate(skb, proto, complete, \ + zero_okay, check, compute_pseudo) \ +({ \ + __sum16 __ret = 0; \ + if (__skb_checksum_validate_needed(skb, zero_okay, check)) \ + __ret = __skb_checksum_validate_complete(skb, \ + complete, compute_pseudo(skb, proto)); \ + __ret; \ +}) + +#define skb_checksum_init(skb, proto, compute_pseudo) \ + __skb_checksum_validate(skb, proto, false, false, 0, compute_pseudo) + +#define skb_checksum_init_zero_check(skb, proto, check, compute_pseudo) \ + __skb_checksum_validate(skb, proto, false, true, check, compute_pseudo) + +#define skb_checksum_validate(skb, proto, compute_pseudo) \ + __skb_checksum_validate(skb, proto, true, false, 0, compute_pseudo) + +#define skb_checksum_validate_zero_check(skb, proto, check, \ + compute_pseudo) \ + __skb_checksum_validate_(skb, proto, true, true, check, compute_pseudo) + +#define skb_checksum_simple_validate(skb) \ + __skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo) + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) -- cgit v1.2.3-71-gd317 From ed70fcfcee953a76028bfc3f963d2167c2990020 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 2 May 2014 16:29:38 -0700 Subject: net: Call skb_checksum_init in IPv4 Call skb_checksum_init instead of private functions. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/ip.h | 6 ++++++ net/ipv4/tcp_ipv4.c | 25 ++----------------------- net/ipv4/udp.c | 19 ++----------------- 3 files changed, 10 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 3ec2b0fb9d83..1988cefdbb70 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -342,6 +342,12 @@ static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *d __ip_select_ident(iph, dst, more); } +static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto) +{ + return csum_tcpudp_nofold(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, + skb->len, proto, 0); +} + /* * Map a multicast IP onto multicast MAC for type ethernet. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 438f3b95143d..ad166dcc278f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1744,28 +1744,6 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) return sk; } -static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - - if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!tcp_v4_check(skb->len, iph->saddr, - iph->daddr, skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - return 0; - } - } - - skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb->len, IPPROTO_TCP, 0); - - if (skb->len <= 76) { - return __skb_checksum_complete(skb); - } - return 0; -} - - /* The socket must have it's spinlock held when we get * here. * @@ -1960,7 +1938,8 @@ int tcp_v4_rcv(struct sk_buff *skb) * Packet length and doff are validated by header prediction, * provided case of th->doff==0 is eliminated. * So, we defer the checks. */ - if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) + + if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo)) goto csum_error; th = tcp_hdr(skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4468e1adc094..f2d05d7be743 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1672,7 +1672,6 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) { - const struct iphdr *iph; int err; UDP_SKB_CB(skb)->partial_cov = 0; @@ -1684,22 +1683,8 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, return err; } - iph = ip_hdr(skb); - if (uh->check == 0) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - } else if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, - proto, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - } - if (!skb_csum_unnecessary(skb)) - skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb->len, proto, 0); - /* Probably, we should checksum udp header (it should be in cache - * in any case) and data in tiny packets (< rx copybreak). - */ - - return 0; + return skb_checksum_init_zero_check(skb, proto, uh->check, + inet_compute_pseudo); } /* -- cgit v1.2.3-71-gd317 From e4f45b7f40bdaade5ef8f45e7c6daed4c909fdf5 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 2 May 2014 16:29:51 -0700 Subject: net: Call skb_checksum_init in IPv6 Call skb_checksum_init instead of private functions. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/ip6_checksum.h | 7 +++++++ net/ipv6/ip6_checksum.c | 11 +---------- net/ipv6/tcp_ipv6.c | 21 +-------------------- 3 files changed, 9 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h index 9e3c540c1b11..8ac5c21f8456 100644 --- a/include/net/ip6_checksum.h +++ b/include/net/ip6_checksum.h @@ -41,6 +41,13 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, __wsum csum); #endif +static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto) +{ + return ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len, proto, 0)); +} + static __inline__ __sum16 tcp_v6_check(int len, const struct in6_addr *saddr, const struct in6_addr *daddr, diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index ee7a97f510cb..c69fe37b8104 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -84,16 +84,7 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) &ipv6_hdr(skb)->daddr, ntohs(uh->dest)); return 1; } - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, - skb->len, proto, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - if (!skb_csum_unnecessary(skb)) - skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len, proto, 0)); - - return 0; + return skb_checksum_init(skb, IPPROTO_UDP, ip6_compute_pseudo); } EXPORT_SYMBOL(udp6_csum_init); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e289830ed6e3..7fa67439f4d6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1294,25 +1294,6 @@ out: return NULL; } -static __sum16 tcp_v6_checksum_init(struct sk_buff *skb) -{ - if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - return 0; - } - } - - skb->csum = ~csum_unfold(tcp_v6_check(skb->len, - &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, 0)); - - if (skb->len <= 76) - return __skb_checksum_complete(skb); - return 0; -} - /* The socket must have it's spinlock held when we get * here. * @@ -1486,7 +1467,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, th->doff*4)) goto discard_it; - if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb)) + if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) goto csum_error; th = tcp_hdr(skb); -- cgit v1.2.3-71-gd317 From 86aae6c7b577654b7293374973985a153e0c147e Mon Sep 17 00:00:00 2001 From: Libor Pechacek Date: Tue, 29 Apr 2014 20:38:34 +0200 Subject: Bluetooth: Convert RFCOMM spinlocks into mutexes Enabling CONFIG_DEBUG_ATOMIC_SLEEP has shown that some rfcomm functions acquiring spinlocks call sleeping locks further in the chain. Converting the offending spinlocks into mutexes makes sleeping safe. Signed-off-by: Libor Pechacek Signed-off-by: Marcel Holtmann --- include/net/bluetooth/rfcomm.h | 6 +++--- net/bluetooth/rfcomm/core.c | 2 +- net/bluetooth/rfcomm/tty.c | 20 ++++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 2611cc389d7d..578b83127af1 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -173,7 +173,7 @@ struct rfcomm_dlc { struct sk_buff_head tx_queue; struct timer_list timer; - spinlock_t lock; + struct mutex lock; unsigned long state; unsigned long flags; atomic_t refcnt; @@ -244,8 +244,8 @@ int rfcomm_dlc_get_modem_status(struct rfcomm_dlc *d, u8 *v24_sig); void rfcomm_dlc_accept(struct rfcomm_dlc *d); struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel); -#define rfcomm_dlc_lock(d) spin_lock(&d->lock) -#define rfcomm_dlc_unlock(d) spin_unlock(&d->lock) +#define rfcomm_dlc_lock(d) mutex_lock(&d->lock) +#define rfcomm_dlc_unlock(d) mutex_unlock(&d->lock) static inline void rfcomm_dlc_hold(struct rfcomm_dlc *d) { diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 633cceeb943e..05c609b12a1d 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -307,7 +307,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio) setup_timer(&d->timer, rfcomm_dlc_timeout, (unsigned long)d); skb_queue_head_init(&d->tx_queue); - spin_lock_init(&d->lock); + mutex_init(&d->lock); atomic_set(&d->refcnt, 1); rfcomm_dlc_clear_state(d); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 403ec09f480a..8e385a0ae60e 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -70,7 +70,7 @@ struct rfcomm_dev { }; static LIST_HEAD(rfcomm_dev_list); -static DEFINE_SPINLOCK(rfcomm_dev_lock); +static DEFINE_MUTEX(rfcomm_dev_lock); static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb); static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err); @@ -96,9 +96,9 @@ static void rfcomm_dev_destruct(struct tty_port *port) if (dev->tty_dev) tty_unregister_device(rfcomm_tty_driver, dev->id); - spin_lock(&rfcomm_dev_lock); + mutex_lock(&rfcomm_dev_lock); list_del(&dev->list); - spin_unlock(&rfcomm_dev_lock); + mutex_unlock(&rfcomm_dev_lock); kfree(dev); @@ -161,14 +161,14 @@ static struct rfcomm_dev *rfcomm_dev_get(int id) { struct rfcomm_dev *dev; - spin_lock(&rfcomm_dev_lock); + mutex_lock(&rfcomm_dev_lock); dev = __rfcomm_dev_lookup(id); if (dev && !tty_port_get(&dev->port)) dev = NULL; - spin_unlock(&rfcomm_dev_lock); + mutex_unlock(&rfcomm_dev_lock); return dev; } @@ -224,7 +224,7 @@ static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req, if (!dev) return ERR_PTR(-ENOMEM); - spin_lock(&rfcomm_dev_lock); + mutex_lock(&rfcomm_dev_lock); if (req->dev_id < 0) { dev->id = 0; @@ -305,11 +305,11 @@ static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req, holds reference to this module. */ __module_get(THIS_MODULE); - spin_unlock(&rfcomm_dev_lock); + mutex_unlock(&rfcomm_dev_lock); return dev; out: - spin_unlock(&rfcomm_dev_lock); + mutex_unlock(&rfcomm_dev_lock); kfree(dev); return ERR_PTR(err); } @@ -524,7 +524,7 @@ static int rfcomm_get_dev_list(void __user *arg) di = dl->dev_info; - spin_lock(&rfcomm_dev_lock); + mutex_lock(&rfcomm_dev_lock); list_for_each_entry(dev, &rfcomm_dev_list, list) { if (!tty_port_get(&dev->port)) @@ -540,7 +540,7 @@ static int rfcomm_get_dev_list(void __user *arg) break; } - spin_unlock(&rfcomm_dev_lock); + mutex_unlock(&rfcomm_dev_lock); dl->dev_num = n; size = sizeof(*dl) + n * sizeof(*di); -- cgit v1.2.3-71-gd317 From bdffd893a0e9c431304142d12d9a0a21d365c502 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 29 Apr 2014 14:17:40 -0500 Subject: tracing: Replace __get_cpu_var uses with this_cpu_ptr Replace uses of &__get_cpu_var for address calculation with this_cpu_ptr. Link: http://lkml.kernel.org/p/alpine.DEB.2.10.1404291415560.18364@gentwo.org Acked-by: Masami Hiramatsu Signed-off-by: Christoph Lameter Signed-off-by: Steven Rostedt --- include/linux/kprobes.h | 2 +- kernel/trace/ftrace.c | 4 ++-- kernel/trace/trace.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 925eaf28fca9..7bd2ad01e39c 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -355,7 +355,7 @@ static inline void reset_current_kprobe(void) static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) { - return (&__get_cpu_var(kprobe_ctlblk)); + return this_cpu_ptr(&kprobe_ctlblk); } int register_kprobe(struct kprobe *p); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9eb1aa03a18d..38e5cf73b9ae 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -822,7 +822,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip, local_irq_save(flags); - stat = &__get_cpu_var(ftrace_profile_stats); + stat = this_cpu_ptr(&ftrace_profile_stats); if (!stat->hash || !ftrace_profile_enabled) goto out; @@ -853,7 +853,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) unsigned long flags; local_irq_save(flags); - stat = &__get_cpu_var(ftrace_profile_stats); + stat = this_cpu_ptr(&ftrace_profile_stats); if (!stat->hash || !ftrace_profile_enabled) goto out; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4c392c8238bf..05431696b10c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1726,7 +1726,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, */ barrier(); if (use_stack == 1) { - trace.entries = &__get_cpu_var(ftrace_stack).calls[0]; + trace.entries = this_cpu_ptr(ftrace_stack.calls); trace.max_entries = FTRACE_STACK_MAX_ENTRIES; if (regs) -- cgit v1.2.3-71-gd317 From 59af6928d2099479c0bc2ef3f66cc7b33998120a Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Wed, 9 Apr 2014 15:10:59 +0200 Subject: mac80211: fix CSA tx queue stopping It was possible for tx queues to be stuck stopped if AP CSA finalization failed. In that case neither stop_ap nor do_stop woke the queues up. This means it was impossible to perform tx at all until driver was reloaded or a successful CSA was performed later. It was possible to solve this in a simpler manner however this is more robust and future proof (having multi-vif CSA in mind). New sdata->csa_block_tx is introduced to keep track of which interfaces requested tx to be blocked for CSA. This is required because mac80211 stops all tx queues for that purpose. This means queues must be awoken only when last tx-blocking CSA interface is finished. It is still possible to have tx queues stopped after CSA failure but as soon as offending interfaces are stopped from userspace (stop_ap or ifdown) tx queues are woken up properly. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++- net/mac80211/cfg.c | 77 ++++++++++++++++++++++++++++++++++++++-------- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/iface.c | 7 +++++ net/mac80211/mlme.c | 37 ++++++++++++++++------ 5 files changed, 104 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index bdb4a7cbab31..3541c48a97cd 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1113,7 +1113,9 @@ enum ieee80211_vif_flags { * @addr: address of this interface * @p2p: indicates whether this AP or STA interface is a p2p * interface, i.e. a GO or p2p-sta respectively - * @csa_active: marks whether a channel switch is going on + * @csa_active: marks whether a channel switch is going on. Internally it is + * write-protected by sdata_lock and local->mtx so holding either is fine + * for read access. * @driver_flags: flags/capabilities the driver has for this interface, * these need to be set (or cleared) when the interface is added * or, if supported by the driver, the interface type is changed diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 19a7e6ff45d3..f789c3198af4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1084,6 +1084,31 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, return 0; } +bool ieee80211_csa_needs_block_tx(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + + lockdep_assert_held(&local->mtx); + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata)) + continue; + + if (!sdata->vif.csa_active) + continue; + + if (!sdata->csa_block_tx) + continue; + + rcu_read_unlock(); + return true; + } + rcu_read_unlock(); + + return false; +} + static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1101,7 +1126,14 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) old_probe_resp = sdata_dereference(sdata->u.ap.probe_resp, sdata); /* abort any running channel switch */ + mutex_lock(&local->mtx); sdata->vif.csa_active = false; + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); + mutex_unlock(&local->mtx); + kfree(sdata->u.ap.next_beacon); sdata->u.ap.next_beacon = NULL; @@ -3027,11 +3059,10 @@ static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) int err, changed = 0; sdata_assert_lock(sdata); + lockdep_assert_held(&local->mtx); - mutex_lock(&local->mtx); sdata->radar_required = sdata->csa_radar_required; err = ieee80211_vif_change_channel(sdata, &changed); - mutex_unlock(&local->mtx); if (WARN_ON(err < 0)) return; @@ -3072,11 +3103,12 @@ static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, changed); - ieee80211_wake_queues_by_reason(&sdata->local->hw, + cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef); + + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - - cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef); } void ieee80211_csa_finalize_work(struct work_struct *work) @@ -3084,8 +3116,11 @@ void ieee80211_csa_finalize_work(struct work_struct *work) struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, csa_finalize_work); + struct ieee80211_local *local = sdata->local; sdata_lock(sdata); + mutex_lock(&local->mtx); + /* AP might have been stopped while waiting for the lock. */ if (!sdata->vif.csa_active) goto unlock; @@ -3096,6 +3131,7 @@ void ieee80211_csa_finalize_work(struct work_struct *work) ieee80211_csa_finalize(sdata); unlock: + mutex_unlock(&local->mtx); sdata_unlock(sdata); } @@ -3222,8 +3258,8 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, return 0; } -int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_csa_settings *params) +int __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_csa_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -3232,6 +3268,7 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, int err, num_chanctx, changed = 0; sdata_assert_lock(sdata); + lockdep_assert_held(&local->mtx); if (!list_empty(&local->roc_list) || local->scanning) return -EBUSY; @@ -3274,15 +3311,15 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, return err; sdata->csa_radar_required = params->radar_required; - - if (params->block_tx) - ieee80211_stop_queues_by_reason(&local->hw, - IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_chandef = params->chandef; + sdata->csa_block_tx = params->block_tx; sdata->vif.csa_active = true; + if (sdata->csa_block_tx) + ieee80211_stop_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); + if (changed) { ieee80211_bss_info_change_notify(sdata, changed); drv_channel_switch_beacon(sdata, ¶ms->chandef); @@ -3294,6 +3331,20 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, return 0; } +int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_csa_settings *params) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + int err; + + mutex_lock(&local->mtx); + err = __ieee80211_channel_switch(wiphy, dev, params); + mutex_unlock(&local->mtx); + + return err; +} + static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params, u64 *cookie) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f86d06aaf54b..f4ba0c4a4314 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -756,6 +756,7 @@ struct ieee80211_sub_if_data { int csa_counter_offset_beacon; int csa_counter_offset_presp; bool csa_radar_required; + bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */ struct cfg80211_chan_def csa_chandef; struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */ @@ -1472,6 +1473,7 @@ void ieee80211_sw_roc_work(struct work_struct *work); void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); /* channel switch handling */ +bool ieee80211_csa_needs_block_tx(struct ieee80211_local *local); void ieee80211_csa_finalize_work(struct work_struct *work); int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 7fff3dcaac43..79fc98815da8 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -838,8 +838,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&sdata->recalc_smps); sdata_lock(sdata); + mutex_lock(&local->mtx); sdata->vif.csa_active = false; + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); + mutex_unlock(&local->mtx); sdata_unlock(sdata); + cancel_work_sync(&sdata->csa_finalize_work); cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 488826f188a7..d68e73cbdcd6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -975,16 +975,20 @@ static void ieee80211_chswitch_work(struct work_struct *work) /* XXX: shouldn't really modify cfg80211-owned data! */ ifmgd->associated->channel = sdata->csa_chandef.chan; + ieee80211_bss_info_change_notify(sdata, changed); + + mutex_lock(&local->mtx); + sdata->vif.csa_active = false; /* XXX: wait for a beacon first? */ - ieee80211_wake_queues_by_reason(&local->hw, + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); + mutex_unlock(&local->mtx); - ieee80211_bss_info_change_notify(sdata, changed); - - out: - sdata->vif.csa_active = false; ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; + +out: sdata_unlock(sdata); } @@ -1100,12 +1104,16 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, mutex_unlock(&local->chanctx_mtx); sdata->csa_chandef = csa_ie.chandef; + + mutex_lock(&local->mtx); sdata->vif.csa_active = true; + sdata->csa_block_tx = csa_ie.mode; - if (csa_ie.mode) + if (sdata->csa_block_tx) ieee80211_stop_queues_by_reason(&local->hw, - IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_CSA); + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); + mutex_unlock(&local->mtx); if (local->ops->channel_switch) { /* use driver's channel switch callback */ @@ -1817,6 +1825,12 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags = 0; mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); + + sdata->vif.csa_active = false; + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_MAX_QUEUE_MAP, + IEEE80211_QUEUE_STOP_REASON_CSA); mutex_unlock(&local->mtx); sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; @@ -2045,6 +2059,7 @@ EXPORT_SYMBOL(ieee80211_ap_probereq_get); static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; @@ -2058,10 +2073,14 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, true, frame_buf); ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; + + mutex_lock(&local->mtx); sdata->vif.csa_active = false; - ieee80211_wake_queues_by_reason(&sdata->local->hw, + if (!ieee80211_csa_needs_block_tx(local)) + ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); + mutex_unlock(&local->mtx); cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); -- cgit v1.2.3-71-gd317 From f04c22033c25f71617ac62bcfe75698baa17a0b8 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Wed, 9 Apr 2014 15:11:01 +0200 Subject: cfg80211: export interface stopping function This exports a new cfg80211_stop_iface() function. This is intended for driver internal interface combination management and channel switching. Due to locking issues (it re-enters driver) the call is asynchronous and uses cfg80211 event list/worker. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 15 +++++++++++++++ net/wireless/ap.c | 4 ++-- net/wireless/core.c | 43 ++++++++++++++++++++++++++++++++++++------- net/wireless/core.h | 7 +++++++ net/wireless/mesh.c | 4 ++-- net/wireless/trace.h | 15 +++++++++++++++ net/wireless/util.c | 3 +++ 7 files changed, 80 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7eae46ccec01..0631230b01eb 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4756,6 +4756,21 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, void *data), void *data); +/* + * cfg80211_stop_iface - trigger interface disconnection + * + * @wiphy: the wiphy + * @wdev: wireless device + * @gfp: context flags + * + * Trigger interface to be stopped as if AP was stopped, IBSS/mesh left, STA + * disconnected. + * + * Note: This doesn't need any locks and is asynchronous. + */ +void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, + gfp_t gfp); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 3e02ade508d8..bdad1f951561 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -6,8 +6,8 @@ #include "rdev-ops.h" -static int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool notify) +int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool notify) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; diff --git a/net/wireless/core.c b/net/wireless/core.c index f509da4d9be9..7e023b74f009 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -792,23 +792,23 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, rdev->num_running_monitor_ifaces += num; } -void cfg80211_leave(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) +void __cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; ASSERT_RTNL(); + ASSERT_WDEV_LOCK(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - cfg80211_leave_ibss(rdev, dev, true); + __cfg80211_leave_ibss(rdev, dev, true); break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: if (rdev->sched_scan_req && dev == rdev->sched_scan_req->dev) __cfg80211_stop_sched_scan(rdev, false); - wdev_lock(wdev); #ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.ie); wdev->wext.ie = NULL; @@ -817,20 +817,49 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, #endif cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, true); - wdev_unlock(wdev); break; case NL80211_IFTYPE_MESH_POINT: - cfg80211_leave_mesh(rdev, dev); + __cfg80211_leave_mesh(rdev, dev); break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - cfg80211_stop_ap(rdev, dev, true); + __cfg80211_stop_ap(rdev, dev, true); break; default: break; } } +void cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + wdev_lock(wdev); + __cfg80211_leave(rdev, wdev); + wdev_unlock(wdev); +} + +void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, + gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct cfg80211_event *ev; + unsigned long flags; + + trace_cfg80211_stop_iface(wiphy, wdev); + + ev = kzalloc(sizeof(*ev), gfp); + if (!ev) + return; + + ev->type = EVENT_STOPPED; + + spin_lock_irqsave(&wdev->event_lock, flags); + list_add_tail(&ev->list, &wdev->event_list); + spin_unlock_irqrestore(&wdev->event_lock, flags); + queue_work(cfg80211_wq, &rdev->event_work); +} +EXPORT_SYMBOL(cfg80211_stop_iface); + static int cfg80211_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ptr) { diff --git a/net/wireless/core.h b/net/wireless/core.h index 681b8fa4355b..e9afbf10e756 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -185,6 +185,7 @@ enum cfg80211_event_type { EVENT_ROAMED, EVENT_DISCONNECTED, EVENT_IBSS_JOINED, + EVENT_STOPPED, }; struct cfg80211_event { @@ -281,6 +282,8 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, struct mesh_setup *setup, const struct mesh_config *conf); +int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev); int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, @@ -288,6 +291,8 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef); /* AP */ +int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool notify); int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, bool notify); @@ -441,6 +446,8 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); +void __cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); void cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 3ddfb7cd335e..092300b30c37 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -238,8 +238,8 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, return 0; } -static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, - struct net_device *dev) +int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index f3c13ff4d04c..cdfbb00e1b37 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2636,6 +2636,21 @@ TRACE_EVENT(cfg80211_ft_event, WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap)) ); +TRACE_EVENT(cfg80211_stop_iface, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), + TP_ARGS(wiphy, wdev), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, + WIPHY_PR_ARG, WDEV_PR_ARG) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/wireless/util.c b/net/wireless/util.c index 7c47fa07b276..a756429b3a0a 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -839,6 +839,9 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) __cfg80211_ibss_joined(wdev->netdev, ev->ij.bssid, ev->ij.channel); break; + case EVENT_STOPPED: + __cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev); + break; } wdev_unlock(wdev); -- cgit v1.2.3-71-gd317 From e7c24607b5d68a4cdc56e09d70a3c8bae5f0519f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 10 Apr 2014 20:54:51 -0400 Subject: kill iov_iter_copy_from_user() all callers can use copy_page_from_iter() and it actually simplifies them. Signed-off-by: Al Viro --- fs/ceph/file.c | 3 +-- fs/cifs/file.c | 7 +++---- include/linux/uio.h | 2 -- mm/iov_iter.c | 27 --------------------------- mm/process_vm_access.c | 6 +----- 5 files changed, 5 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 88a6df4cbe6d..ef9115e4a6fa 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -737,13 +737,12 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov, left = len; for (n = 0; n < num_pages; n++) { size_t plen = min_t(size_t, left, PAGE_SIZE); - ret = iov_iter_copy_from_user(pages[n], &i, 0, plen); + ret = copy_page_from_iter(pages[n], 0, plen, &i); if (ret != plen) { ret = -EFAULT; break; } left -= ret; - iov_iter_advance(&i, ret); } if (ret < 0) { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5ed03e0b8b40..2900d150654e 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2444,11 +2444,10 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, save_len = cur_len; for (i = 0; i < nr_pages; i++) { - bytes = min_t(const size_t, cur_len, PAGE_SIZE); - copied = iov_iter_copy_from_user(wdata->pages[i], &it, - 0, bytes); + bytes = min_t(size_t, cur_len, PAGE_SIZE); + copied = copy_page_from_iter(wdata->pages[i], 0, bytes, + &it); cur_len -= copied; - iov_iter_advance(&it, copied); /* * If we didn't copy as much as we expected, then that * may mean we trod into an unmapped area. Stop copying diff --git a/include/linux/uio.h b/include/linux/uio.h index 199bcc34241b..abbe83ded630 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -62,8 +62,6 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to); size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); -size_t iov_iter_copy_from_user(struct page *page, - struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); size_t iov_iter_single_seg_count(const struct iov_iter *i); diff --git a/mm/iov_iter.c b/mm/iov_iter.c index 10e46cd721de..22ec1ef068a8 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -129,33 +129,6 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, } EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); -/* - * This has the same sideeffects and return value as - * iov_iter_copy_from_user_atomic(). - * The difference is that it attempts to resolve faults. - * Page must not be locked. - */ -size_t iov_iter_copy_from_user(struct page *page, - struct iov_iter *i, unsigned long offset, size_t bytes) -{ - char *kaddr; - size_t copied; - - kaddr = kmap(page); - if (likely(i->nr_segs == 1)) { - int left; - char __user *buf = i->iov->iov_base + i->iov_offset; - left = __copy_from_user(kaddr + offset, buf, bytes); - copied = bytes - left; - } else { - copied = __iovec_copy_from_user_inatomic(kaddr + offset, - i->iov, i->iov_offset, bytes); - } - kunmap(page); - return copied; -} -EXPORT_SYMBOL(iov_iter_copy_from_user); - void iov_iter_advance(struct iov_iter *i, size_t bytes) { BUG_ON(i->count < bytes); diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 8505c9262b35..f32b1fbbfe69 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -46,11 +46,7 @@ static int process_vm_rw_pages(struct page **pages, copy = len; if (vm_write) { - if (copy > iov_iter_count(iter)) - copy = iov_iter_count(iter); - copied = iov_iter_copy_from_user(page, iter, - offset, copy); - iov_iter_advance(iter, copied); + copied = copy_page_from_iter(page, offset, copy, iter); set_page_dirty_lock(page); } else { copied = copy_page_to_iter(page, offset, copy, iter); -- cgit v1.2.3-71-gd317 From f8579f8673b7ecdb7a81d5d5bb1d981093d9aa94 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 3 Mar 2014 22:03:20 -0500 Subject: generic_file_direct_write(): switch to iov_iter Signed-off-by: Al Viro --- fs/btrfs/file.c | 6 ++---- fs/fuse/file.c | 6 ++---- fs/ocfs2/file.c | 6 +++--- fs/xfs/xfs_file.c | 5 +++-- include/linux/fs.h | 4 ++-- mm/filemap.c | 15 +++++++-------- 6 files changed, 19 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ae6af072b635..9fe20c2052af 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1669,15 +1669,13 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, loff_t endbyte; int err; - written = generic_file_direct_write(iocb, iov, &nr_segs, pos, - count, ocount); + iov_iter_init(&i, iov, nr_segs, count, 0); + written = generic_file_direct_write(iocb, &i, pos, count, ocount); if (written < 0 || written == count) return written; pos += written; - count -= written; - iov_iter_init(&i, iov, nr_segs, count, written); written_buffered = __btrfs_buffered_write(file, &i, pos); if (written_buffered < 0) { err = written_buffered; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 96d513e01a5d..126deb5d0a9c 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1235,15 +1235,13 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, goto out; if (file->f_flags & O_DIRECT) { - written = generic_file_direct_write(iocb, iov, &nr_segs, pos, - count, ocount); + iov_iter_init(&i, iov, nr_segs, count, 0); + written = generic_file_direct_write(iocb, &i, pos, count, ocount); if (written < 0 || written == count) goto out; pos += written; - count -= written; - iov_iter_init(&i, iov, nr_segs, count, written); written_buffered = fuse_perform_write(file, mapping, &i, pos); if (written_buffered < 0) { err = written_buffered; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8970dcf74de5..d6d78c2aa96e 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2251,6 +2251,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, int full_coherency = !(osb->s_mount_opt & OCFS2_MOUNT_COHERENCY_BUFFERED); int unaligned_dio = 0; + struct iov_iter from; trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -2365,16 +2366,15 @@ relock: if (ret) goto out_dio; + iov_iter_init(&from, iov, nr_segs, count, 0); if (direct_io) { - written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, + written = generic_file_direct_write(iocb, &from, *ppos, count, ocount); if (written < 0) { ret = written; goto out_dio; } } else { - struct iov_iter from; - iov_iter_init(&from, iov, nr_segs, count, 0); current->backing_dev_info = file->f_mapping->backing_dev_info; written = generic_perform_write(file, &from, *ppos); if (likely(written >= 0)) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 951a2321ee01..8617497867c7 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -641,6 +641,7 @@ xfs_file_dio_aio_write( int iolock; struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; + struct iov_iter from; /* DIO must be aligned to device logical sector size */ if ((pos | count) & target->bt_logical_sectormask) @@ -698,8 +699,8 @@ xfs_file_dio_aio_write( } trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); - ret = generic_file_direct_write(iocb, iovp, - &nr_segs, pos, count, ocount); + iov_iter_init(&from, iovp, nr_segs, count, 0); + ret = generic_file_direct_write(iocb, &from, pos, count, ocount); out: xfs_rw_iunlock(ip, iolock); diff --git a/include/linux/fs.h b/include/linux/fs.h index 878031227c57..262f96e579b8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2407,8 +2407,8 @@ int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isbl extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long); extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); -extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, - unsigned long *, loff_t, size_t, size_t); +extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, + loff_t, size_t, size_t); extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); diff --git a/mm/filemap.c b/mm/filemap.c index 000a220e2a41..a840890ed39f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2385,9 +2385,8 @@ int pagecache_write_end(struct file *file, struct address_space *mapping, EXPORT_SYMBOL(pagecache_write_end); ssize_t -generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long *nr_segs, loff_t pos, - size_t count, size_t ocount) +generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, + loff_t pos, size_t count, size_t ocount) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -2397,9 +2396,9 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, pgoff_t end; if (count != ocount) - *nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count); + from->nr_segs = iov_shorten((struct iovec *)from->iov, from->nr_segs, count); - write_len = iov_length(iov, *nr_segs); + write_len = iov_length(from->iov, from->nr_segs); end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT; written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1); @@ -2426,7 +2425,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, } } - written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs); + written = mapping->a_ops->direct_IO(WRITE, iocb, from->iov, pos, from->nr_segs); /* * Finally, try again to invalidate clean pages which might have been @@ -2443,6 +2442,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, if (written > 0) { pos += written; + iov_iter_advance(from, written); if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) { i_size_write(inode, pos); mark_inode_dirty(inode); @@ -2645,11 +2645,10 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (unlikely(file->f_flags & O_DIRECT)) { loff_t endbyte; - written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos, + written = generic_file_direct_write(iocb, &from, pos, count, ocount); if (written < 0 || written == count) goto out; - iov_iter_advance(&from, written); /* * direct-io write to a hole: fall through to buffered I/O -- cgit v1.2.3-71-gd317 From cb66a7a1f149ff705fa37cad6d1252b046e0ad4f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 4 Mar 2014 15:24:06 -0500 Subject: kill generic_segment_checks() all callers of ->aio_read() and ->aio_write() have iov/nr_segs already checked - generic_segment_checks() done after that is just an odd way to spell iov_length(). Signed-off-by: Al Viro --- drivers/staging/lustre/lustre/llite/file.c | 10 ++---- fs/btrfs/file.c | 7 +--- fs/ceph/file.c | 13 ++------ fs/fuse/file.c | 7 +--- fs/ntfs/file.c | 5 +-- fs/ocfs2/file.c | 7 +--- fs/xfs/xfs_file.c | 9 ++--- include/linux/fs.h | 2 -- mm/filemap.c | 53 ++---------------------------- mm/shmem.c | 7 ++-- 10 files changed, 16 insertions(+), 104 deletions(-) (limited to 'include') diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index 8e844a6371e0..220bd8390a84 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -1180,9 +1180,7 @@ static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov, ssize_t result; int refcheck; - result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); - if (result) - return result; + count = iov_length(iov, nr_segs); env = cl_env_get(&refcheck); if (IS_ERR(env)) @@ -1235,14 +1233,10 @@ static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov, { struct lu_env *env; struct vvp_io_args *args; - size_t count = 0; + size_t count = iov_length(iov, nr_segs); ssize_t result; int refcheck; - result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); - if (result) - return result; - env = cl_env_get(&refcheck); if (IS_ERR(env)) return PTR_ERR(env); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1dafe0701daf..a0a94a30d85a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1726,12 +1726,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, mutex_lock(&inode->i_mutex); - err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); - if (err) { - mutex_unlock(&inode->i_mutex); - goto out; - } - count = ocount; + count = ocount = iov_length(iov, nr_segs); current->backing_dev_info = inode->i_mapping->backing_dev_info; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ef9115e4a6fa..21a56c27b74c 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -828,12 +828,8 @@ again: inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, ceph_cap_string(got)); - if (!read) { - ret = generic_segment_checks(iov, &nr_segs, - &len, VERIFY_WRITE); - if (ret) - goto out; - } + if (!read) + len = iov_length(iov, nr_segs); iov_iter_init(&i, iov, nr_segs, len, read); @@ -855,7 +851,6 @@ again: ret = generic_file_aio_read(iocb, iov, nr_segs, pos); } -out: dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); ceph_put_cap_refs(ci, got); @@ -911,9 +906,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, mutex_lock(&inode->i_mutex); - err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); - if (err) - goto out; + count = iov_length(iov, nr_segs); /* We can write back this queue in page reclaim */ current->backing_dev_info = file->f_mapping->backing_dev_info; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 126deb5d0a9c..9c7f346879e7 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1208,12 +1208,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, WARN_ON(iocb->ki_pos != pos); - ocount = 0; - err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); - if (err) - return err; - - count = ocount; + count = ocount = iov_length(iov, nr_segs); mutex_lock(&inode->i_mutex); /* We can write back this queue in page reclaim */ diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index db9bd8a31725..b6fa457d8d01 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2091,10 +2091,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, size_t count; /* after file limit checks */ ssize_t written, err; - count = 0; - err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); - if (err) - return err; + count = iov_length(iov, nr_segs); pos = *ppos; /* We can write back this queue in page reclaim. */ current->backing_dev_info = mapping->backing_dev_info; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index d6d78c2aa96e..d33c4ced0baf 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2355,12 +2355,7 @@ relock: /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); - ret = generic_segment_checks(iov, &nr_segs, &ocount, - VERIFY_READ); - if (ret) - goto out_dio; - - count = ocount; + count = ocount = iov_length(iov, nr_segs); ret = generic_write_checks(file, ppos, &count, S_ISBLK(inode->i_mode)); if (ret) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 8617497867c7..f0f8084a67be 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -253,9 +253,7 @@ xfs_file_aio_read( if (file->f_mode & FMODE_NOCMTIME) ioflags |= IO_INVIS; - ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE); - if (ret < 0) - return ret; + size = iov_length(iovp, nr_segs); if (unlikely(ioflags & IO_ISDIRECT)) { xfs_buftarg_t *target = @@ -777,10 +775,7 @@ xfs_file_aio_write( BUG_ON(iocb->ki_pos != pos); - ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); - if (ret) - return ret; - + ocount = iov_length(iovp, nr_segs); if (ocount == 0) return 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index 262f96e579b8..796de742fe4a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2412,8 +2412,6 @@ extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); -extern int generic_segment_checks(const struct iovec *iov, - unsigned long *nr_segs, size_t *count, int access_flags); /* fs/block_dev.c */ extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, diff --git a/mm/filemap.c b/mm/filemap.c index a840890ed39f..7c1417b0bd7b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1663,45 +1663,6 @@ out: return written ? written : error; } -/* - * Performs necessary checks before doing a write - * @iov: io vector request - * @nr_segs: number of segments in the iovec - * @count: number of bytes to write - * @access_flags: type of access: %VERIFY_READ or %VERIFY_WRITE - * - * Adjust number of segments and amount of bytes to write (nr_segs should be - * properly initialized first). Returns appropriate error code that caller - * should return or zero in case that write should be allowed. - */ -int generic_segment_checks(const struct iovec *iov, - unsigned long *nr_segs, size_t *count, int access_flags) -{ - unsigned long seg; - size_t cnt = 0; - for (seg = 0; seg < *nr_segs; seg++) { - const struct iovec *iv = &iov[seg]; - - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. - */ - cnt += iv->iov_len; - if (unlikely((ssize_t)(cnt|iv->iov_len) < 0)) - return -EINVAL; - if (access_ok(access_flags, iv->iov_base, iv->iov_len)) - continue; - if (seg == 0) - return -EFAULT; - *nr_segs = seg; - cnt -= iv->iov_len; /* This segment is no good */ - break; - } - *count = cnt; - return 0; -} -EXPORT_SYMBOL(generic_segment_checks); - /** * generic_file_aio_read - generic filesystem read routine * @iocb: kernel I/O control block @@ -1717,15 +1678,12 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *filp = iocb->ki_filp; - ssize_t retval; + ssize_t retval = 0; size_t count; loff_t *ppos = &iocb->ki_pos; struct iov_iter i; - count = 0; - retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); - if (retval) - return retval; + count = iov_length(iov, nr_segs); iov_iter_init(&i, iov, nr_segs, count, 0); /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ @@ -2615,12 +2573,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ssize_t status; struct iov_iter from; - ocount = 0; - err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); - if (err) - return err; - - count = ocount; + count = ocount = iov_length(iov, nr_segs); /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; diff --git a/mm/shmem.c b/mm/shmem.c index 9f70e02111c6..2a93e625adaf 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1412,14 +1412,11 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb, unsigned long offset; enum sgp_type sgp = SGP_READ; int error = 0; - ssize_t retval; - size_t count; + ssize_t retval = 0; + size_t count = iov_length(iov, nr_segs); loff_t *ppos = &iocb->ki_pos; struct iov_iter iter; - retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); - if (retval) - return retval; iov_iter_init(&iter, iov, nr_segs, count, 0); /* -- cgit v1.2.3-71-gd317 From d8d3d94b80aa1a1c0ca75c58b8abdc7356f38418 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 4 Mar 2014 21:27:34 -0500 Subject: pass iov_iter to ->direct_IO() unmodified, for now Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 3 +-- Documentation/filesystems/vfs.txt | 3 +-- drivers/staging/lustre/lustre/llite/rw26.c | 17 ++++++++--------- fs/9p/vfs_addr.c | 5 ++--- fs/block_dev.c | 9 +++++---- fs/btrfs/inode.c | 12 ++++++------ fs/ceph/addr.c | 4 ++-- fs/cifs/file.c | 4 ++-- fs/exofs/inode.c | 2 +- fs/ext2/inode.c | 11 ++++++----- fs/ext3/inode.c | 16 +++++++--------- fs/ext4/inode.c | 11 +++++------ fs/f2fs/data.c | 8 ++++---- fs/fat/inode.c | 13 +++++++------ fs/fuse/file.c | 10 +++++----- fs/gfs2/aops.c | 11 +++++------ fs/hfs/inode.c | 8 ++++---- fs/hfsplus/inode.c | 6 +++--- fs/jfs/inode.c | 8 ++++---- fs/nfs/direct.c | 8 ++++---- fs/nilfs2/inode.c | 10 +++++----- fs/ocfs2/aops.c | 7 +++---- fs/reiserfs/inode.c | 9 ++++----- fs/udf/file.c | 4 ++-- fs/udf/inode.c | 8 ++++---- fs/xfs/xfs_aops.c | 15 +++++++-------- include/linux/fs.h | 3 +-- include/linux/nfs_fs.h | 3 +-- mm/filemap.c | 9 ++++----- mm/page_io.c | 6 ++++-- 30 files changed, 117 insertions(+), 126 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index eba790134253..9b0d5a33c8bf 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -196,8 +196,7 @@ prototypes: void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); - int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, - loff_t offset, unsigned long nr_segs); + int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **, unsigned long *); int (*migratepage)(struct address_space *, struct page *, struct page *); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 617f6d70c077..1846374a5add 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -589,8 +589,7 @@ struct address_space_operations { void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); - ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, - loff_t offset, unsigned long nr_segs); + ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); struct page* (*get_xip_page)(struct address_space *, sector_t, int); /* migrate the contents of a page to the specified target */ diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c index 7e3e0967993b..66e05c6f4d27 100644 --- a/drivers/staging/lustre/lustre/llite/rw26.c +++ b/drivers/staging/lustre/lustre/llite/rw26.c @@ -363,15 +363,14 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, #define MAX_DIO_SIZE ((MAX_MALLOC / sizeof(struct brw_page) * PAGE_CACHE_SIZE) & \ ~(DT_MAX_BRW_SIZE - 1)) static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t file_offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t file_offset) { struct lu_env *env; struct cl_io *io; struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct ccc_object *obj = cl_inode2ccc(inode); - long count = iov_length(iov, nr_segs); + long count = iov_length(iter->iov, iter->nr_segs); long tot_bytes = 0, result = 0; struct ll_inode_info *lli = ll_i2info(inode); unsigned long seg = 0; @@ -392,9 +391,9 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, MAX_DIO_SIZE >> PAGE_CACHE_SHIFT); /* Check that all user buffers are aligned as well */ - for (seg = 0; seg < nr_segs; seg++) { - if (((unsigned long)iov[seg].iov_base & ~CFS_PAGE_MASK) || - (iov[seg].iov_len & ~CFS_PAGE_MASK)) + for (seg = 0; seg < iter->nr_segs; seg++) { + if (((unsigned long)iter->iov[seg].iov_base & ~CFS_PAGE_MASK) || + (iter->iov[seg].iov_len & ~CFS_PAGE_MASK)) return -EINVAL; } @@ -411,9 +410,9 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, mutex_lock(&inode->i_mutex); LASSERT(obj->cob_transient_pages == 0); - for (seg = 0; seg < nr_segs; seg++) { - long iov_left = iov[seg].iov_len; - unsigned long user_addr = (unsigned long)iov[seg].iov_base; + for (seg = 0; seg < iter->nr_segs; seg++) { + long iov_left = iter->iov[seg].iov_len; + unsigned long user_addr = (unsigned long)iter->iov[seg].iov_base; if (rw == READ) { if (file_offset >= i_size_read(inode)) diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index c71e88602ff4..cc1cfae726b3 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -259,8 +259,7 @@ static int v9fs_launder_page(struct page *page) * */ static ssize_t -v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t pos, unsigned long nr_segs) +v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { /* * FIXME @@ -269,7 +268,7 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, */ p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n", iocb->ki_filp->f_path.dentry->d_name.name, - (long long)pos, nr_segs); + (long long)pos, iter->nr_segs); return -EINVAL; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 552a8d13bc32..938fc707d769 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -165,14 +165,15 @@ blkdev_get_block(struct inode *inode, sector_t iblock, } static ssize_t -blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset, - nr_segs, blkdev_get_block, NULL, NULL, 0); + return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter->iov, + offset, iter->nr_segs, blkdev_get_block, + NULL, NULL, 0); } int __sync_blockdev(struct block_device *bdev, int wait) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5f805bc944fa..30a6cc51f32c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7433,8 +7433,7 @@ out: } static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -7444,8 +7443,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, bool relock = false; ssize_t ret; - if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, - offset, nr_segs)) + if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter->iov, + offset, iter->nr_segs)) return 0; atomic_inc(&inode->i_dio_count); @@ -7457,7 +7456,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, * we need to flush the dirty pages again to make absolutely sure * that any outstanding dirty pages are on disk. */ - count = iov_length(iov, nr_segs); + count = iov_length(iter->iov, iter->nr_segs); if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &BTRFS_I(inode)->runtime_flags)) filemap_fdatawrite_range(inode->i_mapping, offset, count); @@ -7484,7 +7483,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, ret = __blockdev_direct_IO(rw, iocb, inode, BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, - iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, + iter->iov, offset, iter->nr_segs, + btrfs_get_blocks_direct, NULL, btrfs_submit_direct, flags); if (rw & WRITE) { if (ret < 0 && ret != -EIOCBQUEUED) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index b53278c9fd97..342ca5e423f9 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1187,8 +1187,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, * never get called. */ static ssize_t ceph_direct_io(int rw, struct kiocb *iocb, - const struct iovec *iov, - loff_t pos, unsigned long nr_segs) + struct iov_iter *iter, + loff_t pos) { WARN_ON(1); return -EINVAL; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 2900d150654e..a4ccc39e6c11 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3702,8 +3702,8 @@ void cifs_oplock_break(struct work_struct *work) * Direct IO is not yet supported in the cached mode. */ static ssize_t -cifs_direct_io(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t pos, unsigned long nr_segs) +cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter, + loff_t pos) { /* * FIXME diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index d1c244d67667..3f9cafd73931 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -964,7 +964,7 @@ static void exofs_invalidatepage(struct page *page, unsigned int offset, /* TODO: Should be easy enough to do proprly */ static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { return 0; } diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index b1d2a4675d42..47fbe760a7f8 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -850,18 +850,19 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block) } static ssize_t -ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - ext2_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, + iter->nr_segs, ext2_get_block); if (ret < 0 && (rw & WRITE)) - ext2_write_failed(mapping, offset + iov_length(iov, nr_segs)); + ext2_write_failed(mapping, offset + + iov_length(iter->iov, iter->nr_segs)); return ret; } diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index f5157d0d1b43..7a5c501dc31b 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1820,8 +1820,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait) * VFS code falls back into buffered path in that case so we are safe. */ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -1829,10 +1828,10 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, handle_t *handle; ssize_t ret; int orphan = 0; - size_t count = iov_length(iov, nr_segs); + size_t count = iov_length(iter->iov, iter->nr_segs); int retries = 0; - trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); + trace_ext3_direct_IO_enter(inode, offset, count, rw); if (rw == WRITE) { loff_t final_size = offset + count; @@ -1856,15 +1855,15 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, } retry: - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - ext3_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, + iter->nr_segs, ext3_get_block); /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ if (unlikely((rw & WRITE) && ret < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); + loff_t end = offset + count; if (end > isize) ext3_truncate_failed_direct_write(inode); @@ -1909,8 +1908,7 @@ retry: ret = err; } out: - trace_ext3_direct_IO_exit(inode, offset, - iov_length(iov, nr_segs), rw, ret); + trace_ext3_direct_IO_exit(inode, offset, count, rw, ret); return ret; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d7b7462a0e13..f51db730da39 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3222,8 +3222,7 @@ retake_lock: } static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -3239,13 +3238,13 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, if (ext4_has_inline_data(inode)) return 0; - trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); + trace_ext4_direct_IO_enter(inode, offset, iov_length(iter->iov, iter->nr_segs), rw); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) - ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); + ret = ext4_ext_direct_IO(rw, iocb, iter->iov, offset, iter->nr_segs); else - ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); + ret = ext4_ind_direct_IO(rw, iocb, iter->iov, offset, iter->nr_segs); trace_ext4_direct_IO_exit(inode, offset, - iov_length(iov, nr_segs), rw, ret); + iov_length(iter->iov, iter->nr_segs), rw, ret); return ret; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 45abd60e2bff..3a6ef121c095 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1010,7 +1010,7 @@ static int check_direct_IO(struct inode *inode, int rw, } static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -1019,11 +1019,11 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, if (f2fs_has_inline_data(inode)) return 0; - if (check_direct_IO(inode, rw, iov, offset, nr_segs)) + if (check_direct_IO(inode, rw, iter->iov, offset, iter->nr_segs)) return 0; - return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - get_data_block); + return blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, + iter->nr_segs, get_data_block); } static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, diff --git a/fs/fat/inode.c b/fs/fat/inode.c index b3361fe2bcb5..d5237a199055 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -185,8 +185,8 @@ static int fat_write_end(struct file *file, struct address_space *mapping, } static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, - loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -203,7 +203,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, * * Return 0, and fallback to normal buffered write. */ - loff_t size = offset + iov_length(iov, nr_segs); + loff_t size = offset + iov_length(iter->iov, iter->nr_segs); if (MSDOS_I(inode)->mmu_private < size) return 0; } @@ -212,10 +212,11 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, * FAT need to use the DIO_LOCKING for avoiding the race * condition of fat_get_block() and ->truncate(). */ - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - fat_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, + iter->nr_segs, fat_get_block); if (ret < 0 && (rw & WRITE)) - fat_write_failed(mapping, offset + iov_length(iov, nr_segs)); + fat_write_failed(mapping, offset + + iov_length(iter->iov, iter->nr_segs)); return ret; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 9c7f346879e7..17d96f36df15 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2890,8 +2890,8 @@ static inline loff_t fuse_round_up(loff_t off) } static ssize_t -fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { ssize_t ret = 0; struct file *file = iocb->ki_filp; @@ -2900,7 +2900,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos = 0; struct inode *inode; loff_t i_size; - size_t count = iov_length(iov, nr_segs); + size_t count = iov_length(iter->iov, iter->nr_segs); struct fuse_io_priv *io; pos = offset; @@ -2944,9 +2944,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, io->async = false; if (rw == WRITE) - ret = __fuse_direct_write(io, iov, nr_segs, &pos); + ret = __fuse_direct_write(io, iter->iov, iter->nr_segs, &pos); else - ret = __fuse_direct_read(io, iov, nr_segs, &pos, count); + ret = __fuse_direct_read(io, iter->iov, iter->nr_segs, &pos, count); if (io->async) { fuse_aio_complete(io, ret < 0 ? ret : 0, -1); diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index ce62dcac90b6..e84ddaa42104 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1041,8 +1041,7 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -1082,7 +1081,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, */ if (mapping->nrpages) { loff_t lstart = offset & (PAGE_CACHE_SIZE - 1); - loff_t len = iov_length(iov, nr_segs); + loff_t len = iov_length(iter->iov, iter->nr_segs); loff_t end = PAGE_ALIGN(offset + len) - 1; rv = 0; @@ -1097,9 +1096,9 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, truncate_inode_pages_range(mapping, lstart, end); } - rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, gfs2_get_block_direct, - NULL, NULL, 0); + rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, + iter->iov, offset, iter->nr_segs, + gfs2_get_block_direct, NULL, NULL, 0); out: gfs2_glock_dq(&gh); gfs2_holder_uninit(&gh); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 9e2fecd62f62..09cff13528c5 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -125,15 +125,15 @@ static int hfs_releasepage(struct page *page, gfp_t mask) } static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = file_inode(file)->i_mapping->host; ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - hfs_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, + iter->nr_segs, hfs_get_block); /* * In case of error extending write may have instantiated a few @@ -141,7 +141,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, */ if (unlikely((rw & WRITE) && ret < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); + loff_t end = offset + iov_length(iter->iov, iter->nr_segs); if (end > isize) hfs_write_failed(mapping, end); diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index a4f45bd88a63..7f894a5b5eaf 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -123,14 +123,14 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) } static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = file_inode(file)->i_mapping->host; ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, iter->nr_segs, hfsplus_get_block); /* @@ -139,7 +139,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, */ if (unlikely((rw & WRITE) && ret < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); + loff_t end = offset + iov_length(iter->iov, iter->nr_segs); if (end > isize) hfsplus_write_failed(mapping, end); diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 6f8fe72c2a7a..7052744d5107 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -331,15 +331,15 @@ static sector_t jfs_bmap(struct address_space *mapping, sector_t block) } static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = file->f_mapping->host; ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - jfs_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, + iter->nr_segs, jfs_get_block); /* * In case of error extending write may have instantiated a few @@ -347,7 +347,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, */ if (unlikely((rw & WRITE) && ret < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); + loff_t end = offset + iov_length(iter->iov, iter->nr_segs); if (end > isize) jfs_write_failed(mapping, end); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index b8797ae6831f..e9cde3935001 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -121,20 +121,20 @@ static inline int put_dreq(struct nfs_direct_req *dreq) * shunt off direct read and write requests before the VFS gets them, * so this method is only ever called for swap. */ -ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) +ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { #ifndef CONFIG_NFS_SWAP dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n", - iocb->ki_filp, (long long) pos, nr_segs); + iocb->ki_filp, (long long) pos, iter->nr_segs); return -EINVAL; #else VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); if (rw == READ || rw == KERNEL_READ) - return nfs_file_direct_read(iocb, iov, nr_segs, pos, + return nfs_file_direct_read(iocb, iter->iov, iter->nr_segs, pos, rw == READ ? true : false); - return nfs_file_direct_write(iocb, iov, nr_segs, pos, + return nfs_file_direct_write(iocb, iter->iov, iter->nr_segs, pos, rw == WRITE ? true : false); #endif /* CONFIG_NFS_SWAP */ } diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index b9c5726120e3..1c0e8fedc095 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -298,8 +298,8 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping, } static ssize_t -nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -310,8 +310,8 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, return 0; /* Needs synchronization with the cleaner */ - size = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - nilfs_get_block); + size = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, + iter->nr_segs, nilfs_get_block); /* * In case of error extending write may have instantiated a few @@ -319,7 +319,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, */ if (unlikely((rw & WRITE) && size < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); + loff_t end = offset + iov_length(iter->iov, iter->nr_segs); if (end > isize) nilfs_write_failed(mapping, end); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index d310d12a9adc..799fd0afcb35 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -599,9 +599,8 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait) static ssize_t ocfs2_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, - loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file)->i_mapping->host; @@ -618,7 +617,7 @@ static ssize_t ocfs2_direct_IO(int rw, return 0; return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, - iov, offset, nr_segs, + iter->iov, offset, iter->nr_segs, ocfs2_direct_IO_get_blocks, ocfs2_dio_end_io, NULL, 0); } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index bc8b8009897d..17bf4c41a509 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3083,15 +3083,14 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags) /* We thank Mingming Cao for helping us understand in great detail what to do in this section of the code. */ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - reiserfs_get_blocks_direct_io); + ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, + iter->nr_segs, reiserfs_get_blocks_direct_io); /* * In case of error extending write may have instantiated a few @@ -3099,7 +3098,7 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, */ if (unlikely((rw & WRITE) && ret < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); + loff_t end = offset + iov_length(iter->iov, iter->nr_segs); if ((end > isize) && inode_newsize_ok(inode, isize) == 0) { truncate_setsize(inode, isize); diff --git a/fs/udf/file.c b/fs/udf/file.c index d2c170f8b035..ade886401658 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -119,8 +119,8 @@ static int udf_adinicb_write_end(struct file *file, } static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, - loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, + loff_t offset) { /* Fallback to buffered I/O. */ return 0; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 5d643706212f..5b184c7f7dcb 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -217,18 +217,18 @@ static int udf_write_begin(struct file *file, struct address_space *mapping, } static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, - loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, iter->nr_segs, udf_get_block); if (unlikely(ret < 0 && (rw & WRITE))) - udf_write_failed(mapping, offset + iov_length(iov, nr_segs)); + udf_write_failed(mapping, offset + iov_length(iter->iov, iter->nr_segs)); return ret; } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 0479c32c5eb1..330d7b1c4be3 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1449,9 +1449,8 @@ STATIC ssize_t xfs_vm_direct_IO( int rw, struct kiocb *iocb, - const struct iovec *iov, - loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, + loff_t offset) { struct inode *inode = iocb->ki_filp->f_mapping->host; struct block_device *bdev = xfs_find_bdev_for_inode(inode); @@ -1459,7 +1458,7 @@ xfs_vm_direct_IO( ssize_t ret; if (rw & WRITE) { - size_t size = iov_length(iov, nr_segs); + size_t size = iov_length(iter->iov, iter->nr_segs); /* * We cannot preallocate a size update transaction here as we @@ -1471,16 +1470,16 @@ xfs_vm_direct_IO( if (offset + size > XFS_I(inode)->i_d.di_size) ioend->io_isdirect = 1; - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, - offset, nr_segs, + ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter->iov, + offset, iter->nr_segs, xfs_get_blocks_direct, xfs_end_io_direct_write, NULL, DIO_ASYNC_EXTEND); if (ret != -EIOCBQUEUED && iocb->private) goto out_destroy_ioend; } else { - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, - offset, nr_segs, + ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter->iov, + offset, iter->nr_segs, xfs_get_blocks_direct, NULL, NULL, 0); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 796de742fe4a..399a338c92b5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -343,8 +343,7 @@ struct address_space_operations { void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, gfp_t); void (*freepage)(struct page *); - ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, - loff_t offset, unsigned long nr_segs); + ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **, unsigned long *); /* diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index fa6918b0f829..5a0d78ec739d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -459,8 +459,7 @@ extern int nfs3_removexattr (struct dentry *, const char *name); /* * linux/fs/nfs/direct.c */ -extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t, - unsigned long); +extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t); extern ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos, bool uio); diff --git a/mm/filemap.c b/mm/filemap.c index 7c1417b0bd7b..139641274f1e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1699,10 +1699,9 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, size = i_size_read(inode); retval = filemap_write_and_wait_range(mapping, pos, pos + iov_length(iov, nr_segs) - 1); - if (!retval) { - retval = mapping->a_ops->direct_IO(READ, iocb, - iov, pos, nr_segs); - } + if (!retval) + retval = mapping->a_ops->direct_IO(READ, iocb, &i, pos); + if (retval > 0) { *ppos = pos + retval; count -= retval; @@ -2383,7 +2382,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, } } - written = mapping->a_ops->direct_IO(WRITE, iocb, from->iov, pos, from->nr_segs); + written = mapping->a_ops->direct_IO(WRITE, iocb, from, pos); /* * Finally, try again to invalidate clean pages which might have been diff --git a/mm/page_io.c b/mm/page_io.c index 7c59ef681381..0ed0644c73db 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -263,16 +263,18 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, .iov_base = kmap(page), .iov_len = PAGE_SIZE, }; + struct iov_iter from; init_sync_kiocb(&kiocb, swap_file); kiocb.ki_pos = page_file_offset(page); kiocb.ki_nbytes = PAGE_SIZE; + iov_iter_init(&from, &iov, 1, PAGE_SIZE, 0); set_page_writeback(page); unlock_page(page); ret = mapping->a_ops->direct_IO(KERNEL_WRITE, - &kiocb, &iov, - kiocb.ki_pos, 1); + &kiocb, &from, + kiocb.ki_pos); kunmap(page); if (ret == PAGE_SIZE) { count_vm_event(PSWPOUT); -- cgit v1.2.3-71-gd317 From 619d30b4b8c488042b4a720ca79dccc346d1a516 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 4 Mar 2014 21:53:33 -0500 Subject: convert the guts of nfs_direct_IO() to iov_iter Signed-off-by: Al Viro --- fs/nfs/direct.c | 46 +++++++++++++++++++++------------------------- fs/nfs/file.c | 18 ++++++++++++------ include/linux/nfs_fs.h | 4 ++-- 3 files changed, 35 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e9cde3935001..21723149668b 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -132,9 +132,9 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); if (rw == READ || rw == KERNEL_READ) - return nfs_file_direct_read(iocb, iter->iov, iter->nr_segs, pos, + return nfs_file_direct_read(iocb, iter, pos, rw == READ ? true : false); - return nfs_file_direct_write(iocb, iter->iov, iter->nr_segs, pos, + return nfs_file_direct_write(iocb, iter, pos, rw == WRITE ? true : false); #endif /* CONFIG_NFS_SWAP */ } @@ -414,8 +414,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de } static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, - const struct iovec *iov, - unsigned long nr_segs, + struct iov_iter *iter, loff_t pos, bool uio) { struct nfs_pageio_descriptor desc; @@ -430,8 +429,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, desc.pg_dreq = dreq; atomic_inc(&inode->i_dio_count); - for (seg = 0; seg < nr_segs; seg++) { - const struct iovec *vec = &iov[seg]; + for (seg = 0; seg < iter->nr_segs; seg++) { + const struct iovec *vec = &iter->iov[seg]; result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio); if (result < 0) break; @@ -461,8 +460,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, /** * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block - * @iov: vector of user buffers into which to read data - * @nr_segs: size of iov vector + * @iter: vector of user buffers into which to read data * @pos: byte offset in file where reading starts * * We use this function for direct reads instead of calling @@ -479,8 +477,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * client must read the updated atime from the server back into its * cache. */ -ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, bool uio) +ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, + loff_t pos, bool uio) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -490,7 +488,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, ssize_t result = -EINVAL; size_t count; - count = iov_length(iov, nr_segs); + count = iov_length(iter->iov, iter->nr_segs); nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", @@ -513,7 +511,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, goto out_unlock; dreq->inode = inode; - dreq->bytes_left = iov_length(iov, nr_segs); + dreq->bytes_left = count; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { @@ -524,8 +522,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - NFS_I(inode)->read_io += iov_length(iov, nr_segs); - result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); + NFS_I(inode)->read_io += count; + result = nfs_direct_read_schedule_iovec(dreq, iter, pos, uio); mutex_unlock(&inode->i_mutex); @@ -864,8 +862,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { }; static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, - const struct iovec *iov, - unsigned long nr_segs, + struct iov_iter *iter, loff_t pos, bool uio) { struct nfs_pageio_descriptor desc; @@ -880,9 +877,9 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, get_dreq(dreq); atomic_inc(&inode->i_dio_count); - NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs); - for (seg = 0; seg < nr_segs; seg++) { - const struct iovec *vec = &iov[seg]; + NFS_I(dreq->inode)->write_io += iov_length(iter->iov, iter->nr_segs); + for (seg = 0; seg < iter->nr_segs; seg++) { + const struct iovec *vec = &iter->iov[seg]; result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); if (result < 0) break; @@ -911,8 +908,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, /** * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block - * @iov: vector of user buffers from which to write data - * @nr_segs: size of iov vector + * @iter: vector of user buffers from which to write data * @pos: byte offset in file where writing starts * * We use this function for direct writes instead of calling @@ -930,8 +926,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, bool uio) +ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, + loff_t pos, bool uio) { ssize_t result = -EINVAL; struct file *file = iocb->ki_filp; @@ -942,7 +938,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, loff_t end; size_t count; - count = iov_length(iov, nr_segs); + count = iov_length(iter->iov, iter->nr_segs); end = (pos + count - 1) >> PAGE_CACHE_SHIFT; nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); @@ -993,7 +989,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio); + result = nfs_direct_write_schedule_iovec(dreq, iter, pos, uio); if (mapping->nrpages) { invalidate_inode_pages2_range(mapping, diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 284ca901fe16..3d01b152894e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -169,14 +169,18 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct inode *inode = file_inode(iocb->ki_filp); + size_t count = iov_length(iov, nr_segs); ssize_t result; + struct iov_iter to; + + iov_iter_init(&to, iov, nr_segs, count, 0); if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_read(iocb, iov, nr_segs, pos, true); + return nfs_file_direct_read(iocb, &to, pos, true); - dprintk("NFS: read(%pD2, %lu@%lu)\n", + dprintk("NFS: read(%pD2, %zu@%lu)\n", iocb->ki_filp, - (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos); + count, (unsigned long) pos); result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); if (!result) { @@ -643,16 +647,18 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long written = 0; ssize_t result; size_t count = iov_length(iov, nr_segs); + struct iov_iter from; + iov_iter_init(&from, iov, nr_segs, count, 0); result = nfs_key_timeout_notify(file, inode); if (result) return result; if (file->f_flags & O_DIRECT) - return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); + return nfs_file_direct_write(iocb, &from, pos, true); - dprintk("NFS: write(%pD2, %lu@%Ld)\n", - file, (unsigned long) count, (long long) pos); + dprintk("NFS: write(%pD2, %zu@%Ld)\n", + file, count, (long long) pos); result = -EBUSY; if (IS_SWAPFILE(inode)) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 5a0d78ec739d..0a82b6fbae8a 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -461,10 +461,10 @@ extern int nfs3_removexattr (struct dentry *, const char *name); */ extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t); extern ssize_t nfs_file_direct_read(struct kiocb *iocb, - const struct iovec *iov, unsigned long nr_segs, + struct iov_iter *iter, loff_t pos, bool uio); extern ssize_t nfs_file_direct_write(struct kiocb *iocb, - const struct iovec *iov, unsigned long nr_segs, + struct iov_iter *iter, loff_t pos, bool uio); /* -- cgit v1.2.3-71-gd317 From 31b140398ce56ab41646eda7f02bcb78d6a4c916 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Mar 2014 01:33:16 -0500 Subject: switch {__,}blockdev_direct_IO() to iov_iter Signed-off-by: Al Viro --- fs/block_dev.c | 4 ++-- fs/btrfs/inode.c | 3 +-- fs/direct-io.c | 33 ++++++++++++++++----------------- fs/ext2/inode.c | 3 +-- fs/ext3/inode.c | 3 +-- fs/ext4/indirect.c | 7 +++---- fs/ext4/inode.c | 4 ++-- fs/f2fs/data.c | 4 ++-- fs/fat/inode.c | 3 +-- fs/gfs2/aops.c | 2 +- fs/hfs/inode.c | 3 +-- fs/hfsplus/inode.c | 2 +- fs/jfs/inode.c | 3 +-- fs/nilfs2/inode.c | 4 ++-- fs/ocfs2/aops.c | 2 +- fs/reiserfs/inode.c | 4 ++-- fs/udf/inode.c | 3 +-- fs/xfs/xfs_aops.c | 10 ++++------ include/linux/fs.h | 12 ++++++------ 19 files changed, 49 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/fs/block_dev.c b/fs/block_dev.c index 938fc707d769..937e3011ed58 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -171,8 +171,8 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter->iov, - offset, iter->nr_segs, blkdev_get_block, + return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter, + offset, blkdev_get_block, NULL, NULL, 0); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c46a025d0c4b..b0b8fa0efba3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7483,8 +7483,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, ret = __blockdev_direct_IO(rw, iocb, inode, BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, - iter->iov, offset, iter->nr_segs, - btrfs_get_blocks_direct, NULL, + iter, offset, btrfs_get_blocks_direct, NULL, btrfs_submit_direct, flags); if (rw & WRITE) { if (ret < 0 && ret != -EIOCBQUEUED) diff --git a/fs/direct-io.c b/fs/direct-io.c index 31ba0935e32e..1c677899b989 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1107,8 +1107,8 @@ static inline int drop_refcount(struct dio *dio) */ static inline ssize_t do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, + struct block_device *bdev, struct iov_iter *iter, loff_t offset, + get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags) { int seg; @@ -1143,9 +1143,9 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, } /* Check the memory alignment. Blocks cannot straddle pages */ - for (seg = 0; seg < nr_segs; seg++) { - addr = (unsigned long)iov[seg].iov_base; - size = iov[seg].iov_len; + for (seg = 0; seg < iter->nr_segs; seg++) { + addr = (unsigned long)iter->iov[seg].iov_base; + size = iter->iov[seg].iov_len; end += size; if (unlikely((addr & blocksize_mask) || (size & blocksize_mask))) { @@ -1256,18 +1256,18 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, if (unlikely(sdio.blkfactor)) sdio.pages_in_io = 2; - for (seg = 0; seg < nr_segs; seg++) { - user_addr = (unsigned long)iov[seg].iov_base; + for (seg = 0; seg < iter->nr_segs; seg++) { + user_addr = (unsigned long)iter->iov[seg].iov_base; sdio.pages_in_io += - ((user_addr + iov[seg].iov_len + PAGE_SIZE-1) / + ((user_addr + iter->iov[seg].iov_len + PAGE_SIZE-1) / PAGE_SIZE - user_addr / PAGE_SIZE); } blk_start_plug(&plug); - for (seg = 0; seg < nr_segs; seg++) { - user_addr = (unsigned long)iov[seg].iov_base; - sdio.size += bytes = iov[seg].iov_len; + for (seg = 0; seg < iter->nr_segs; seg++) { + user_addr = (unsigned long)iter->iov[seg].iov_base; + sdio.size += bytes = iter->iov[seg].iov_len; /* Index into the first page of the first block */ sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits; @@ -1288,7 +1288,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, retval = do_direct_IO(dio, &sdio, &map_bh); - dio->result += iov[seg].iov_len - + dio->result += iter->iov[seg].iov_len - ((sdio.final_block_in_request - sdio.block_in_file) << blkbits); @@ -1365,8 +1365,8 @@ out: ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, + struct block_device *bdev, struct iov_iter *iter, loff_t offset, + get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags) { /* @@ -1381,9 +1381,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, prefetch(bdev->bd_queue); prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); - return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, - submit_io, flags); + return do_blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset, + get_block, end_io, submit_io, flags); } EXPORT_SYMBOL(__blockdev_direct_IO); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 116e809aa7cb..36d35c36311d 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -859,8 +859,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, - iter->nr_segs, ext2_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext2_get_block); if (ret < 0 && (rw & WRITE)) ext2_write_failed(mapping, offset + count); return ret; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 8582ae2c80b0..4d32133a76c4 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1855,8 +1855,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, } retry: - ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, - iter->nr_segs, ext3_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext3_get_block); /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 123898a6af05..8a57e9fcd1b9 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -686,14 +686,13 @@ retry: goto locked; } ret = __blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, iter->iov, - offset, iter->nr_segs, + inode->i_sb->s_bdev, iter, offset, ext4_get_block, NULL, NULL, 0); inode_dio_done(inode); } else { locked: - ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, - offset, iter->nr_segs, ext4_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter, + offset, ext4_get_block); if (unlikely((rw & WRITE) && ret < 0)) { loff_t isize = i_size_read(inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2b993579a968..e5718385a037 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3166,8 +3166,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, dio_flags = DIO_LOCKING; } ret = __blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, iter->iov, - offset, iter->nr_segs, + inode->i_sb->s_bdev, iter, + offset, get_block_func, ext4_end_io_dio, NULL, diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3a6ef121c095..151488f27755 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1022,8 +1022,8 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, if (check_direct_IO(inode, rw, iter->iov, offset, iter->nr_segs)) return 0; - return blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, - iter->nr_segs, get_data_block); + return blockdev_direct_IO(rw, iocb, inode, iter, offset, + get_data_block); } static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 154a6f9d3189..385cce464e82 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -213,8 +213,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, * FAT need to use the DIO_LOCKING for avoiding the race * condition of fat_get_block() and ->truncate(). */ - ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, - iter->nr_segs, fat_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, fat_get_block); if (ret < 0 && (rw & WRITE)) fat_write_failed(mapping, offset + count); diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 228a12d2afa9..910838951d66 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1097,7 +1097,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, } rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, - iter->iov, offset, iter->nr_segs, + iter, offset, gfs2_get_block_direct, NULL, NULL, 0); out: gfs2_glock_dq(&gh); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index dc69e8f31581..f5fb09ebc850 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -133,8 +133,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, - iter->nr_segs, hfs_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, hfs_get_block); /* * In case of error extending write may have instantiated a few diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index e6b1251af47a..76b930ff58ae 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -131,7 +131,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, iter->nr_segs, + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, hfsplus_get_block); /* diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 6cde5928693b..bd3df1ca3c9b 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -339,8 +339,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, - iter->nr_segs, jfs_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, jfs_get_block); /* * In case of error extending write may have instantiated a few diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 7aaf913e8709..6252b173a465 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -311,8 +311,8 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, return 0; /* Needs synchronization with the cleaner */ - size = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, - iter->nr_segs, nilfs_get_block); + size = blockdev_direct_IO(rw, iocb, inode, iter, offset, + nilfs_get_block); /* * In case of error extending write may have instantiated a few diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 799fd0afcb35..4a231a166cf8 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -617,7 +617,7 @@ static ssize_t ocfs2_direct_IO(int rw, return 0; return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, - iter->iov, offset, iter->nr_segs, + iter, offset, ocfs2_direct_IO_get_blocks, ocfs2_dio_end_io, NULL, 0); } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 723affe921f1..b8003e8dd1f4 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3090,8 +3090,8 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, - iter->nr_segs, reiserfs_get_blocks_direct_io); + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, + reiserfs_get_blocks_direct_io); /* * In case of error extending write may have instantiated a few diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 28984baf6194..236cd48184c2 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -226,8 +226,7 @@ static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, iter->nr_segs, - udf_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, udf_get_block); if (unlikely(ret < 0 && (rw & WRITE))) udf_write_failed(mapping, offset + count); return ret; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 6462b3186784..08d13e395252 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1470,17 +1470,15 @@ xfs_vm_direct_IO( if (offset + size > XFS_I(inode)->i_d.di_size) ioend->io_isdirect = 1; - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter->iov, - offset, iter->nr_segs, - xfs_get_blocks_direct, + ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, + offset, xfs_get_blocks_direct, xfs_end_io_direct_write, NULL, DIO_ASYNC_EXTEND); if (ret != -EIOCBQUEUED && iocb->private) goto out_destroy_ioend; } else { - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter->iov, - offset, iter->nr_segs, - xfs_get_blocks_direct, + ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, + offset, xfs_get_blocks_direct, NULL, NULL, 0); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 399a338c92b5..946a9484844f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2474,16 +2474,16 @@ enum { void dio_end_io(struct bio *bio, int error); ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, + struct block_device *bdev, struct iov_iter *iter, loff_t offset, + get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags); static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, - struct inode *inode, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_block_t get_block) + struct inode *inode, struct iov_iter *iter, loff_t offset, + get_block_t get_block) { - return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, get_block, NULL, NULL, + return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iter, + offset, get_block, NULL, NULL, DIO_LOCKING | DIO_SKIP_HOLES); } #endif -- cgit v1.2.3-71-gd317 From 886a39115005ced8b15ab067c9c2a8d546b40a5e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Mar 2014 13:50:45 -0500 Subject: new primitive: iov_iter_alignment() returns the value aligned as badly as the worst remaining segment in iov_iter is. Use instead of open-coded equivalents. Signed-off-by: Al Viro --- drivers/staging/lustre/lustre/llite/rw26.c | 7 ++----- fs/direct-io.c | 27 +++++---------------------- include/linux/uio.h | 2 ++ mm/iov_iter.c | 25 +++++++++++++++++++++++++ 4 files changed, 34 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c index 38a5b580e7f0..f718585c9e08 100644 --- a/drivers/staging/lustre/lustre/llite/rw26.c +++ b/drivers/staging/lustre/lustre/llite/rw26.c @@ -391,11 +391,8 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, MAX_DIO_SIZE >> PAGE_CACHE_SHIFT); /* Check that all user buffers are aligned as well */ - for (seg = 0; seg < iter->nr_segs; seg++) { - if (((unsigned long)iter->iov[seg].iov_base & ~CFS_PAGE_MASK) || - (iter->iov[seg].iov_len & ~CFS_PAGE_MASK)) - return -EINVAL; - } + if (iov_iter_alignment(iter) & ~CFS_PAGE_MASK) + return -EINVAL; env = cl_env_get(&refcheck); LASSERT(!IS_ERR(env)); diff --git a/fs/direct-io.c b/fs/direct-io.c index 1c677899b989..adfa1fb33456 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1112,19 +1112,18 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, dio_submit_t submit_io, int flags) { int seg; - size_t size; - unsigned long addr; unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); unsigned blkbits = i_blkbits; unsigned blocksize_mask = (1 << blkbits) - 1; ssize_t retval = -EINVAL; - loff_t end = offset; + loff_t end = offset + iov_iter_count(iter); struct dio *dio; struct dio_submit sdio = { 0, }; unsigned long user_addr; size_t bytes; struct buffer_head map_bh = { 0, }; struct blk_plug plug; + unsigned long align = offset | iov_iter_alignment(iter); if (rw & WRITE) rw = WRITE_ODIRECT; @@ -1134,32 +1133,16 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, * the early prefetch in the caller enough time. */ - if (offset & blocksize_mask) { + if (align & blocksize_mask) { if (bdev) blkbits = blksize_bits(bdev_logical_block_size(bdev)); blocksize_mask = (1 << blkbits) - 1; - if (offset & blocksize_mask) + if (align & blocksize_mask) goto out; } - /* Check the memory alignment. Blocks cannot straddle pages */ - for (seg = 0; seg < iter->nr_segs; seg++) { - addr = (unsigned long)iter->iov[seg].iov_base; - size = iter->iov[seg].iov_len; - end += size; - if (unlikely((addr & blocksize_mask) || - (size & blocksize_mask))) { - if (bdev) - blkbits = blksize_bits( - bdev_logical_block_size(bdev)); - blocksize_mask = (1 << blkbits) - 1; - if ((addr & blocksize_mask) || (size & blocksize_mask)) - goto out; - } - } - /* watch out for a 0 len io from a tricksy fs */ - if (rw == READ && end == offset) + if (rw == READ && !iov_iter_count(iter)) return 0; dio = kmem_cache_alloc(dio_cache, GFP_KERNEL); diff --git a/include/linux/uio.h b/include/linux/uio.h index abbe83ded630..4ee17413fe1b 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -67,6 +67,7 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); +unsigned long iov_iter_alignment(const struct iov_iter *i); static inline void iov_iter_init(struct iov_iter *i, const struct iovec *iov, unsigned long nr_segs, @@ -88,4 +89,5 @@ static inline size_t iov_iter_count(struct iov_iter *i) int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len); int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len); + #endif diff --git a/mm/iov_iter.c b/mm/iov_iter.c index 22ec1ef068a8..2f762cc21080 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -195,3 +195,28 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i) return min(i->count, iov->iov_len - i->iov_offset); } EXPORT_SYMBOL(iov_iter_single_seg_count); + +unsigned long iov_iter_alignment(const struct iov_iter *i) +{ + const struct iovec *iov = i->iov; + unsigned long res; + size_t size = i->count; + size_t n; + + if (!size) + return 0; + + res = (unsigned long)iov->iov_base + i->iov_offset; + n = iov->iov_len - i->iov_offset; + if (n >= size) + return res | size; + size -= n; + res |= n; + while (size > (++iov)->iov_len) { + res |= (unsigned long)iov->iov_base | iov->iov_len; + size -= iov->iov_len; + } + res |= (unsigned long)iov->iov_base | size; + return res; +} +EXPORT_SYMBOL(iov_iter_alignment); -- cgit v1.2.3-71-gd317 From ed978a811ec528dbe40243605c3afab55892f722 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Mar 2014 22:53:04 -0500 Subject: new helper: generic_file_read_iter() iov_iter-using variant of generic_file_aio_read(). Some callers converted. Note that it's still not quite there for use as ->read_iter() - we depend on having zero iter->iov_offset in O_DIRECT case. Fortunately, that's true for all converted callers (and for generic_file_aio_read() itself). Signed-off-by: Al Viro --- fs/ceph/file.c | 15 +----------- fs/nfs/file.c | 2 +- include/linux/fs.h | 1 + mm/filemap.c | 67 +++++++++++++++++++++++++++--------------------------- 4 files changed, 37 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index d8f383d59449..910a3022eb27 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -833,24 +833,11 @@ again: /* hmm, this isn't really async... */ ret = ceph_sync_read(iocb, &i, &checkeof); } else { - /* - * We can't modify the content of iov, - * so we only read from beginning. - * - * When we switch generic_file_aio_read() to iov_iter, the - * if () below will be removed -- AV - */ - if (read) { - iocb->ki_pos = pos; - len = iocb->ki_nbytes; - read = 0; - iov_iter_init(&i, iov, nr_segs, len, 0); - } dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", inode, ceph_vinop(inode), pos, (unsigned)len, ceph_cap_string(got)); - ret = generic_file_aio_read(iocb, iov, nr_segs, pos); + ret = generic_file_read_iter(iocb, &i); } dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 3d01b152894e..a352bc6d613f 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -184,7 +184,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); if (!result) { - result = generic_file_aio_read(iocb, iov, nr_segs, pos); + result = generic_file_read_iter(iocb, &to); if (result > 0) nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 946a9484844f..d096ebc7f348 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2404,6 +2404,7 @@ extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, unsigned long size, pgoff_t pgoff); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); +extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long); extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, diff --git a/mm/filemap.c b/mm/filemap.c index 866f4ae8223b..a7f79e90209c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1663,55 +1663,34 @@ out: return written ? written : error; } -/** - * generic_file_aio_read - generic filesystem read routine - * @iocb: kernel I/O control block - * @iov: io vector request - * @nr_segs: number of segments in the iovec - * @pos: current file position - * - * This is the "read()" routine for all filesystems - * that can use the page cache directly. - */ ssize_t -generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { - struct file *filp = iocb->ki_filp; + struct file *file = iocb->ki_filp; ssize_t retval = 0; - size_t count; loff_t *ppos = &iocb->ki_pos; - struct iov_iter i; - - count = iov_length(iov, nr_segs); - iov_iter_init(&i, iov, nr_segs, count, 0); + loff_t pos = *ppos; /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ - if (filp->f_flags & O_DIRECT) { + if (file->f_flags & O_DIRECT) { + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + size_t count = iov_iter_count(iter); loff_t size; - struct address_space *mapping; - struct inode *inode; - mapping = filp->f_mapping; - inode = mapping->host; if (!count) goto out; /* skip atime */ size = i_size_read(inode); retval = filemap_write_and_wait_range(mapping, pos, pos + count - 1); if (!retval) { - struct iov_iter data = i; + struct iov_iter data = *iter; retval = mapping->a_ops->direct_IO(READ, iocb, &data, pos); } if (retval > 0) { *ppos = pos + retval; - count -= retval; - /* - * If we did a short DIO read we need to skip the - * section of the iov that we've already read data into. - */ - iov_iter_advance(&i, retval); + iov_iter_advance(iter, retval); } /* @@ -1722,16 +1701,38 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, * and return. Otherwise fallthrough to buffered io for * the rest of the read. */ - if (retval < 0 || !count || *ppos >= size) { - file_accessed(filp); + if (retval < 0 || !iov_iter_count(iter) || *ppos >= size) { + file_accessed(file); goto out; } } - retval = do_generic_file_read(filp, ppos, &i, retval); + retval = do_generic_file_read(file, ppos, iter, retval); out: return retval; } +EXPORT_SYMBOL(generic_file_read_iter); + +/** + * generic_file_aio_read - generic filesystem read routine + * @iocb: kernel I/O control block + * @iov: io vector request + * @nr_segs: number of segments in the iovec + * @pos: current file position + * + * This is the "read()" routine for all filesystems + * that can use the page cache directly. + */ +ssize_t +generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + size_t count = iov_length(iov, nr_segs); + struct iov_iter i; + + iov_iter_init(&i, iov, nr_segs, count, 0); + return generic_file_read_iter(iocb, &i); +} EXPORT_SYMBOL(generic_file_aio_read); #ifdef CONFIG_MMU -- cgit v1.2.3-71-gd317 From 71d8e532b1549a478e6a6a8a44f309d050294d00 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Mar 2014 19:28:09 -0500 Subject: start adding the tag to iov_iter For now, just use the same thing we pass to ->direct_IO() - it's all iovec-based at the moment. Pass it explicitly to iov_iter_init() and account for kvec vs. iovec in there, by the same kludge NFS ->direct_IO() uses. Signed-off-by: Al Viro --- fs/btrfs/file.c | 2 +- fs/ceph/file.c | 8 ++++---- fs/cifs/file.c | 4 ++-- fs/fuse/file.c | 6 +++--- fs/nfs/file.c | 4 ++-- fs/ocfs2/file.c | 2 +- fs/pipe.c | 2 +- fs/splice.c | 2 +- fs/xfs/xfs_file.c | 4 ++-- include/linux/uio.h | 15 +++------------ mm/filemap.c | 4 ++-- mm/iov_iter.c | 15 +++++++++++++++ mm/page_io.c | 2 +- mm/process_vm_access.c | 4 ++-- mm/shmem.c | 2 +- 15 files changed, 41 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a0a94a30d85a..f8cee205618a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1740,7 +1740,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, goto out; } - iov_iter_init(&i, iov, nr_segs, count, 0); + iov_iter_init(&i, WRITE, iov, nr_segs, count); err = file_remove_suid(file); if (err) { diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 910a3022eb27..5b93cadedfbe 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -582,7 +582,7 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov, CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE; - iov_iter_init(&i, iov, nr_segs, count, 0); + iov_iter_init(&i, WRITE, iov, nr_segs, count); while (iov_iter_count(&i) > 0) { void __user *data = i.iov->iov_base + i.iov_offset; @@ -703,7 +703,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov, CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ACK; - iov_iter_init(&i, iov, nr_segs, count, 0); + iov_iter_init(&i, WRITE, iov, nr_segs, count); while ((len = iov_iter_count(&i)) > 0) { size_t left; @@ -808,7 +808,7 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, int checkeof = 0, read = 0; struct iov_iter i; - iov_iter_init(&i, iov, nr_segs, len, 0); + iov_iter_init(&i, READ, iov, nr_segs, len); again: dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", @@ -961,7 +961,7 @@ retry_snap: * are pending vmtruncate. So write and vmtruncate * can not run at the same time */ - iov_iter_init(&from, iov, nr_segs, count, 0); + iov_iter_init(&from, WRITE, iov, nr_segs, count); written = generic_perform_write(file, &from, pos); if (likely(written >= 0)) iocb->ki_pos = pos + written; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index a4ccc39e6c11..15201c21ac88 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2424,7 +2424,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, else pid = current->tgid; - iov_iter_init(&it, iov, nr_segs, len, 0); + iov_iter_init(&it, WRITE, iov, nr_segs, len); do { size_t save_len; @@ -2854,7 +2854,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov, if (!len) return 0; - iov_iter_init(&to, iov, nr_segs, len, 0); + iov_iter_init(&to, READ, iov, nr_segs, len); INIT_LIST_HEAD(&rdata_list); cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index fc54d04a41e2..4a5519ca253f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1217,7 +1217,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) goto out; - iov_iter_init(&i, iov, nr_segs, count, 0); + iov_iter_init(&i, WRITE, iov, nr_segs, count); if (count == 0) goto out; @@ -1386,7 +1386,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, struct fuse_req *req; struct iov_iter ii; - iov_iter_init(&ii, iov, nr_segs, count, 0); + iov_iter_init(&ii, write ? WRITE : READ, iov, nr_segs, count); if (io->async) req = fuse_get_req_for_background(fc, fuse_iter_npages(&ii)); @@ -2367,7 +2367,7 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov, if (!bytes) return 0; - iov_iter_init(&ii, iov, nr_segs, bytes, 0); + iov_iter_init(&ii, to_user ? READ : WRITE, iov, nr_segs, bytes); while (iov_iter_count(&ii)) { struct page *page = pages[page_idx++]; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index a352bc6d613f..ead8f44f7973 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -173,7 +173,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, ssize_t result; struct iov_iter to; - iov_iter_init(&to, iov, nr_segs, count, 0); + iov_iter_init(&to, READ, iov, nr_segs, count); if (iocb->ki_filp->f_flags & O_DIRECT) return nfs_file_direct_read(iocb, &to, pos, true); @@ -648,7 +648,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, ssize_t result; size_t count = iov_length(iov, nr_segs); struct iov_iter from; - iov_iter_init(&from, iov, nr_segs, count, 0); + iov_iter_init(&from, WRITE, iov, nr_segs, count); result = nfs_key_timeout_notify(file, inode); if (result) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index d33c4ced0baf..9ce9ed7615c1 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2361,7 +2361,7 @@ relock: if (ret) goto out_dio; - iov_iter_init(&from, iov, nr_segs, count, 0); + iov_iter_init(&from, WRITE, iov, nr_segs, count); if (direct_io) { written = generic_file_direct_write(iocb, &from, *ppos, count, ocount); diff --git a/fs/pipe.c b/fs/pipe.c index 034bffac3f97..cd4ccf07e772 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -287,7 +287,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov, if (unlikely(total_len == 0)) return 0; - iov_iter_init(&iter, iov, nr_segs, total_len, 0); + iov_iter_init(&iter, READ, iov, nr_segs, total_len); do_wakeup = 0; ret = 0; diff --git a/fs/splice.c b/fs/splice.c index 9bc07d2b53cf..f99e420744c7 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1548,7 +1548,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, if (ret <= 0) return ret; - iov_iter_init(&iter, iov, nr_segs, count, 0); + iov_iter_init(&iter, READ, iov, nr_segs, count); sd.len = 0; sd.total_len = count; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f0f8084a67be..762bb3e148a6 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -697,7 +697,7 @@ xfs_file_dio_aio_write( } trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); - iov_iter_init(&from, iovp, nr_segs, count, 0); + iov_iter_init(&from, WRITE, iovp, nr_segs, count); ret = generic_file_direct_write(iocb, &from, pos, count, ocount); out: @@ -731,7 +731,7 @@ xfs_file_buffered_aio_write( if (ret) goto out; - iov_iter_init(&from, iovp, nr_segs, count, 0); + iov_iter_init(&from, WRITE, iovp, nr_segs, count); /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; diff --git a/include/linux/uio.h b/include/linux/uio.h index 4ee17413fe1b..b80bbe197d13 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -20,6 +20,7 @@ struct kvec { }; struct iov_iter { + int type; const struct iovec *iov; unsigned long nr_segs; size_t iov_offset; @@ -68,18 +69,8 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); unsigned long iov_iter_alignment(const struct iov_iter *i); - -static inline void iov_iter_init(struct iov_iter *i, - const struct iovec *iov, unsigned long nr_segs, - size_t count, size_t written) -{ - i->iov = iov; - i->nr_segs = nr_segs; - i->iov_offset = 0; - i->count = count + written; - - iov_iter_advance(i, written); -} +void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, + unsigned long nr_segs, size_t count); static inline size_t iov_iter_count(struct iov_iter *i) { diff --git a/mm/filemap.c b/mm/filemap.c index a7f79e90209c..3aeaf2df4135 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1730,7 +1730,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, size_t count = iov_length(iov, nr_segs); struct iov_iter i; - iov_iter_init(&i, iov, nr_segs, count, 0); + iov_iter_init(&i, READ, iov, nr_segs, count); return generic_file_read_iter(iocb, &i); } EXPORT_SYMBOL(generic_file_aio_read); @@ -2596,7 +2596,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; - iov_iter_init(&from, iov, nr_segs, count, 0); + iov_iter_init(&from, WRITE, iov, nr_segs, count); /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (unlikely(file->f_flags & O_DIRECT)) { diff --git a/mm/iov_iter.c b/mm/iov_iter.c index 2f762cc21080..e2c9a2db4350 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -220,3 +220,18 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) return res; } EXPORT_SYMBOL(iov_iter_alignment); + +void iov_iter_init(struct iov_iter *i, int direction, + const struct iovec *iov, unsigned long nr_segs, + size_t count) +{ + /* It will get better. Eventually... */ + if (segment_eq(get_fs(), KERNEL_DS)) + direction |= REQ_KERNEL; + i->type = direction; + i->iov = iov; + i->nr_segs = nr_segs; + i->iov_offset = 0; + i->count = count; +} +EXPORT_SYMBOL(iov_iter_init); diff --git a/mm/page_io.c b/mm/page_io.c index 0ed0644c73db..313bfedb75d1 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -268,7 +268,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, init_sync_kiocb(&kiocb, swap_file); kiocb.ki_pos = page_file_offset(page); kiocb.ki_nbytes = PAGE_SIZE; - iov_iter_init(&from, &iov, 1, PAGE_SIZE, 0); + iov_iter_init(&from, KERNEL_WRITE, &iov, 1, PAGE_SIZE); set_page_writeback(page); unlock_page(page); diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index f32b1fbbfe69..5077afcd9e11 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -274,7 +274,7 @@ static ssize_t process_vm_rw(pid_t pid, if (rc <= 0) goto free_iovecs; - iov_iter_init(&iter, iov_l, liovcnt, rc, 0); + iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc); rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV, iovstack_r, &iov_r); @@ -337,7 +337,7 @@ compat_process_vm_rw(compat_pid_t pid, &iov_l); if (rc <= 0) goto free_iovecs; - iov_iter_init(&iter, iov_l, liovcnt, rc, 0); + iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc); rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV, iovstack_r, &iov_r); diff --git a/mm/shmem.c b/mm/shmem.c index 2a93e625adaf..e0b76696c3f9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1417,7 +1417,7 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb, loff_t *ppos = &iocb->ki_pos; struct iov_iter iter; - iov_iter_init(&iter, iov, nr_segs, count, 0); + iov_iter_init(&iter, READ, iov, nr_segs, count); /* * Might this read be for a stacking filesystem? Then when reading -- cgit v1.2.3-71-gd317 From 7b2c99d15559e285384c742db52316802e24b0bd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 15 Mar 2014 04:05:57 -0400 Subject: new helper: iov_iter_get_pages() iov_iter_get_pages(iter, pages, maxsize, &start) grabs references pinning the pages of up to maxsize of (contiguous) data from iter. Returns the amount of memory grabbed or -error. In case of success, the requested area begins at offset start in pages[0] and runs through pages[1], etc. Less than requested amount might be returned - either because the contiguous area in the beginning of iterator is smaller than requested, or because the kernel failed to pin that many pages. direct-io.c switched to using iov_iter_get_pages() Signed-off-by: Al Viro --- fs/direct-io.c | 111 ++++++++++++++++++---------------------------------- include/linux/uio.h | 2 + mm/iov_iter.c | 27 +++++++++++++ 3 files changed, 67 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/fs/direct-io.c b/fs/direct-io.c index 387d91989c45..4b410d58faae 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -77,7 +77,6 @@ struct dio_submit { unsigned blocks_available; /* At block_in_file. changes */ int reap_counter; /* rate limit reaping */ sector_t final_block_in_request;/* doesn't change */ - unsigned first_block_in_page; /* doesn't change, Used only once */ int boundary; /* prev block is at a boundary */ get_block_t *get_block; /* block mapping function */ dio_submit_t *submit_io; /* IO submition function */ @@ -98,19 +97,14 @@ struct dio_submit { sector_t cur_page_block; /* Where it starts */ loff_t cur_page_fs_offset; /* Offset in file */ - /* - * Page fetching state. These variables belong to dio_refill_pages(). - */ - int curr_page; /* changes */ - int total_pages; /* doesn't change */ - unsigned long curr_user_address;/* changes */ - + struct iov_iter *iter; /* * Page queue. These variables belong to dio_refill_pages() and * dio_get_page(). */ unsigned head; /* next page to process */ unsigned tail; /* last valid page + 1 */ + size_t from, to; }; /* dio_state communicated between submission path and end_io */ @@ -163,15 +157,10 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio) */ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) { - int ret; - int nr_pages; + ssize_t ret; - nr_pages = min(sdio->total_pages - sdio->curr_page, DIO_PAGES); - ret = get_user_pages_fast( - sdio->curr_user_address, /* Where from? */ - nr_pages, /* How many pages? */ - dio->rw == READ, /* Write to memory? */ - &dio->pages[0]); /* Put results here */ + ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE, + &sdio->from); if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) { struct page *page = ZERO_PAGE(0); @@ -186,18 +175,19 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) dio->pages[0] = page; sdio->head = 0; sdio->tail = 1; - ret = 0; - goto out; + sdio->from = 0; + sdio->to = PAGE_SIZE; + return 0; } if (ret >= 0) { - sdio->curr_user_address += ret * PAGE_SIZE; - sdio->curr_page += ret; + iov_iter_advance(sdio->iter, ret); + ret += sdio->from; sdio->head = 0; - sdio->tail = ret; - ret = 0; + sdio->tail = (ret + PAGE_SIZE - 1) / PAGE_SIZE; + sdio->to = ((ret - 1) & (PAGE_SIZE - 1)) + 1; + return 0; } -out: return ret; } @@ -208,8 +198,9 @@ out: * L1 cache. */ static inline struct page *dio_get_page(struct dio *dio, - struct dio_submit *sdio) + struct dio_submit *sdio, size_t *from, size_t *to) { + int n; if (dio_pages_present(sdio) == 0) { int ret; @@ -218,7 +209,10 @@ static inline struct page *dio_get_page(struct dio *dio, return ERR_PTR(ret); BUG_ON(dio_pages_present(sdio) == 0); } - return dio->pages[sdio->head++]; + n = sdio->head++; + *from = n ? 0 : sdio->from; + *to = (n == sdio->tail - 1) ? sdio->to : PAGE_SIZE; + return dio->pages[n]; } /** @@ -422,8 +416,8 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) */ static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio) { - while (dio_pages_present(sdio)) - page_cache_release(dio_get_page(dio, sdio)); + while (sdio->head < sdio->tail) + page_cache_release(dio->pages[sdio->head++]); } /* @@ -912,23 +906,18 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio, struct buffer_head *map_bh) { const unsigned blkbits = sdio->blkbits; - const unsigned blocks_per_page = PAGE_SIZE >> blkbits; - struct page *page; - unsigned block_in_page; int ret = 0; - /* The I/O can start at any block offset within the first page */ - block_in_page = sdio->first_block_in_page; - while (sdio->block_in_file < sdio->final_block_in_request) { - page = dio_get_page(dio, sdio); + struct page *page; + size_t from, to; + page = dio_get_page(dio, sdio, &from, &to); if (IS_ERR(page)) { ret = PTR_ERR(page); goto out; } - while (block_in_page < blocks_per_page) { - unsigned offset_in_page = block_in_page << blkbits; + while (from < to) { unsigned this_chunk_bytes; /* # of bytes mapped */ unsigned this_chunk_blocks; /* # of blocks */ unsigned u; @@ -999,10 +988,9 @@ do_holes: page_cache_release(page); goto out; } - zero_user(page, block_in_page << blkbits, - 1 << blkbits); + zero_user(page, from, 1 << blkbits); sdio->block_in_file++; - block_in_page++; + from += 1 << blkbits; dio->result += 1 << blkbits; goto next_block; } @@ -1020,7 +1008,7 @@ do_holes: * can add to this page */ this_chunk_blocks = sdio->blocks_available; - u = (PAGE_SIZE - offset_in_page) >> blkbits; + u = (to - from) >> blkbits; if (this_chunk_blocks > u) this_chunk_blocks = u; u = sdio->final_block_in_request - sdio->block_in_file; @@ -1032,7 +1020,7 @@ do_holes: if (this_chunk_blocks == sdio->blocks_available) sdio->boundary = buffer_boundary(map_bh); ret = submit_page_section(dio, sdio, page, - offset_in_page, + from, this_chunk_bytes, sdio->next_block_for_io, map_bh); @@ -1043,9 +1031,9 @@ do_holes: sdio->next_block_for_io += this_chunk_blocks; sdio->block_in_file += this_chunk_blocks; - block_in_page += this_chunk_blocks; + from += this_chunk_bytes; + dio->result += this_chunk_bytes; sdio->blocks_available -= this_chunk_blocks; - dio->result += this_chunk_blocks << blkbits; next_block: BUG_ON(sdio->block_in_file > sdio->final_block_in_request); if (sdio->block_in_file == sdio->final_block_in_request) @@ -1054,7 +1042,6 @@ next_block: /* Drop the ref which was taken in get_user_pages() */ page_cache_release(page); - block_in_page = 0; } out: return ret; @@ -1122,7 +1109,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct dio *dio; struct dio_submit sdio = { 0, }; unsigned long user_addr; - size_t bytes; struct buffer_head map_bh = { 0, }; struct blk_plug plug; unsigned long align = offset | iov_iter_alignment(iter); @@ -1234,6 +1220,10 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, spin_lock_init(&dio->bio_lock); dio->refcount = 1; + sdio.iter = iter; + sdio.final_block_in_request = + (offset + iov_iter_count(iter)) >> blkbits; + /* * In case of non-aligned buffers, we may need 2 more * pages since we need to zero out first and last block. @@ -1250,34 +1240,9 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, blk_start_plug(&plug); - for (seg = 0; seg < iter->nr_segs; seg++) { - user_addr = (unsigned long)iter->iov[seg].iov_base; - sdio.size += bytes = iter->iov[seg].iov_len; - - /* Index into the first page of the first block */ - sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits; - sdio.final_block_in_request = sdio.block_in_file + - (bytes >> blkbits); - /* Page fetching state */ - sdio.head = 0; - sdio.tail = 0; - sdio.curr_page = 0; - - sdio.total_pages = 0; - if (user_addr & (PAGE_SIZE-1)) { - sdio.total_pages++; - bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1)); - } - sdio.total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE; - sdio.curr_user_address = user_addr; - - retval = do_direct_IO(dio, &sdio, &map_bh); - - if (retval) { - dio_cleanup(dio, &sdio); - break; - } - } /* end iovec loop */ + retval = do_direct_IO(dio, &sdio, &map_bh); + if (retval) + dio_cleanup(dio, &sdio); if (retval == -ENOTBLK) { /* diff --git a/include/linux/uio.h b/include/linux/uio.h index b80bbe197d13..341986116d83 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -71,6 +71,8 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, unsigned long iov_iter_alignment(const struct iov_iter *i); void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, size_t count); +ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, + size_t maxsize, size_t *start); static inline size_t iov_iter_count(struct iov_iter *i) { diff --git a/mm/iov_iter.c b/mm/iov_iter.c index e2c9a2db4350..45204cd5ccd8 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -235,3 +235,30 @@ void iov_iter_init(struct iov_iter *i, int direction, i->count = count; } EXPORT_SYMBOL(iov_iter_init); + +ssize_t iov_iter_get_pages(struct iov_iter *i, + struct page **pages, size_t maxsize, + size_t *start) +{ + size_t offset = i->iov_offset; + const struct iovec *iov = i->iov; + size_t len; + unsigned long addr; + int n; + int res; + + len = iov->iov_len - offset; + if (len > i->count) + len = i->count; + if (len > maxsize) + len = maxsize; + addr = (unsigned long)iov->iov_base + offset; + len += *start = addr & (PAGE_SIZE - 1); + addr &= ~(PAGE_SIZE - 1); + n = (len + PAGE_SIZE - 1) / PAGE_SIZE; + res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages); + if (unlikely(res < 0)) + return res; + return (res == n ? len : res * PAGE_SIZE) - *start; +} +EXPORT_SYMBOL(iov_iter_get_pages); -- cgit v1.2.3-71-gd317 From f67da30c1d5fc9e341bc8121708874bfd7b31e45 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 19 Mar 2014 01:16:16 -0400 Subject: new helper: iov_iter_npages() counts the pages covered by iov_iter, up to given limit. do_block_direct_io() and fuse_iter_npages() switched to it. Signed-off-by: Al Viro --- fs/direct-io.c | 9 +-------- fs/fuse/file.c | 16 ++-------------- include/linux/uio.h | 1 + mm/iov_iter.c | 27 +++++++++++++++++++++++++++ 4 files changed, 31 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/fs/direct-io.c b/fs/direct-io.c index 4b410d58faae..98040ba388ac 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1100,7 +1100,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags) { - int seg; unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); unsigned blkbits = i_blkbits; unsigned blocksize_mask = (1 << blkbits) - 1; @@ -1108,7 +1107,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, loff_t end = offset + iov_iter_count(iter); struct dio *dio; struct dio_submit sdio = { 0, }; - unsigned long user_addr; struct buffer_head map_bh = { 0, }; struct blk_plug plug; unsigned long align = offset | iov_iter_alignment(iter); @@ -1231,12 +1229,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, if (unlikely(sdio.blkfactor)) sdio.pages_in_io = 2; - for (seg = 0; seg < iter->nr_segs; seg++) { - user_addr = (unsigned long)iter->iov[seg].iov_base; - sdio.pages_in_io += - ((user_addr + iter->iov[seg].iov_len + PAGE_SIZE-1) / - PAGE_SIZE - user_addr / PAGE_SIZE); - } + sdio.pages_in_io += iov_iter_npages(iter, INT_MAX); blk_start_plug(&plug); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 7db564d18dc6..7026014717bc 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1310,7 +1310,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, while (nbytes < *nbytesp && req->num_pages < req->max_pages) { unsigned npages; - size_t start, end, frag_size; + size_t start; unsigned n = req->max_pages - req->num_pages; ssize_t ret = iov_iter_get_pages(ii, &req->pages[req->num_pages], @@ -1344,19 +1344,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, static inline int fuse_iter_npages(const struct iov_iter *ii_p) { - struct iov_iter ii = *ii_p; - int npages = 0; - - while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) { - unsigned long user_addr = fuse_get_user_addr(&ii); - unsigned offset = user_addr & ~PAGE_MASK; - size_t frag_size = iov_iter_single_seg_count(&ii); - - npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; - iov_iter_advance(&ii, frag_size); - } - - return min(npages, FUSE_MAX_PAGES_PER_REQ); + return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ); } ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, diff --git a/include/linux/uio.h b/include/linux/uio.h index 341986116d83..2f8825b06680 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -73,6 +73,7 @@ void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, size_t count); ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, size_t *start); +int iov_iter_npages(const struct iov_iter *i, int maxpages); static inline size_t iov_iter_count(struct iov_iter *i) { diff --git a/mm/iov_iter.c b/mm/iov_iter.c index 45204cd5ccd8..0b677f8f9bad 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -262,3 +262,30 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, return (res == n ? len : res * PAGE_SIZE) - *start; } EXPORT_SYMBOL(iov_iter_get_pages); + +int iov_iter_npages(const struct iov_iter *i, int maxpages) +{ + size_t offset = i->iov_offset; + size_t size = i->count; + const struct iovec *iov = i->iov; + int npages = 0; + int n; + + for (n = 0; size && n < i->nr_segs; n++, iov++) { + unsigned long addr = (unsigned long)iov->iov_base + offset; + size_t len = iov->iov_len - offset; + offset = 0; + if (unlikely(!len)) /* empty segment */ + continue; + if (len > size) + len = size; + npages += (addr + len + PAGE_SIZE - 1) / PAGE_SIZE + - addr / PAGE_SIZE; + if (npages >= maxpages) /* don't bother going further */ + return maxpages; + size -= len; + offset = 0; + } + return min(npages, maxpages); +} +EXPORT_SYMBOL(iov_iter_npages); -- cgit v1.2.3-71-gd317 From 91f79c43d1b54d7154b118860d81b39bad07dfff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Mar 2014 04:58:33 -0400 Subject: new helper: iov_iter_get_pages_alloc() same as iov_iter_get_pages(), except that pages array is allocated (kmalloc if possible, vmalloc if that fails) and left for caller to free. Lustre and NFS ->direct_IO() switched to it. Signed-off-by: Al Viro --- drivers/staging/lustre/lustre/llite/rw26.c | 92 ++++----- fs/nfs/direct.c | 290 +++++++++-------------------- include/linux/uio.h | 2 + mm/iov_iter.c | 40 ++++ 4 files changed, 167 insertions(+), 257 deletions(-) (limited to 'include') diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c index f718585c9e08..6b5994577b6b 100644 --- a/drivers/staging/lustre/lustre/llite/rw26.c +++ b/drivers/staging/lustre/lustre/llite/rw26.c @@ -218,14 +218,11 @@ static void ll_free_user_pages(struct page **pages, int npages, int do_dirty) int i; for (i = 0; i < npages; i++) { - if (pages[i] == NULL) - break; if (do_dirty) set_page_dirty_lock(pages[i]); page_cache_release(pages[i]); } - - OBD_FREE_LARGE(pages, npages * sizeof(*pages)); + kvfree(pages); } ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, @@ -370,10 +367,9 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct ccc_object *obj = cl_inode2ccc(inode); - long count = iov_iter_count(iter); - long tot_bytes = 0, result = 0; + ssize_t count = iov_iter_count(iter); + ssize_t tot_bytes = 0, result = 0; struct ll_inode_info *lli = ll_i2info(inode); - unsigned long seg = 0; long size = MAX_DIO_SIZE; int refcheck; @@ -407,63 +403,49 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, mutex_lock(&inode->i_mutex); LASSERT(obj->cob_transient_pages == 0); - for (seg = 0; seg < iter->nr_segs; seg++) { - long iov_left = iter->iov[seg].iov_len; - unsigned long user_addr = (unsigned long)iter->iov[seg].iov_base; + while (iov_iter_count(iter)) { + struct page **pages; + size_t offs; + count = min_t(size_t, iov_iter_count(iter), size); if (rw == READ) { if (file_offset >= i_size_read(inode)) break; - if (file_offset + iov_left > i_size_read(inode)) - iov_left = i_size_read(inode) - file_offset; + if (file_offset + count > i_size_read(inode)) + count = i_size_read(inode) - file_offset; } - while (iov_left > 0) { - struct page **pages; - int page_count, max_pages = 0; - long bytes; - - bytes = min(size, iov_left); - page_count = ll_get_user_pages(rw, user_addr, bytes, - &pages, &max_pages); - if (likely(page_count > 0)) { - if (unlikely(page_count < max_pages)) - bytes = page_count << PAGE_CACHE_SHIFT; - result = ll_direct_IO_26_seg(env, io, rw, inode, - file->f_mapping, - bytes, file_offset, - pages, page_count); - ll_free_user_pages(pages, max_pages, rw==READ); - } else if (page_count == 0) { - GOTO(out, result = -EFAULT); - } else { - result = page_count; - } - if (unlikely(result <= 0)) { - /* If we can't allocate a large enough buffer - * for the request, shrink it to a smaller - * PAGE_SIZE multiple and try again. - * We should always be able to kmalloc for a - * page worth of page pointers = 4MB on i386. */ - if (result == -ENOMEM && - size > (PAGE_CACHE_SIZE / sizeof(*pages)) * - PAGE_CACHE_SIZE) { - size = ((((size / 2) - 1) | - ~CFS_PAGE_MASK) + 1) & - CFS_PAGE_MASK; - CDEBUG(D_VFSTRACE,"DIO size now %lu\n", - size); - continue; - } - - GOTO(out, result); + result = iov_iter_get_pages_alloc(iter, &pages, count, &offs); + if (likely(result > 0)) { + int n = (result + offs + PAGE_SIZE - 1) / PAGE_SIZE; + result = ll_direct_IO_26_seg(env, io, rw, inode, + file->f_mapping, + result, file_offset, + pages, n); + ll_free_user_pages(pages, n, rw==READ); + } + if (unlikely(result <= 0)) { + /* If we can't allocate a large enough buffer + * for the request, shrink it to a smaller + * PAGE_SIZE multiple and try again. + * We should always be able to kmalloc for a + * page worth of page pointers = 4MB on i386. */ + if (result == -ENOMEM && + size > (PAGE_CACHE_SIZE / sizeof(*pages)) * + PAGE_CACHE_SIZE) { + size = ((((size / 2) - 1) | + ~CFS_PAGE_MASK) + 1) & + CFS_PAGE_MASK; + CDEBUG(D_VFSTRACE,"DIO size now %lu\n", + size); + continue; } - tot_bytes += result; - file_offset += result; - iov_left -= result; - user_addr += result; + GOTO(out, result); } + iov_iter_advance(iter, result); + tot_bytes += result; + file_offset += result; } out: LASSERT(obj->cob_transient_pages == 0); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1d34f454989e..b122fe21fea0 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -322,60 +322,37 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { * handled automatically by nfs_direct_read_result(). Otherwise, if * no requests have been sent, just return an error. */ -static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc, - const struct iovec *iov, - loff_t pos, bool uio) -{ - struct nfs_direct_req *dreq = desc->pg_dreq; - struct nfs_open_context *ctx = dreq->ctx; - struct inode *inode = ctx->dentry->d_inode; - unsigned long user_addr = (unsigned long)iov->iov_base; - size_t count = iov->iov_len; - size_t rsize = NFS_SERVER(inode)->rsize; - unsigned int pgbase; - int result; - ssize_t started = 0; - struct page **pagevec = NULL; - unsigned int npages; - - do { - size_t bytes; - int i; - pgbase = user_addr & ~PAGE_MASK; - bytes = min(max_t(size_t, rsize, PAGE_SIZE), count); +static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, + struct iov_iter *iter, + loff_t pos) +{ + struct nfs_pageio_descriptor desc; + struct inode *inode = dreq->inode; + ssize_t result = -EINVAL; + size_t requested_bytes = 0; + size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE); - result = -ENOMEM; - npages = nfs_page_array_len(pgbase, bytes); - if (!pagevec) - pagevec = kmalloc(npages * sizeof(struct page *), - GFP_KERNEL); - if (!pagevec) - break; - if (uio) { - down_read(¤t->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, - npages, 1, 0, pagevec, NULL); - up_read(¤t->mm->mmap_sem); - if (result < 0) - break; - } else { - WARN_ON(npages != 1); - result = get_kernel_page(user_addr, 1, pagevec); - if (WARN_ON(result != 1)) - break; - } + NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode, + &nfs_direct_read_completion_ops); + get_dreq(dreq); + desc.pg_dreq = dreq; + atomic_inc(&inode->i_dio_count); - if ((unsigned)result < npages) { - bytes = result * PAGE_SIZE; - if (bytes <= pgbase) { - nfs_direct_release_pages(pagevec, result); - break; - } - bytes -= pgbase; - npages = result; - } + while (iov_iter_count(iter)) { + struct page **pagevec; + size_t bytes; + size_t pgbase; + unsigned npages, i; + result = iov_iter_get_pages_alloc(iter, &pagevec, + rsize, &pgbase); + if (result < 0) + break; + + bytes = result; + iov_iter_advance(iter, bytes); + npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; for (i = 0; i < npages; i++) { struct nfs_page *req; unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); @@ -389,55 +366,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de } req->wb_index = pos >> PAGE_SHIFT; req->wb_offset = pos & ~PAGE_MASK; - if (!nfs_pageio_add_request(desc, req)) { - result = desc->pg_error; + if (!nfs_pageio_add_request(&desc, req)) { + result = desc.pg_error; nfs_release_request(req); break; } pgbase = 0; bytes -= req_len; - started += req_len; - user_addr += req_len; + requested_bytes += req_len; pos += req_len; - count -= req_len; dreq->bytes_left -= req_len; } - /* The nfs_page now hold references to these pages */ nfs_direct_release_pages(pagevec, npages); - } while (count != 0 && result >= 0); - - kfree(pagevec); - - if (started) - return started; - return result < 0 ? (ssize_t) result : -EFAULT; -} - -static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, - struct iov_iter *iter, - loff_t pos, bool uio) -{ - struct nfs_pageio_descriptor desc; - struct inode *inode = dreq->inode; - ssize_t result = -EINVAL; - size_t requested_bytes = 0; - unsigned long seg; - - NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode, - &nfs_direct_read_completion_ops); - get_dreq(dreq); - desc.pg_dreq = dreq; - atomic_inc(&inode->i_dio_count); - - for (seg = 0; seg < iter->nr_segs; seg++) { - const struct iovec *vec = &iter->iov[seg]; - result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio); + kvfree(pagevec); if (result < 0) break; - requested_bytes += result; - if ((size_t)result < vec->iov_len) - break; - pos += vec->iov_len; } nfs_pageio_complete(&desc); @@ -521,7 +464,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, dreq->iocb = iocb; NFS_I(inode)->read_io += count; - result = nfs_direct_read_schedule_iovec(dreq, iter, pos, uio); + result = nfs_direct_read_schedule_iovec(dreq, iter, pos); mutex_unlock(&inode->i_mutex); @@ -677,109 +620,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode } #endif -/* - * NB: Return the value of the first error return code. Subsequent - * errors after the first one are ignored. - */ -/* - * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE - * operation. If nfs_writedata_alloc() or get_user_pages() fails, - * bail and stop sending more writes. Write length accounting is - * handled automatically by nfs_direct_write_result(). Otherwise, if - * no requests have been sent, just return an error. - */ -static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc, - const struct iovec *iov, - loff_t pos, bool uio) -{ - struct nfs_direct_req *dreq = desc->pg_dreq; - struct nfs_open_context *ctx = dreq->ctx; - struct inode *inode = ctx->dentry->d_inode; - unsigned long user_addr = (unsigned long)iov->iov_base; - size_t count = iov->iov_len; - size_t wsize = NFS_SERVER(inode)->wsize; - unsigned int pgbase; - int result; - ssize_t started = 0; - struct page **pagevec = NULL; - unsigned int npages; - - do { - size_t bytes; - int i; - - pgbase = user_addr & ~PAGE_MASK; - bytes = min(max_t(size_t, wsize, PAGE_SIZE), count); - - result = -ENOMEM; - npages = nfs_page_array_len(pgbase, bytes); - if (!pagevec) - pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL); - if (!pagevec) - break; - - if (uio) { - down_read(¤t->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, - npages, 0, 0, pagevec, NULL); - up_read(¤t->mm->mmap_sem); - if (result < 0) - break; - } else { - WARN_ON(npages != 1); - result = get_kernel_page(user_addr, 0, pagevec); - if (WARN_ON(result != 1)) - break; - } - - if ((unsigned)result < npages) { - bytes = result * PAGE_SIZE; - if (bytes <= pgbase) { - nfs_direct_release_pages(pagevec, result); - break; - } - bytes -= pgbase; - npages = result; - } - - for (i = 0; i < npages; i++) { - struct nfs_page *req; - unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); - - req = nfs_create_request(dreq->ctx, dreq->inode, - pagevec[i], - pgbase, req_len); - if (IS_ERR(req)) { - result = PTR_ERR(req); - break; - } - nfs_lock_request(req); - req->wb_index = pos >> PAGE_SHIFT; - req->wb_offset = pos & ~PAGE_MASK; - if (!nfs_pageio_add_request(desc, req)) { - result = desc->pg_error; - nfs_unlock_and_release_request(req); - break; - } - pgbase = 0; - bytes -= req_len; - started += req_len; - user_addr += req_len; - pos += req_len; - count -= req_len; - dreq->bytes_left -= req_len; - } - /* The nfs_page now hold references to these pages */ - nfs_direct_release_pages(pagevec, npages); - } while (count != 0 && result >= 0); - - kfree(pagevec); - - if (started) - return started; - return result < 0 ? (ssize_t) result : -EFAULT; -} - static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; @@ -859,15 +699,27 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { .completion = nfs_direct_write_completion, }; + +/* + * NB: Return the value of the first error return code. Subsequent + * errors after the first one are ignored. + */ +/* + * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE + * operation. If nfs_writedata_alloc() or get_user_pages() fails, + * bail and stop sending more writes. Write length accounting is + * handled automatically by nfs_direct_write_result(). Otherwise, if + * no requests have been sent, just return an error. + */ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, struct iov_iter *iter, - loff_t pos, bool uio) + loff_t pos) { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; ssize_t result = 0; size_t requested_bytes = 0; - unsigned long seg; + size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE, &nfs_direct_write_completion_ops); @@ -875,16 +727,50 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, get_dreq(dreq); atomic_inc(&inode->i_dio_count); - NFS_I(dreq->inode)->write_io += iov_iter_count(iter); - for (seg = 0; seg < iter->nr_segs; seg++) { - const struct iovec *vec = &iter->iov[seg]; - result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); + NFS_I(inode)->write_io += iov_iter_count(iter); + while (iov_iter_count(iter)) { + struct page **pagevec; + size_t bytes; + size_t pgbase; + unsigned npages, i; + + result = iov_iter_get_pages_alloc(iter, &pagevec, + wsize, &pgbase); if (result < 0) break; - requested_bytes += result; - if ((size_t)result < vec->iov_len) + + bytes = result; + iov_iter_advance(iter, bytes); + npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; + for (i = 0; i < npages; i++) { + struct nfs_page *req; + unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); + + req = nfs_create_request(dreq->ctx, inode, + pagevec[i], + pgbase, req_len); + if (IS_ERR(req)) { + result = PTR_ERR(req); + break; + } + nfs_lock_request(req); + req->wb_index = pos >> PAGE_SHIFT; + req->wb_offset = pos & ~PAGE_MASK; + if (!nfs_pageio_add_request(&desc, req)) { + result = desc.pg_error; + nfs_unlock_and_release_request(req); + break; + } + pgbase = 0; + bytes -= req_len; + requested_bytes += req_len; + pos += req_len; + dreq->bytes_left -= req_len; + } + nfs_direct_release_pages(pagevec, npages); + kvfree(pagevec); + if (result < 0) break; - pos += vec->iov_len; } nfs_pageio_complete(&desc); @@ -985,7 +871,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - result = nfs_direct_write_schedule_iovec(dreq, iter, pos, uio); + result = nfs_direct_write_schedule_iovec(dreq, iter, pos); if (mapping->nrpages) { invalidate_inode_pages2_range(mapping, diff --git a/include/linux/uio.h b/include/linux/uio.h index 2f8825b06680..4876e9f2a58f 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -73,6 +73,8 @@ void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, size_t count); ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, size_t *start); +ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, + size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); static inline size_t iov_iter_count(struct iov_iter *i) diff --git a/mm/iov_iter.c b/mm/iov_iter.c index 0b677f8f9bad..a5c691c1a283 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -1,6 +1,8 @@ #include #include #include +#include +#include size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) @@ -263,6 +265,44 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, } EXPORT_SYMBOL(iov_iter_get_pages); +ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, + struct page ***pages, size_t maxsize, + size_t *start) +{ + size_t offset = i->iov_offset; + const struct iovec *iov = i->iov; + size_t len; + unsigned long addr; + void *p; + int n; + int res; + + len = iov->iov_len - offset; + if (len > i->count) + len = i->count; + if (len > maxsize) + len = maxsize; + addr = (unsigned long)iov->iov_base + offset; + len += *start = addr & (PAGE_SIZE - 1); + addr &= ~(PAGE_SIZE - 1); + n = (len + PAGE_SIZE - 1) / PAGE_SIZE; + + p = kmalloc(n * sizeof(struct page *), GFP_KERNEL); + if (!p) + p = vmalloc(n * sizeof(struct page *)); + if (!p) + return -ENOMEM; + + res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p); + if (unlikely(res < 0)) { + kvfree(p); + return res; + } + *pages = p; + return (res == n ? len : res * PAGE_SIZE) - *start; +} +EXPORT_SYMBOL(iov_iter_get_pages_alloc); + int iov_iter_npages(const struct iov_iter *i, int maxpages) { size_t offset = i->iov_offset; -- cgit v1.2.3-71-gd317 From 0c949334a9e2581646c6ff0d1470a805b1e5be99 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Mar 2014 06:51:37 -0400 Subject: iov_iter_truncate() Now It Can Be Done(tm) - we don't need to do iov_shorten() in generic_file_direct_write() anymore, now that all ->direct_IO() instances are converted to proper iov_iter methods and honour iter->count and iter->iov_offset properly. Get rid of count/ocount arguments of generic_file_direct_write(), while we are at it. Signed-off-by: Al Viro --- fs/btrfs/file.c | 17 ++++++++--------- fs/fuse/file.c | 18 ++++++++---------- fs/ocfs2/file.c | 10 +++++----- fs/xfs/xfs_file.c | 9 +++++---- include/linux/fs.h | 3 +-- include/linux/uio.h | 6 ++++++ mm/filemap.c | 19 +++++++------------ 7 files changed, 40 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f8cee205618a..ea63a51c148c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1659,8 +1659,7 @@ again: static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from, - loff_t pos, - size_t count, size_t ocount) + loff_t pos) { struct file *file = iocb->ki_filp; ssize_t written; @@ -1668,9 +1667,9 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, loff_t endbyte; int err; - written = generic_file_direct_write(iocb, from, pos, count, ocount); + written = generic_file_direct_write(iocb, from, pos); - if (written < 0 || written == count) + if (written < 0 || !iov_iter_count(from)) return written; pos += written; @@ -1720,13 +1719,14 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, u64 end_pos; ssize_t num_written = 0; ssize_t err = 0; - size_t count, ocount; + size_t count; bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); struct iov_iter i; mutex_lock(&inode->i_mutex); - count = ocount = iov_length(iov, nr_segs); + count = iov_length(iov, nr_segs); + iov_iter_init(&i, WRITE, iov, nr_segs, count); current->backing_dev_info = inode->i_mapping->backing_dev_info; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); @@ -1740,7 +1740,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, goto out; } - iov_iter_init(&i, WRITE, iov, nr_segs, count); + iov_iter_truncate(&i, count); err = file_remove_suid(file); if (err) { @@ -1783,8 +1783,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, atomic_inc(&BTRFS_I(inode)->sync_writers); if (unlikely(file->f_flags & O_DIRECT)) { - num_written = __btrfs_direct_write(iocb, &i, - pos, count, ocount); + num_written = __btrfs_direct_write(iocb, &i, pos); } else { num_written = __btrfs_buffered_write(file, &i, pos); if (num_written > 0) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 7026014717bc..66d2d5de19d2 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1188,8 +1188,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; - size_t count = 0; - size_t ocount = 0; + size_t count; ssize_t written = 0; ssize_t written_buffered = 0; struct inode *inode = mapping->host; @@ -1208,7 +1207,8 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, WARN_ON(iocb->ki_pos != pos); - count = ocount = iov_length(iov, nr_segs); + count = iov_length(iov, nr_segs); + iov_iter_init(&i, WRITE, iov, nr_segs, count); mutex_lock(&inode->i_mutex); /* We can write back this queue in page reclaim */ @@ -1217,11 +1217,11 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) goto out; - iov_iter_init(&i, WRITE, iov, nr_segs, count); if (count == 0) goto out; + iov_iter_truncate(&i, count); err = file_remove_suid(file); if (err) goto out; @@ -1231,8 +1231,8 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, goto out; if (file->f_flags & O_DIRECT) { - written = generic_file_direct_write(iocb, &i, pos, count, ocount); - if (written < 0 || written == count) + written = generic_file_direct_write(iocb, &i, pos); + if (written < 0 || !iov_iter_count(&i)) goto out; pos += written; @@ -1469,8 +1469,7 @@ static ssize_t __fuse_direct_write(struct fuse_io_priv *io, res = generic_write_checks(file, ppos, &count, 0); if (!res) { - if (iter->count > count) - iter->count = count; + iov_iter_truncate(iter, count); res = fuse_direct_io(io, iter, ppos, FUSE_DIO_WRITE); } @@ -2896,8 +2895,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, if (offset >= i_size) return 0; count = min_t(loff_t, count, fuse_round_up(i_size - offset)); - if (iter->count > count) - iter->count = count; + iov_iter_truncate(iter, count); } io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 9ce9ed7615c1..06b6a16d9776 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2241,7 +2241,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, int ret, direct_io, appending, rw_level, have_alloc_sem = 0; int can_do_direct, has_refcount = 0; ssize_t written = 0; - size_t ocount; /* original count */ size_t count; /* after file limit checks */ loff_t old_size, *ppos = &iocb->ki_pos; u32 old_clusters; @@ -2253,6 +2252,9 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, int unaligned_dio = 0; struct iov_iter from; + count = iov_length(iov, nr_segs); + iov_iter_init(&from, WRITE, iov, nr_segs, count); + trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, file->f_path.dentry->d_name.len, @@ -2355,16 +2357,14 @@ relock: /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); - count = ocount = iov_length(iov, nr_segs); ret = generic_write_checks(file, ppos, &count, S_ISBLK(inode->i_mode)); if (ret) goto out_dio; - iov_iter_init(&from, WRITE, iov, nr_segs, count); + iov_iter_truncate(&from, count); if (direct_io) { - written = generic_file_direct_write(iocb, &from, *ppos, - count, ocount); + written = generic_file_direct_write(iocb, &from, *ppos); if (written < 0) { ret = written; goto out_dio; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 762bb3e148a6..c997aa2751b2 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -626,7 +626,7 @@ xfs_file_dio_aio_write( const struct iovec *iovp, unsigned long nr_segs, loff_t pos, - size_t ocount) + size_t count) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -634,7 +634,6 @@ xfs_file_dio_aio_write( struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; ssize_t ret = 0; - size_t count = ocount; int unaligned_io = 0; int iolock; struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? @@ -645,6 +644,8 @@ xfs_file_dio_aio_write( if ((pos | count) & target->bt_logical_sectormask) return -XFS_ERROR(EINVAL); + iov_iter_init(&from, WRITE, iovp, nr_segs, count); + /* "unaligned" here means not aligned to a filesystem block */ if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) unaligned_io = 1; @@ -676,6 +677,7 @@ xfs_file_dio_aio_write( ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); if (ret) goto out; + iov_iter_truncate(&from, count); if (mapping->nrpages) { ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, @@ -697,8 +699,7 @@ xfs_file_dio_aio_write( } trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); - iov_iter_init(&from, WRITE, iovp, nr_segs, count); - ret = generic_file_direct_write(iocb, &from, pos, count, ocount); + ret = generic_file_direct_write(iocb, &from, pos); out: xfs_rw_iunlock(ip, iolock); diff --git a/include/linux/fs.h b/include/linux/fs.h index d096ebc7f348..8153396d19b4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2407,8 +2407,7 @@ extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsig extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long); extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); -extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, - loff_t, size_t, size_t); +extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t); extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); diff --git a/include/linux/uio.h b/include/linux/uio.h index 4876e9f2a58f..532f59d0adbb 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -82,6 +82,12 @@ static inline size_t iov_iter_count(struct iov_iter *i) return i->count; } +static inline void iov_iter_truncate(struct iov_iter *i, size_t count) +{ + if (i->count > count) + i->count = count; +} + int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len); int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len); diff --git a/mm/filemap.c b/mm/filemap.c index 3aeaf2df4135..c0404b763a17 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2345,8 +2345,7 @@ int pagecache_write_end(struct file *file, struct address_space *mapping, EXPORT_SYMBOL(pagecache_write_end); ssize_t -generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, - loff_t pos, size_t count, size_t ocount) +generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -2356,10 +2355,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, pgoff_t end; struct iov_iter data; - if (count != ocount) - from->nr_segs = iov_shorten((struct iovec *)from->iov, from->nr_segs, count); - - write_len = iov_length(from->iov, from->nr_segs); + write_len = iov_iter_count(from); end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT; written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1); @@ -2568,7 +2564,6 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, { struct file *file = iocb->ki_filp; struct address_space * mapping = file->f_mapping; - size_t ocount; /* original count */ size_t count; /* after file limit checks */ struct inode *inode = mapping->host; loff_t pos = iocb->ki_pos; @@ -2577,7 +2572,8 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ssize_t status; struct iov_iter from; - count = ocount = iov_length(iov, nr_segs); + count = iov_length(iov, nr_segs); + iov_iter_init(&from, WRITE, iov, nr_segs, count); /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; @@ -2588,6 +2584,8 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (count == 0) goto out; + iov_iter_truncate(&from, count); + err = file_remove_suid(file); if (err) goto out; @@ -2596,14 +2594,11 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; - iov_iter_init(&from, WRITE, iov, nr_segs, count); - /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (unlikely(file->f_flags & O_DIRECT)) { loff_t endbyte; - written = generic_file_direct_write(iocb, &from, pos, - count, ocount); + written = generic_file_direct_write(iocb, &from, pos); if (written < 0 || written == count) goto out; -- cgit v1.2.3-71-gd317 From 7f7f25e82d54870df24d415a7007fbd327da027b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 11 Feb 2014 17:49:24 -0500 Subject: replace checking for ->read/->aio_read presence with check in ->f_mode Since we are about to introduce new methods (read_iter/write_iter), the tests in a bunch of places would have to grow inconveniently. Check once (at open() time) and store results in ->f_mode as FMODE_CAN_READ and FMODE_CAN_WRITE resp. It might end up being a temporary measure - once everything switches from ->aio_{read,write} to ->{read,write}_iter it might make sense to return to open-coded checks. We'll see... Signed-off-by: Al Viro --- drivers/mtd/nand/nandsim.c | 4 ++-- drivers/usb/gadget/storage_common.c | 4 ++-- fs/file_table.c | 4 ++++ fs/open.c | 4 ++++ fs/read_write.c | 14 +++++++------- include/linux/fs.h | 4 ++++ 6 files changed, 23 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c index 42e8a770e631..4f0d83648e5a 100644 --- a/drivers/mtd/nand/nandsim.c +++ b/drivers/mtd/nand/nandsim.c @@ -575,12 +575,12 @@ static int alloc_device(struct nandsim *ns) cfile = filp_open(cache_file, O_CREAT | O_RDWR | O_LARGEFILE, 0600); if (IS_ERR(cfile)) return PTR_ERR(cfile); - if (!cfile->f_op->read && !cfile->f_op->aio_read) { + if (!(cfile->f_mode & FMODE_CAN_READ)) { NS_ERR("alloc_device: cache file not readable\n"); err = -EINVAL; goto err_close; } - if (!cfile->f_op->write && !cfile->f_op->aio_write) { + if (!(cfile->f_mode & FMODE_CAN_WRITE)) { NS_ERR("alloc_device: cache file not writeable\n"); err = -EINVAL; goto err_close; diff --git a/drivers/usb/gadget/storage_common.c b/drivers/usb/gadget/storage_common.c index ec20a1f50c2d..a8898df131ed 100644 --- a/drivers/usb/gadget/storage_common.c +++ b/drivers/usb/gadget/storage_common.c @@ -220,11 +220,11 @@ int fsg_lun_open(struct fsg_lun *curlun, const char *filename) * If we can't read the file, it's no good. * If we can't write the file, use it read-only. */ - if (!(filp->f_op->read || filp->f_op->aio_read)) { + if (!(filp->f_mode & FMODE_CAN_READ)) { LINFO(curlun, "file not readable: %s\n", filename); goto out; } - if (!(filp->f_op->write || filp->f_op->aio_write)) + if (!(filp->f_mode & FMODE_CAN_WRITE)) ro = 1; size = i_size_read(inode->i_mapping->host); diff --git a/fs/file_table.c b/fs/file_table.c index a374f5033e97..be73cbc48c12 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -175,6 +175,10 @@ struct file *alloc_file(struct path *path, fmode_t mode, file->f_path = *path; file->f_inode = path->dentry->d_inode; file->f_mapping = path->dentry->d_inode->i_mapping; + if ((mode & FMODE_READ) && likely(fop->read || fop->aio_read)) + mode |= FMODE_CAN_READ; + if ((mode & FMODE_WRITE) && likely(fop->write || fop->aio_write)) + mode |= FMODE_CAN_WRITE; file->f_mode = mode; file->f_op = fop; if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) diff --git a/fs/open.c b/fs/open.c index 9d64679cec73..39d3d0468ee6 100644 --- a/fs/open.c +++ b/fs/open.c @@ -725,6 +725,10 @@ static int do_dentry_open(struct file *f, } if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(inode); + if ((f->f_mode & FMODE_READ) && likely(f->f_op->read || f->f_op->aio_read)) + f->f_mode |= FMODE_CAN_READ; + if ((f->f_mode & FMODE_WRITE) && likely(f->f_op->write || f->f_op->aio_write)) + f->f_mode |= FMODE_CAN_WRITE; f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); diff --git a/fs/read_write.c b/fs/read_write.c index 31c6efa43183..d29d2a361d2c 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -396,7 +396,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) if (!(file->f_mode & FMODE_READ)) return -EBADF; - if (!file->f_op->read && !file->f_op->aio_read) + if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) return -EFAULT; @@ -445,7 +445,7 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t const char __user *p; ssize_t ret; - if (!file->f_op->write && !file->f_op->aio_write) + if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; old_fs = get_fs(); @@ -472,7 +472,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ if (!(file->f_mode & FMODE_WRITE)) return -EBADF; - if (!file->f_op->write && !file->f_op->aio_write) + if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; if (unlikely(!access_ok(VERIFY_READ, buf, count))) return -EFAULT; @@ -785,7 +785,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, { if (!(file->f_mode & FMODE_READ)) return -EBADF; - if (!file->f_op->aio_read && !file->f_op->read) + if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; return do_readv_writev(READ, file, vec, vlen, pos); @@ -798,7 +798,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, { if (!(file->f_mode & FMODE_WRITE)) return -EBADF; - if (!file->f_op->aio_write && !file->f_op->write) + if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; return do_readv_writev(WRITE, file, vec, vlen, pos); @@ -964,7 +964,7 @@ static size_t compat_readv(struct file *file, goto out; ret = -EINVAL; - if (!file->f_op->aio_read && !file->f_op->read) + if (!(file->f_mode & FMODE_CAN_READ)) goto out; ret = compat_do_readv_writev(READ, file, vec, vlen, pos); @@ -1041,7 +1041,7 @@ static size_t compat_writev(struct file *file, goto out; ret = -EINVAL; - if (!file->f_op->aio_write && !file->f_op->write) + if (!(file->f_mode & FMODE_CAN_WRITE)) goto out; ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); diff --git a/include/linux/fs.h b/include/linux/fs.h index 8153396d19b4..75eb71357adf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -128,6 +128,10 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) /* Write access to underlying fs */ #define FMODE_WRITER ((__force fmode_t)0x10000) +/* Has read method(s) */ +#define FMODE_CAN_READ ((__force fmode_t)0x20000) +/* Has write method(s) */ +#define FMODE_CAN_WRITE ((__force fmode_t)0x40000) /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) -- cgit v1.2.3-71-gd317 From 293bc9822fa9b3c9d4b7893bcb241e085580771a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 11 Feb 2014 18:37:41 -0500 Subject: new methods: ->read_iter() and ->write_iter() Beginning to introduce those. Just the callers for now, and it's clumsier than it'll eventually become; once we finish converting aio_read and aio_write instances, the things will get nicer. For now, these guys are in parallel to ->aio_read() and ->aio_write(); they take iocb and iov_iter, with everything in iov_iter already validated. File offset is passed in iocb->ki_pos, iov/nr_segs - in iov_iter. Main concerns in that series are stack footprint and ability to split the damn thing cleanly. [fix from Peter Ujfalusi folded] Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 + Documentation/filesystems/vfs.txt | 10 ++++- fs/aio.c | 14 +++++- fs/file_table.c | 6 ++- fs/open.c | 6 ++- fs/read_write.c | 90 ++++++++++++++++++++++++++++++++++++--- include/linux/fs.h | 6 +++ 7 files changed, 121 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 9b0d5a33c8bf..b18dd1779029 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -430,6 +430,8 @@ prototypes: ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 1846374a5add..a1d0d7a30165 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -806,6 +806,8 @@ struct file_operations { ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); @@ -836,11 +838,15 @@ otherwise noted. read: called by read(2) and related system calls - aio_read: called by io_submit(2) and other asynchronous I/O operations + aio_read: vectored, possibly asynchronous read + + read_iter: possibly asynchronous read with iov_iter as destination write: called by write(2) and related system calls - aio_write: called by io_submit(2) and other asynchronous I/O operations + aio_write: vectored, possibly asynchronous write + + write_iter: possibly asynchronous write with iov_iter as source iterate: called when the VFS needs to read the directory contents diff --git a/fs/aio.c b/fs/aio.c index a0ed6c7d2cd2..56b28607c32d 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1241,6 +1241,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, unsigned long, loff_t); +typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, int rw, char __user *buf, @@ -1298,7 +1299,9 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, int rw; fmode_t mode; aio_rw_op *rw_op; + rw_iter_op *iter_op; struct iovec inline_vec, *iovec = &inline_vec; + struct iov_iter iter; switch (opcode) { case IOCB_CMD_PREAD: @@ -1306,6 +1309,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, mode = FMODE_READ; rw = READ; rw_op = file->f_op->aio_read; + iter_op = file->f_op->read_iter; goto rw_common; case IOCB_CMD_PWRITE: @@ -1313,12 +1317,13 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, mode = FMODE_WRITE; rw = WRITE; rw_op = file->f_op->aio_write; + iter_op = file->f_op->write_iter; goto rw_common; rw_common: if (unlikely(!(file->f_mode & mode))) return -EBADF; - if (!rw_op) + if (!rw_op && !iter_op) return -EINVAL; ret = (opcode == IOCB_CMD_PREADV || @@ -1347,7 +1352,12 @@ rw_common: if (rw == WRITE) file_start_write(file); - ret = rw_op(req, iovec, nr_segs, req->ki_pos); + if (iter_op) { + iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes); + ret = iter_op(req, &iter); + } else { + ret = rw_op(req, iovec, nr_segs, req->ki_pos); + } if (rw == WRITE) file_end_write(file); diff --git a/fs/file_table.c b/fs/file_table.c index be73cbc48c12..f8cc881fbbfb 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -175,9 +175,11 @@ struct file *alloc_file(struct path *path, fmode_t mode, file->f_path = *path; file->f_inode = path->dentry->d_inode; file->f_mapping = path->dentry->d_inode->i_mapping; - if ((mode & FMODE_READ) && likely(fop->read || fop->aio_read)) + if ((mode & FMODE_READ) && + likely(fop->read || fop->aio_read || fop->read_iter)) mode |= FMODE_CAN_READ; - if ((mode & FMODE_WRITE) && likely(fop->write || fop->aio_write)) + if ((mode & FMODE_WRITE) && + likely(fop->write || fop->aio_write || fop->write_iter)) mode |= FMODE_CAN_WRITE; file->f_mode = mode; file->f_op = fop; diff --git a/fs/open.c b/fs/open.c index 39d3d0468ee6..36662d036237 100644 --- a/fs/open.c +++ b/fs/open.c @@ -725,9 +725,11 @@ static int do_dentry_open(struct file *f, } if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(inode); - if ((f->f_mode & FMODE_READ) && likely(f->f_op->read || f->f_op->aio_read)) + if ((f->f_mode & FMODE_READ) && + likely(f->f_op->read || f->f_op->aio_read || f->f_op->read_iter)) f->f_mode |= FMODE_CAN_READ; - if ((f->f_mode & FMODE_WRITE) && likely(f->f_op->write || f->f_op->aio_write)) + if ((f->f_mode & FMODE_WRITE) && + likely(f->f_op->write || f->f_op->aio_write || f->f_op->write_iter)) f->f_mode |= FMODE_CAN_WRITE; f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); diff --git a/fs/read_write.c b/fs/read_write.c index d29d2a361d2c..fe2f9d5e3536 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -25,6 +25,7 @@ typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, unsigned long, loff_t); +typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *); const struct file_operations generic_ro_fops = { .llseek = generic_file_llseek, @@ -390,6 +391,27 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp EXPORT_SYMBOL(do_sync_read); +ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) +{ + struct iovec iov = { .iov_base = buf, .iov_len = len }; + struct kiocb kiocb; + struct iov_iter iter; + ssize_t ret; + + init_sync_kiocb(&kiocb, filp); + kiocb.ki_pos = *ppos; + kiocb.ki_nbytes = len; + iov_iter_init(&iter, READ, &iov, 1, len); + + ret = filp->f_op->read_iter(&kiocb, &iter); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&kiocb); + *ppos = kiocb.ki_pos; + return ret; +} + +EXPORT_SYMBOL(new_sync_read); + ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { ssize_t ret; @@ -406,8 +428,10 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) count = ret; if (file->f_op->read) ret = file->f_op->read(file, buf, count, pos); - else + else if (file->f_op->aio_read) ret = do_sync_read(file, buf, count, pos); + else + ret = new_sync_read(file, buf, count, pos); if (ret > 0) { fsnotify_access(file); add_rchar(current, ret); @@ -439,6 +463,27 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof EXPORT_SYMBOL(do_sync_write); +ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) +{ + struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; + struct kiocb kiocb; + struct iov_iter iter; + ssize_t ret; + + init_sync_kiocb(&kiocb, filp); + kiocb.ki_pos = *ppos; + kiocb.ki_nbytes = len; + iov_iter_init(&iter, WRITE, &iov, 1, len); + + ret = filp->f_op->write_iter(&kiocb, &iter); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&kiocb); + *ppos = kiocb.ki_pos; + return ret; +} + +EXPORT_SYMBOL(new_sync_write); + ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) { mm_segment_t old_fs; @@ -455,8 +500,10 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t count = MAX_RW_COUNT; if (file->f_op->write) ret = file->f_op->write(file, p, count, pos); - else + else if (file->f_op->aio_write) ret = do_sync_write(file, p, count, pos); + else + ret = new_sync_write(file, p, count, pos); set_fs(old_fs); if (ret > 0) { fsnotify_modify(file); @@ -483,8 +530,10 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ file_start_write(file); if (file->f_op->write) ret = file->f_op->write(file, buf, count, pos); - else + else if (file->f_op->aio_write) ret = do_sync_write(file, buf, count, pos); + else + ret = new_sync_write(file, buf, count, pos); if (ret > 0) { fsnotify_modify(file); add_wchar(current, ret); @@ -601,6 +650,25 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) } EXPORT_SYMBOL(iov_shorten); +static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov, + unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn) +{ + struct kiocb kiocb; + struct iov_iter iter; + ssize_t ret; + + init_sync_kiocb(&kiocb, filp); + kiocb.ki_pos = *ppos; + kiocb.ki_nbytes = len; + + iov_iter_init(&iter, rw, iov, nr_segs, len); + ret = fn(&kiocb, &iter); + if (ret == -EIOCBQUEUED) + ret = wait_on_sync_kiocb(&kiocb); + *ppos = kiocb.ki_pos; + return ret; +} + static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) { @@ -738,6 +806,7 @@ static ssize_t do_readv_writev(int type, struct file *file, ssize_t ret; io_fn_t fn; iov_fn_t fnv; + iter_fn_t iter_fn; ret = rw_copy_check_uvector(type, uvector, nr_segs, ARRAY_SIZE(iovstack), iovstack, &iov); @@ -753,13 +822,18 @@ static ssize_t do_readv_writev(int type, struct file *file, if (type == READ) { fn = file->f_op->read; fnv = file->f_op->aio_read; + iter_fn = file->f_op->read_iter; } else { fn = (io_fn_t)file->f_op->write; fnv = file->f_op->aio_write; + iter_fn = file->f_op->write_iter; file_start_write(file); } - if (fnv) + if (iter_fn) + ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, + pos, iter_fn); + else if (fnv) ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, pos, fnv); else @@ -912,6 +986,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, ssize_t ret; io_fn_t fn; iov_fn_t fnv; + iter_fn_t iter_fn; ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, UIO_FASTIOV, iovstack, &iov); @@ -927,13 +1002,18 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, if (type == READ) { fn = file->f_op->read; fnv = file->f_op->aio_read; + iter_fn = file->f_op->read_iter; } else { fn = (io_fn_t)file->f_op->write; fnv = file->f_op->aio_write; + iter_fn = file->f_op->write_iter; file_start_write(file); } - if (fnv) + if (iter_fn) + ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, + pos, iter_fn); + else if (fnv) ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, pos, fnv); else diff --git a/include/linux/fs.h b/include/linux/fs.h index 75eb71357adf..17535e0a4547 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1451,6 +1451,8 @@ struct block_device_operations; #define HAVE_COMPAT_IOCTL 1 #define HAVE_UNLOCKED_IOCTL 1 +struct iov_iter; + struct file_operations { struct module *owner; loff_t (*llseek) (struct file *, loff_t, int); @@ -1458,6 +1460,8 @@ struct file_operations { ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); @@ -2415,6 +2419,8 @@ extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); +extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); +extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); /* fs/block_dev.c */ extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, -- cgit v1.2.3-71-gd317 From 8174202b34c30e0c07231bf63f18ab29af634f0b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 3 Apr 2014 03:17:43 -0400 Subject: write_iter variants of {__,}generic_file_aio_write() Signed-off-by: Al Viro --- fs/9p/vfs_file.c | 8 +++---- fs/adfs/file.c | 4 ++-- fs/affs/file.c | 4 ++-- fs/bfs/file.c | 4 ++-- fs/ecryptfs/file.c | 4 ++-- fs/exofs/file.c | 4 ++-- fs/ext2/file.c | 4 ++-- fs/ext3/file.c | 4 ++-- fs/f2fs/file.c | 4 ++-- fs/fat/file.c | 4 ++-- fs/hfs/inode.c | 4 ++-- fs/hfsplus/inode.c | 4 ++-- fs/hostfs/hostfs_kern.c | 4 ++-- fs/hpfs/file.c | 4 ++-- fs/jffs2/file.c | 4 ++-- fs/jfs/file.c | 4 ++-- fs/logfs/file.c | 4 ++-- fs/minix/file.c | 4 ++-- fs/nilfs2/file.c | 4 ++-- fs/omfs/file.c | 4 ++-- fs/ramfs/file-mmu.c | 4 ++-- fs/ramfs/file-nommu.c | 4 ++-- fs/reiserfs/file.c | 4 ++-- fs/sysv/file.c | 4 ++-- fs/ufs/file.c | 4 ++-- include/linux/fs.h | 2 ++ mm/filemap.c | 61 ++++++++++++++++++++++++++++++------------------- mm/shmem.c | 4 ++-- mm/vmscan.c | 2 +- 29 files changed, 94 insertions(+), 79 deletions(-) (limited to 'include') diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 47e0597d1e9b..b9b5f979a2ca 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -763,7 +763,7 @@ err_out: buff_write: mutex_unlock(&inode->i_mutex); - return do_sync_write(filp, data, count, offsetp); + return new_sync_write(filp, data, count, offsetp); } /** @@ -781,7 +781,7 @@ v9fs_cached_file_write(struct file *filp, const char __user * data, if (filp->f_flags & O_DIRECT) return v9fs_direct_write(filp, data, count, offset); - return do_sync_write(filp, data, count, offset); + return new_sync_write(filp, data, count, offset); } @@ -851,7 +851,7 @@ const struct file_operations v9fs_cached_file_operations = { .read = v9fs_cached_file_read, .write = v9fs_cached_file_write, .read_iter = generic_file_read_iter, - .aio_write = generic_file_aio_write, + .write_iter = generic_file_write_iter, .open = v9fs_file_open, .release = v9fs_dir_release, .lock = v9fs_file_lock, @@ -864,7 +864,7 @@ const struct file_operations v9fs_cached_file_operations_dotl = { .read = v9fs_cached_file_read, .write = v9fs_cached_file_write, .read_iter = generic_file_read_iter, - .aio_write = generic_file_aio_write, + .write_iter = generic_file_write_iter, .open = v9fs_file_open, .release = v9fs_dir_release, .lock = v9fs_file_lock_dotl, diff --git a/fs/adfs/file.c b/fs/adfs/file.c index 3bfc9efa29b4..07c9edce5aa7 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -27,8 +27,8 @@ const struct file_operations adfs_file_operations = { .read_iter = generic_file_read_iter, .mmap = generic_file_mmap, .fsync = generic_file_fsync, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .splice_read = generic_file_splice_read, }; diff --git a/fs/affs/file.c b/fs/affs/file.c index 982853f17afc..9df23175e28b 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -29,8 +29,8 @@ const struct file_operations affs_file_operations = { .llseek = generic_file_llseek, .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .open = affs_file_open, .release = affs_file_release, diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 0aa788892f93..e7f88ace1a25 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -25,8 +25,8 @@ const struct file_operations bfs_file_operations = { .llseek = generic_file_llseek, .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, }; diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index b32827c917e1..db0fad3269c0 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -353,8 +353,8 @@ const struct file_operations ecryptfs_main_fops = { .llseek = generic_file_llseek, .read = new_sync_read, .read_iter = ecryptfs_read_update_atime, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .iterate = ecryptfs_readdir, .unlocked_ioctl = ecryptfs_unlocked_ioctl, #ifdef CONFIG_COMPAT diff --git a/fs/exofs/file.c b/fs/exofs/file.c index 90d394da7471..5b7f6be5a2d5 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c @@ -68,9 +68,9 @@ static int exofs_flush(struct file *file, fl_owner_t id) const struct file_operations exofs_file_operations = { .llseek = generic_file_llseek, .read = new_sync_read, - .write = do_sync_write, + .write = new_sync_write, .read_iter = generic_file_read_iter, - .aio_write = generic_file_aio_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .open = generic_file_open, .release = exofs_release_file, diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 407305072597..970c6aca15cc 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -63,9 +63,9 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync) const struct file_operations ext2_file_operations = { .llseek = generic_file_llseek, .read = new_sync_read, - .write = do_sync_write, + .write = new_sync_write, .read_iter = generic_file_read_iter, - .aio_write = generic_file_aio_write, + .write_iter = generic_file_write_iter, .unlocked_ioctl = ext2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 5439d2f0141b..c833b1226d4d 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -51,9 +51,9 @@ static int ext3_release_file (struct inode * inode, struct file * filp) const struct file_operations ext3_file_operations = { .llseek = generic_file_llseek, .read = new_sync_read, - .write = do_sync_write, + .write = new_sync_write, .read_iter = generic_file_read_iter, - .aio_write = generic_file_aio_write, + .write_iter = generic_file_write_iter, .unlocked_ioctl = ext3_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext3_compat_ioctl, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0e01fb0bc97c..22f4900dd8eb 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -680,9 +680,9 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) const struct file_operations f2fs_file_operations = { .llseek = generic_file_llseek, .read = new_sync_read, - .write = do_sync_write, + .write = new_sync_write, .read_iter = generic_file_read_iter, - .aio_write = generic_file_aio_write, + .write_iter = generic_file_write_iter, .open = generic_file_open, .mmap = f2fs_file_mmap, .fsync = f2fs_sync_file, diff --git a/fs/fat/file.c b/fs/fat/file.c index 29285e990c90..85f79a89e747 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -171,9 +171,9 @@ int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) const struct file_operations fat_file_operations = { .llseek = generic_file_llseek, .read = new_sync_read, - .write = do_sync_write, + .write = new_sync_write, .read_iter = generic_file_read_iter, - .aio_write = generic_file_aio_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .release = fat_file_release, .unlocked_ioctl = fat_generic_ioctl, diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 6d4055aff109..d0929bc81782 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -676,8 +676,8 @@ static const struct file_operations hfs_file_operations = { .llseek = generic_file_llseek, .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, .fsync = hfs_file_fsync, diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index cccc89e47cb6..0cf786f2d046 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -343,8 +343,8 @@ static const struct file_operations hfsplus_file_operations = { .llseek = generic_file_llseek, .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, .fsync = hfsplus_file_fsync, diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index ce0005d8ffeb..bb529f3b7f2b 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -381,8 +381,8 @@ static const struct file_operations hostfs_file_fops = { .read = new_sync_read, .splice_read = generic_file_splice_read, .read_iter = generic_file_read_iter, - .aio_write = generic_file_aio_write, - .write = do_sync_write, + .write_iter = generic_file_write_iter, + .write = new_sync_write, .mmap = generic_file_mmap, .open = hostfs_file_open, .release = hostfs_file_release, diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index bacb478a4990..7f54e5f76cec 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -199,8 +199,8 @@ const struct file_operations hpfs_file_ops = .llseek = generic_file_llseek, .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .release = hpfs_file_release, .fsync = hpfs_file_fsync, diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 9192127d591c..64989ca9ba90 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -53,8 +53,8 @@ const struct file_operations jffs2_file_operations = .open = generic_file_open, .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .unlocked_ioctl=jffs2_ioctl, .mmap = generic_file_readonly_mmap, .fsync = jffs2_fsync, diff --git a/fs/jfs/file.c b/fs/jfs/file.c index a5d8299b2208..cc744ecaf51f 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -151,10 +151,10 @@ const struct inode_operations jfs_file_inode_operations = { const struct file_operations jfs_file_operations = { .open = jfs_open, .llseek = generic_file_llseek, - .write = do_sync_write, + .write = new_sync_write, .read = new_sync_read, .read_iter = generic_file_read_iter, - .aio_write = generic_file_aio_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, diff --git a/fs/logfs/file.c b/fs/logfs/file.c index 1ca8026dc664..8538752df2f6 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c @@ -265,14 +265,14 @@ const struct inode_operations logfs_reg_iops = { const struct file_operations logfs_reg_fops = { .read_iter = generic_file_read_iter, - .aio_write = generic_file_aio_write, + .write_iter = generic_file_write_iter, .fsync = logfs_fsync, .unlocked_ioctl = logfs_ioctl, .llseek = generic_file_llseek, .mmap = generic_file_readonly_mmap, .open = generic_file_open, .read = new_sync_read, - .write = do_sync_write, + .write = new_sync_write, }; const struct address_space_operations logfs_reg_aops = { diff --git a/fs/minix/file.c b/fs/minix/file.c index 607b47145325..a967de085ac0 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -16,8 +16,8 @@ const struct file_operations minix_file_operations = { .llseek = generic_file_llseek, .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .fsync = generic_file_fsync, .splice_read = generic_file_splice_read, diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index dcb1b0e8b435..24978153c0c4 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -153,9 +153,9 @@ static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) const struct file_operations nilfs_file_operations = { .llseek = generic_file_llseek, .read = new_sync_read, - .write = do_sync_write, + .write = new_sync_write, .read_iter = generic_file_read_iter, - .aio_write = generic_file_aio_write, + .write_iter = generic_file_write_iter, .unlocked_ioctl = nilfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = nilfs_compat_ioctl, diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 3bf28da9f0df..902e88527fce 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -338,9 +338,9 @@ static sector_t omfs_bmap(struct address_space *mapping, sector_t block) const struct file_operations omfs_file_operations = { .llseek = generic_file_llseek, .read = new_sync_read, - .write = do_sync_write, + .write = new_sync_write, .read_iter = generic_file_read_iter, - .aio_write = generic_file_aio_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .fsync = generic_file_fsync, .splice_read = generic_file_splice_read, diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 30ffb367bc0b..6ea0b9718a9d 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -33,8 +33,8 @@ const struct file_operations ramfs_file_operations = { .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .fsync = noop_fsync, .splice_read = generic_file_splice_read, diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 416db04f8464..9ed420f8f3ca 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -39,8 +39,8 @@ const struct file_operations ramfs_file_operations = { .get_unmapped_area = ramfs_nommu_get_unmapped_area, .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .fsync = noop_fsync, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 7592d681fd8c..7c8ecd6468db 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -236,7 +236,7 @@ int reiserfs_commit_page(struct inode *inode, struct page *page, const struct file_operations reiserfs_file_operations = { .read = new_sync_read, - .write = do_sync_write, + .write = new_sync_write, .unlocked_ioctl = reiserfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = reiserfs_compat_ioctl, @@ -246,7 +246,7 @@ const struct file_operations reiserfs_file_operations = { .release = reiserfs_file_release, .fsync = reiserfs_sync_file, .read_iter = generic_file_read_iter, - .aio_write = generic_file_aio_write, + .write_iter = generic_file_write_iter, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, .llseek = generic_file_llseek, diff --git a/fs/sysv/file.c b/fs/sysv/file.c index d99be8877388..b00811c75b24 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -23,8 +23,8 @@ const struct file_operations sysv_file_operations = { .llseek = generic_file_llseek, .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .fsync = generic_file_fsync, .splice_read = generic_file_splice_read, diff --git a/fs/ufs/file.c b/fs/ufs/file.c index b6b402989e6b..c84ec010a676 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -37,8 +37,8 @@ const struct file_operations ufs_file_operations = { .llseek = generic_file_llseek, .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .open = generic_file_open, .fsync = generic_file_fsync, diff --git a/include/linux/fs.h b/include/linux/fs.h index 17535e0a4547..4b221637f09e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2414,7 +2414,9 @@ int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isbl extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long); +extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); +extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t); extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); diff --git a/mm/filemap.c b/mm/filemap.c index c0404b763a17..d2d9eeec8bf0 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2542,10 +2542,9 @@ again: EXPORT_SYMBOL(generic_perform_write); /** - * __generic_file_aio_write - write data to a file + * __generic_file_write_iter - write data to a file * @iocb: IO state structure (file, offset, etc.) - * @iov: vector with data to write - * @nr_segs: number of segments in the vector + * @from: iov_iter with data to write * * This function does all the work needed for actually writing data to a * file. It does all basic checks, removes SUID from the file, updates @@ -2559,21 +2558,16 @@ EXPORT_SYMBOL(generic_perform_write); * A caller has to handle it. This is mainly due to the fact that we want to * avoid syncing under i_mutex. */ -ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs) +ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space * mapping = file->f_mapping; - size_t count; /* after file limit checks */ struct inode *inode = mapping->host; loff_t pos = iocb->ki_pos; ssize_t written = 0; ssize_t err; ssize_t status; - struct iov_iter from; - - count = iov_length(iov, nr_segs); - iov_iter_init(&from, WRITE, iov, nr_segs, count); + size_t count = iov_iter_count(from); /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; @@ -2584,7 +2578,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (count == 0) goto out; - iov_iter_truncate(&from, count); + iov_iter_truncate(from, count); err = file_remove_suid(file); if (err) @@ -2598,7 +2592,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (unlikely(file->f_flags & O_DIRECT)) { loff_t endbyte; - written = generic_file_direct_write(iocb, &from, pos); + written = generic_file_direct_write(iocb, from, pos); if (written < 0 || written == count) goto out; @@ -2609,7 +2603,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, pos += written; count -= written; - status = generic_perform_write(file, &from, pos); + status = generic_perform_write(file, from, pos); /* * If generic_perform_write() returned a synchronous error * then we want to return the number of bytes which were @@ -2641,7 +2635,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, */ } } else { - written = generic_perform_write(file, &from, pos); + written = generic_perform_write(file, from, pos); if (likely(written >= 0)) iocb->ki_pos = pos + written; } @@ -2649,30 +2643,36 @@ out: current->backing_dev_info = NULL; return written ? written : err; } +EXPORT_SYMBOL(__generic_file_write_iter); + +ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs) +{ + size_t count = iov_length(iov, nr_segs); + struct iov_iter from; + + iov_iter_init(&from, WRITE, iov, nr_segs, count); + return __generic_file_write_iter(iocb, &from); +} EXPORT_SYMBOL(__generic_file_aio_write); /** - * generic_file_aio_write - write data to a file + * generic_file_write_iter - write data to a file * @iocb: IO state structure - * @iov: vector with data to write - * @nr_segs: number of segments in the vector - * @pos: position in file where to write + * @from: iov_iter with data to write * - * This is a wrapper around __generic_file_aio_write() to be used by most + * This is a wrapper around __generic_file_write_iter() to be used by most * filesystems. It takes care of syncing the file in case of O_SYNC file * and acquires i_mutex as needed. */ -ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; ssize_t ret; - BUG_ON(iocb->ki_pos != pos); - mutex_lock(&inode->i_mutex); - ret = __generic_file_aio_write(iocb, iov, nr_segs); + ret = __generic_file_write_iter(iocb, from); mutex_unlock(&inode->i_mutex); if (ret > 0) { @@ -2684,6 +2684,19 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, } return ret; } +EXPORT_SYMBOL(generic_file_write_iter); + +ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + size_t count = iov_length(iov, nr_segs); + struct iov_iter from; + + BUG_ON(iocb->ki_pos != pos); + + iov_iter_init(&from, WRITE, iov, nr_segs, count); + return generic_file_write_iter(iocb, &from); +} EXPORT_SYMBOL(generic_file_aio_write); /** diff --git a/mm/shmem.c b/mm/shmem.c index edc6c7e817e9..d3e5c6fc313c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2618,9 +2618,9 @@ static const struct file_operations shmem_file_operations = { #ifdef CONFIG_TMPFS .llseek = shmem_file_llseek, .read = new_sync_read, - .write = do_sync_write, + .write = new_sync_write, .read_iter = shmem_file_read_iter, - .aio_write = generic_file_aio_write, + .write_iter = generic_file_write_iter, .fsync = noop_fsync, .splice_read = shmem_file_splice_read, .splice_write = generic_file_splice_write, diff --git a/mm/vmscan.c b/mm/vmscan.c index 32c661d66a45..9c2dba6ac685 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -458,7 +458,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, * stalls if we need to run get_block(). We could test * PagePrivate for that. * - * If this process is currently in __generic_file_aio_write() against + * If this process is currently in __generic_file_write_iter() against * this page's queue, we can perform writeback even if that * will block. * -- cgit v1.2.3-71-gd317 From 1456c0a87c4241d3a801651019e66983c69ad17d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 3 Apr 2014 03:21:50 -0400 Subject: blkdev_aio_write() - turn into blkdev_write_iter() Signed-off-by: Al Viro --- drivers/char/raw.c | 4 ++-- fs/block_dev.c | 16 ++++++---------- include/linux/fs.h | 3 +-- 3 files changed, 9 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/char/raw.c b/drivers/char/raw.c index cfb607a64b85..0102dc788608 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -286,8 +286,8 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd, static const struct file_operations raw_fops = { .read = new_sync_read, .read_iter = generic_file_read_iter, - .write = do_sync_write, - .aio_write = blkdev_aio_write, + .write = new_sync_write, + .write_iter = blkdev_write_iter, .fsync = blkdev_fsync, .open = raw_open, .release = raw_release, diff --git a/fs/block_dev.c b/fs/block_dev.c index 3d97f4a257ff..4e36b8ea8aa4 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1509,28 +1509,24 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) * Does not take i_mutex for the write and thus is not for general purpose * use. */ -ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct blk_plug plug; ssize_t ret; - BUG_ON(iocb->ki_pos != pos); - blk_start_plug(&plug); - ret = __generic_file_aio_write(iocb, iov, nr_segs); + ret = __generic_file_write_iter(iocb, from); if (ret > 0) { ssize_t err; - - err = generic_write_sync(file, pos, ret); + err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } blk_finish_plug(&plug); return ret; } -EXPORT_SYMBOL_GPL(blkdev_aio_write); +EXPORT_SYMBOL_GPL(blkdev_write_iter); static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) { @@ -1577,9 +1573,9 @@ const struct file_operations def_blk_fops = { .release = blkdev_close, .llseek = block_llseek, .read = new_sync_read, - .write = do_sync_write, + .write = new_sync_write, .read_iter = blkdev_read_iter, - .aio_write = blkdev_aio_write, + .write_iter = blkdev_write_iter, .mmap = generic_file_mmap, .fsync = blkdev_fsync, .unlocked_ioctl = block_ioctl, diff --git a/include/linux/fs.h b/include/linux/fs.h index 4b221637f09e..1b9b6c59abdd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2425,8 +2425,7 @@ extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); /* fs/block_dev.c */ -extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); +extern ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from); extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync); extern void block_sync_page(struct page *page); -- cgit v1.2.3-71-gd317 From a8f3550cd228b6edc5d17fce1a9af8cc7004f185 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 3 Apr 2014 03:32:25 -0400 Subject: bury __generic_file_aio_write() all users converted to __generic_file_write_iter() now Signed-off-by: Al Viro --- include/linux/fs.h | 1 - mm/filemap.c | 11 ----------- 2 files changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1b9b6c59abdd..99817c9e665e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2413,7 +2413,6 @@ extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); -extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/mm/filemap.c b/mm/filemap.c index d2d9eeec8bf0..7dcdb9db710d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2645,17 +2645,6 @@ out: } EXPORT_SYMBOL(__generic_file_write_iter); -ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs) -{ - size_t count = iov_length(iov, nr_segs); - struct iov_iter from; - - iov_iter_init(&from, WRITE, iov, nr_segs, count); - return __generic_file_write_iter(iocb, &from); -} -EXPORT_SYMBOL(__generic_file_aio_write); - /** * generic_file_write_iter - write data to a file * @iocb: IO state structure -- cgit v1.2.3-71-gd317 From f0d1bec9d58d4c038d0ac958c9af82be6eb18045 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 3 Apr 2014 15:05:18 -0400 Subject: new helper: copy_page_from_iter() parallel to copy_page_to_iter(). pipe_write() switched to it (and became ->write_iter()). Signed-off-by: Al Viro --- fs/pipe.c | 129 ++++++++-------------------------------------------- include/linux/uio.h | 2 + mm/iov_iter.c | 78 +++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+), 110 deletions(-) (limited to 'include') diff --git a/fs/pipe.c b/fs/pipe.c index 05ccb00cb407..21981e58e2a6 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -116,50 +116,6 @@ void pipe_wait(struct pipe_inode_info *pipe) pipe_lock(pipe); } -static int -pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len, - int atomic) -{ - unsigned long copy; - - while (len > 0) { - while (!iov->iov_len) - iov++; - copy = min_t(unsigned long, len, iov->iov_len); - - if (atomic) { - if (__copy_from_user_inatomic(to, iov->iov_base, copy)) - return -EFAULT; - } else { - if (copy_from_user(to, iov->iov_base, copy)) - return -EFAULT; - } - to += copy; - len -= copy; - iov->iov_base += copy; - iov->iov_len -= copy; - } - return 0; -} - -/* - * Pre-fault in the user memory, so we can use atomic copies. - */ -static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len) -{ - while (!iov->iov_len) - iov++; - - while (len > 0) { - unsigned long this_len; - - this_len = min_t(unsigned long, len, iov->iov_len); - fault_in_pages_readable(iov->iov_base, this_len); - len -= this_len; - iov++; - } -} - static void anon_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { @@ -380,24 +336,19 @@ static inline int is_packetized(struct file *file) } static ssize_t -pipe_write(struct kiocb *iocb, const struct iovec *_iov, - unsigned long nr_segs, loff_t ppos) +pipe_write(struct kiocb *iocb, struct iov_iter *from) { struct file *filp = iocb->ki_filp; struct pipe_inode_info *pipe = filp->private_data; - ssize_t ret; - int do_wakeup; - struct iovec *iov = (struct iovec *)_iov; - size_t total_len; + ssize_t ret = 0; + int do_wakeup = 0; + size_t total_len = iov_iter_count(from); ssize_t chars; - total_len = iov_length(iov, nr_segs); /* Null write succeeds. */ if (unlikely(total_len == 0)) return 0; - do_wakeup = 0; - ret = 0; __pipe_lock(pipe); if (!pipe->readers) { @@ -416,38 +367,19 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, int offset = buf->offset + buf->len; if (ops->can_merge && offset + chars <= PAGE_SIZE) { - int error, atomic = 1; - void *addr; - - error = ops->confirm(pipe, buf); + int error = ops->confirm(pipe, buf); if (error) goto out; - iov_fault_in_pages_read(iov, chars); -redo1: - if (atomic) - addr = kmap_atomic(buf->page); - else - addr = kmap(buf->page); - error = pipe_iov_copy_from_user(offset + addr, iov, - chars, atomic); - if (atomic) - kunmap_atomic(addr); - else - kunmap(buf->page); - ret = error; - do_wakeup = 1; - if (error) { - if (atomic) { - atomic = 0; - goto redo1; - } + ret = copy_page_from_iter(buf->page, offset, chars, from); + if (unlikely(ret < chars)) { + error = -EFAULT; goto out; } + do_wakeup = 1; buf->len += chars; - total_len -= chars; ret = chars; - if (!total_len) + if (!iov_iter_count(from)) goto out; } } @@ -466,8 +398,7 @@ redo1: int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1); struct pipe_buffer *buf = pipe->bufs + newbuf; struct page *page = pipe->tmp_page; - char *src; - int error, atomic = 1; + int copied; if (!page) { page = alloc_page(GFP_HIGHUSER); @@ -483,40 +414,19 @@ redo1: * FIXME! Is this really true? */ do_wakeup = 1; - chars = PAGE_SIZE; - if (chars > total_len) - chars = total_len; - - iov_fault_in_pages_read(iov, chars); -redo2: - if (atomic) - src = kmap_atomic(page); - else - src = kmap(page); - - error = pipe_iov_copy_from_user(src, iov, chars, - atomic); - if (atomic) - kunmap_atomic(src); - else - kunmap(page); - - if (unlikely(error)) { - if (atomic) { - atomic = 0; - goto redo2; - } + copied = copy_page_from_iter(page, 0, PAGE_SIZE, from); + if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) { if (!ret) - ret = error; + ret = -EFAULT; break; } - ret += chars; + ret += copied; /* Insert it into the buffer array */ buf->page = page; buf->ops = &anon_pipe_buf_ops; buf->offset = 0; - buf->len = chars; + buf->len = copied; buf->flags = 0; if (is_packetized(filp)) { buf->ops = &packet_pipe_buf_ops; @@ -525,8 +435,7 @@ redo2: pipe->nrbufs = ++bufs; pipe->tmp_page = NULL; - total_len -= chars; - if (!total_len) + if (!iov_iter_count(from)) break; } if (bufs < pipe->buffers) @@ -1040,8 +949,8 @@ const struct file_operations pipefifo_fops = { .llseek = no_llseek, .read = new_sync_read, .read_iter = pipe_read, - .write = do_sync_write, - .aio_write = pipe_write, + .write = new_sync_write, + .write_iter = pipe_write, .poll = pipe_poll, .unlocked_ioctl = pipe_ioctl, .release = pipe_release, diff --git a/include/linux/uio.h b/include/linux/uio.h index 532f59d0adbb..66012352d333 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -68,6 +68,8 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); +size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i); unsigned long iov_iter_alignment(const struct iov_iter *i); void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, size_t count); diff --git a/mm/iov_iter.c b/mm/iov_iter.c index a5c691c1a283..081e3273085b 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -82,6 +82,84 @@ done: } EXPORT_SYMBOL(copy_page_to_iter); +size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t skip, copy, left, wanted; + const struct iovec *iov; + char __user *buf; + void *kaddr, *to; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + iov = i->iov; + skip = i->iov_offset; + buf = iov->iov_base + skip; + copy = min(bytes, iov->iov_len - skip); + + if (!fault_in_pages_readable(buf, copy)) { + kaddr = kmap_atomic(page); + to = kaddr + offset; + + /* first chunk, usually the only one */ + left = __copy_from_user_inatomic(to, buf, copy); + copy -= left; + skip += copy; + to += copy; + bytes -= copy; + + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_from_user_inatomic(to, buf, copy); + copy -= left; + skip = copy; + to += copy; + bytes -= copy; + } + if (likely(!bytes)) { + kunmap_atomic(kaddr); + goto done; + } + offset = to - kaddr; + buf += copy; + kunmap_atomic(kaddr); + copy = min(bytes, iov->iov_len - skip); + } + /* Too bad - revert to non-atomic kmap */ + kaddr = kmap(page); + to = kaddr + offset; + left = __copy_from_user(to, buf, copy); + copy -= left; + skip += copy; + to += copy; + bytes -= copy; + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_from_user(to, buf, copy); + copy -= left; + skip = copy; + to += copy; + bytes -= copy; + } + kunmap(page); +done: + i->count -= wanted - bytes; + i->nr_segs -= iov - i->iov; + i->iov = iov; + i->iov_offset = skip; + return wanted - bytes; +} +EXPORT_SYMBOL(copy_page_from_iter); + static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) { -- cgit v1.2.3-71-gd317 From 2b777c9dd9ebbb2f8b6818d454cc5e6d7c1e3c8b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 3 Apr 2014 22:31:22 -0400 Subject: ceph_sync_read: stop poking into iov_iter guts Signed-off-by: Al Viro --- fs/ceph/file.c | 46 +++++++++++++++++--------------------------- include/linux/ceph/libceph.h | 2 -- net/ceph/pagevec.c | 35 ++++----------------------------- 3 files changed, 22 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index c9a24ba98c9a..672b0fedb17b 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -418,7 +418,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, struct page **pages; u64 off = iocb->ki_pos; int num_pages, ret; - size_t len = i->count; + size_t len = iov_iter_count(i); dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len, @@ -436,25 +436,26 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, if (file->f_flags & O_DIRECT) { while (iov_iter_count(i)) { - void __user *data = i->iov[0].iov_base + i->iov_offset; - size_t len = i->iov[0].iov_len - i->iov_offset; + size_t start; + ssize_t n; - num_pages = calc_pages_for((unsigned long)data, len); - pages = ceph_get_direct_page_vector(data, - num_pages, true); - if (IS_ERR(pages)) - return PTR_ERR(pages); + n = iov_iter_get_pages_alloc(i, &pages, INT_MAX, &start); + if (n < 0) + return n; - ret = striped_read(inode, off, len, + num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE; + + ret = striped_read(inode, off, n, pages, num_pages, checkeof, - 1, (unsigned long)data & ~PAGE_MASK); + 1, start); + ceph_put_page_vector(pages, num_pages, true); if (ret <= 0) break; off += ret; iov_iter_advance(i, ret); - if (ret < len) + if (ret < n) break; } } else { @@ -466,25 +467,14 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, num_pages, checkeof, 0, 0); if (ret > 0) { int l, k = 0; - size_t left = len = ret; + size_t left = ret; while (left) { - void __user *data = i->iov[0].iov_base - + i->iov_offset; - l = min(i->iov[0].iov_len - i->iov_offset, - left); - - ret = ceph_copy_page_vector_to_user(&pages[k], - data, off, - l); - if (ret > 0) { - iov_iter_advance(i, ret); - left -= ret; - off += ret; - k = calc_pages_for(iocb->ki_pos, - len - left + 1) - 1; - BUG_ON(k >= num_pages && left); - } else + int copy = min_t(size_t, PAGE_SIZE, left); + l = copy_page_to_iter(pages[k++], 0, copy, i); + off += l; + left -= l; + if (l < copy) break; } } diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 2f49aa4c4f7f..279b0afac1c1 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -222,8 +222,6 @@ extern void ceph_copy_to_page_vector(struct page **pages, extern void ceph_copy_from_page_vector(struct page **pages, void *data, loff_t off, size_t len); -extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data, - loff_t off, size_t len); extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 815a2249cfa9..555013034f7a 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -53,7 +53,10 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty) set_page_dirty_lock(pages[i]); put_page(pages[i]); } - kfree(pages); + if (is_vmalloc_addr(pages)) + vfree(pages); + else + kfree(pages); } EXPORT_SYMBOL(ceph_put_page_vector); @@ -164,36 +167,6 @@ void ceph_copy_from_page_vector(struct page **pages, } EXPORT_SYMBOL(ceph_copy_from_page_vector); -/* - * copy user data from a page vector into a user pointer - */ -int ceph_copy_page_vector_to_user(struct page **pages, - void __user *data, - loff_t off, size_t len) -{ - int i = 0; - int po = off & ~PAGE_CACHE_MASK; - int left = len; - int l, bad; - - while (left > 0) { - l = min_t(int, left, PAGE_CACHE_SIZE-po); - bad = copy_to_user(data, page_address(pages[i]) + po, l); - if (bad == l) - return -EFAULT; - data += l - bad; - left -= l - bad; - if (po) { - po += l - bad; - if (po == PAGE_CACHE_SIZE) - po = 0; - } - i++; - } - return len; -} -EXPORT_SYMBOL(ceph_copy_page_vector_to_user); - /* * Zero an extent within a page vector. Offset is relative to the * start of the first page. -- cgit v1.2.3-71-gd317 From b42b15fdad3ebb790250041d1517acebb9bd56d9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Apr 2014 12:15:19 -0400 Subject: lustre: get rid of messing with iovecs * switch to ->read_iter/->write_iter * keep a pointer to iov_iter instead of iov/nr_segs * do not modify iovecs; use iov_iter_truncate()/iov_iter_advance() and a new primitive - iov_iter_reexpand() (expand previously truncated iterator) istead. * (racy) check for lustre VMAs intersecting with iovecs kept for now as for_each_iov() loop. Signed-off-by: Al Viro --- drivers/staging/lustre/lustre/include/lclient.h | 11 +-- drivers/staging/lustre/lustre/lclient/lcommon_cl.c | 48 +--------- drivers/staging/lustre/lustre/llite/file.c | 106 ++++----------------- .../staging/lustre/lustre/llite/llite_internal.h | 3 +- drivers/staging/lustre/lustre/llite/rw.c | 3 +- drivers/staging/lustre/lustre/llite/vvp_io.c | 29 +++--- include/linux/uio.h | 9 ++ 7 files changed, 46 insertions(+), 163 deletions(-) (limited to 'include') diff --git a/drivers/staging/lustre/lustre/include/lclient.h b/drivers/staging/lustre/lustre/include/lclient.h index 827209ea6bd0..386a36c00f57 100644 --- a/drivers/staging/lustre/lustre/include/lclient.h +++ b/drivers/staging/lustre/lustre/include/lclient.h @@ -82,16 +82,7 @@ struct ccc_io { /** * I/O vector information to or from which read/write is going. */ - struct iovec *cui_iov; - unsigned long cui_nrsegs; - /** - * Total iov count for left IO. - */ - unsigned long cui_tot_nrsegs; - /** - * Old length for iov that was truncated partially. - */ - size_t cui_iov_olen; + struct iov_iter *cui_iter; /** * Total size for the left IO. */ diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c index 6907a16dbbd1..a07d5156bc50 100644 --- a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c +++ b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c @@ -721,31 +721,12 @@ int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io, void ccc_io_update_iov(const struct lu_env *env, struct ccc_io *cio, struct cl_io *io) { - int i; size_t size = io->u.ci_rw.crw_count; - cio->cui_iov_olen = 0; - if (!cl_is_normalio(env, io) || cio->cui_tot_nrsegs == 0) + if (!cl_is_normalio(env, io) || cio->cui_iter == NULL) return; - for (i = 0; i < cio->cui_tot_nrsegs; i++) { - struct iovec *iv = &cio->cui_iov[i]; - - if (iv->iov_len < size) - size -= iv->iov_len; - else { - if (iv->iov_len > size) { - cio->cui_iov_olen = iv->iov_len; - iv->iov_len = size; - } - break; - } - } - - cio->cui_nrsegs = i + 1; - LASSERTF(cio->cui_tot_nrsegs >= cio->cui_nrsegs, - "tot_nrsegs: %lu, nrsegs: %lu\n", - cio->cui_tot_nrsegs, cio->cui_nrsegs); + iov_iter_truncate(cio->cui_iter, size); } int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io, @@ -776,30 +757,7 @@ void ccc_io_advance(const struct lu_env *env, if (!cl_is_normalio(env, io)) return; - LASSERT(cio->cui_tot_nrsegs >= cio->cui_nrsegs); - LASSERT(cio->cui_tot_count >= nob); - - cio->cui_iov += cio->cui_nrsegs; - cio->cui_tot_nrsegs -= cio->cui_nrsegs; - cio->cui_tot_count -= nob; - - /* update the iov */ - if (cio->cui_iov_olen > 0) { - struct iovec *iv; - - cio->cui_iov--; - cio->cui_tot_nrsegs++; - iv = &cio->cui_iov[0]; - if (io->ci_continue) { - iv->iov_base += iv->iov_len; - LASSERT(cio->cui_iov_olen > iv->iov_len); - iv->iov_len = cio->cui_iov_olen - iv->iov_len; - } else { - /* restore the iov_len, in case of restart io. */ - iv->iov_len = cio->cui_iov_olen; - } - cio->cui_iov_olen = 0; - } + iov_iter_reexpand(cio->cui_iter, cio->cui_tot_count -= nob); } /** diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index 220bd8390a84..3efda2540d29 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -1105,9 +1105,7 @@ restart: switch (vio->cui_io_subtype) { case IO_NORMAL: - cio->cui_iov = args->u.normal.via_iov; - cio->cui_nrsegs = args->u.normal.via_nrsegs; - cio->cui_tot_nrsegs = cio->cui_nrsegs; + cio->cui_iter = args->u.normal.via_iter; cio->cui_iocb = args->u.normal.via_iocb; if ((iot == CIT_WRITE) && !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { @@ -1171,56 +1169,23 @@ out: return result; } -static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct lu_env *env; struct vvp_io_args *args; - size_t count = 0; ssize_t result; int refcheck; - count = iov_length(iov, nr_segs); - env = cl_env_get(&refcheck); if (IS_ERR(env)) return PTR_ERR(env); args = vvp_env_args(env, IO_NORMAL); - args->u.normal.via_iov = (struct iovec *)iov; - args->u.normal.via_nrsegs = nr_segs; + args->u.normal.via_iter = to; args->u.normal.via_iocb = iocb; result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ, - &iocb->ki_pos, count); - cl_env_put(env, &refcheck); - return result; -} - -static ssize_t ll_file_read(struct file *file, char *buf, size_t count, - loff_t *ppos) -{ - struct lu_env *env; - struct iovec *local_iov; - struct kiocb *kiocb; - ssize_t result; - int refcheck; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - local_iov = &vvp_env_info(env)->vti_local_iov; - kiocb = &vvp_env_info(env)->vti_kiocb; - local_iov->iov_base = (void __user *)buf; - local_iov->iov_len = count; - init_sync_kiocb(kiocb, file); - kiocb->ki_pos = *ppos; - kiocb->ki_nbytes = count; - - result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos); - *ppos = kiocb->ki_pos; - + &iocb->ki_pos, iov_iter_count(to)); cl_env_put(env, &refcheck); return result; } @@ -1228,12 +1193,10 @@ static ssize_t ll_file_read(struct file *file, char *buf, size_t count, /* * Write to a file (through the page cache). */ -static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct lu_env *env; struct vvp_io_args *args; - size_t count = iov_length(iov, nr_segs); ssize_t result; int refcheck; @@ -1242,46 +1205,15 @@ static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov, return PTR_ERR(env); args = vvp_env_args(env, IO_NORMAL); - args->u.normal.via_iov = (struct iovec *)iov; - args->u.normal.via_nrsegs = nr_segs; + args->u.normal.via_iter = from; args->u.normal.via_iocb = iocb; result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE, - &iocb->ki_pos, count); + &iocb->ki_pos, iov_iter_count(from)); cl_env_put(env, &refcheck); return result; } -static ssize_t ll_file_write(struct file *file, const char *buf, size_t count, - loff_t *ppos) -{ - struct lu_env *env; - struct iovec *local_iov; - struct kiocb *kiocb; - ssize_t result; - int refcheck; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - local_iov = &vvp_env_info(env)->vti_local_iov; - kiocb = &vvp_env_info(env)->vti_kiocb; - local_iov->iov_base = (void __user *)buf; - local_iov->iov_len = count; - init_sync_kiocb(kiocb, file); - kiocb->ki_pos = *ppos; - kiocb->ki_nbytes = count; - - result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos); - *ppos = kiocb->ki_pos; - - cl_env_put(env, &refcheck); - return result; -} - - - /* * Send file content (through pagecache) somewhere with helper */ @@ -3133,10 +3065,10 @@ int ll_inode_permission(struct inode *inode, int mask) /* -o localflock - only provides locally consistent flock locks */ struct file_operations ll_file_operations = { - .read = ll_file_read, - .aio_read = ll_file_aio_read, - .write = ll_file_write, - .aio_write = ll_file_aio_write, + .read = new_sync_read, + .read_iter = ll_file_read_iter, + .write = new_sync_write, + .write_iter = ll_file_write_iter, .unlocked_ioctl = ll_file_ioctl, .open = ll_file_open, .release = ll_file_release, @@ -3148,10 +3080,10 @@ struct file_operations ll_file_operations = { }; struct file_operations ll_file_operations_flock = { - .read = ll_file_read, - .aio_read = ll_file_aio_read, - .write = ll_file_write, - .aio_write = ll_file_aio_write, + .read = new_sync_read, + .read_iter = ll_file_read_iter, + .write = new_sync_write, + .write_iter = ll_file_write_iter, .unlocked_ioctl = ll_file_ioctl, .open = ll_file_open, .release = ll_file_release, @@ -3166,10 +3098,10 @@ struct file_operations ll_file_operations_flock = { /* These are for -o noflock - to return ENOSYS on flock calls */ struct file_operations ll_file_operations_noflock = { - .read = ll_file_read, - .aio_read = ll_file_aio_read, - .write = ll_file_write, - .aio_write = ll_file_aio_write, + .read = new_sync_read, + .read_iter = ll_file_read_iter, + .write = new_sync_write, + .write_iter = ll_file_write_iter, .unlocked_ioctl = ll_file_ioctl, .open = ll_file_open, .release = ll_file_release, diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index 69aba0afca41..fbb8650ead34 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -974,8 +974,7 @@ struct vvp_io_args { union { struct { struct kiocb *via_iocb; - struct iovec *via_iov; - unsigned long via_nrsegs; + struct iov_iter *via_iter; } normal; struct { struct pipe_inode_info *via_pipe; diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c index 416f7a094a6d..b345dfa599f3 100644 --- a/drivers/staging/lustre/lustre/llite/rw.c +++ b/drivers/staging/lustre/lustre/llite/rw.c @@ -157,8 +157,7 @@ static struct ll_cl_context *ll_cl_init(struct file *file, result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_CACHE_SIZE); if (result == 0) { cio->cui_fd = LUSTRE_FPRIVATE(file); - cio->cui_iov = NULL; - cio->cui_nrsegs = 0; + cio->cui_iter = NULL; result = cl_io_iter_init(env, io); if (result == 0) { result = cl_io_lock(env, io); diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c index c7d70091246e..cfe8c625ae64 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_io.c +++ b/drivers/staging/lustre/lustre/llite/vvp_io.c @@ -211,27 +211,26 @@ static int vvp_mmap_locks(const struct lu_env *env, struct cl_lock_descr *descr = &cti->cti_descr; ldlm_policy_data_t policy; unsigned long addr; - unsigned long seg; ssize_t count; int result; + struct iov_iter i; + struct iovec iov; LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); if (!cl_is_normalio(env, io)) return 0; - if (vio->cui_iov == NULL) /* nfs or loop back device write */ + if (vio->cui_iter == NULL) /* nfs or loop back device write */ return 0; /* No MM (e.g. NFS)? No vmas too. */ if (mm == NULL) return 0; - for (seg = 0; seg < vio->cui_nrsegs; seg++) { - const struct iovec *iv = &vio->cui_iov[seg]; - - addr = (unsigned long)iv->iov_base; - count = iv->iov_len; + iov_for_each(iov, i, *(vio->cui_iter)) { + addr = (unsigned long)iov.iov_base; + count = iov.iov_len; if (count == 0) continue; @@ -527,9 +526,7 @@ static int vvp_io_read_start(const struct lu_env *env, switch (vio->cui_io_subtype) { case IO_NORMAL: LASSERT(cio->cui_iocb->ki_pos == pos); - result = generic_file_aio_read(cio->cui_iocb, - cio->cui_iov, cio->cui_nrsegs, - cio->cui_iocb->ki_pos); + result = generic_file_read_iter(cio->cui_iocb, cio->cui_iter); break; case IO_SPLICE: result = generic_file_splice_read(file, &pos, @@ -595,12 +592,11 @@ static int vvp_io_write_start(const struct lu_env *env, CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt); - if (cio->cui_iov == NULL) /* from a temp io in ll_cl_init(). */ + if (cio->cui_iter == NULL) /* from a temp io in ll_cl_init(). */ result = 0; else - result = generic_file_aio_write(cio->cui_iocb, - cio->cui_iov, cio->cui_nrsegs, - cio->cui_iocb->ki_pos); + result = generic_file_write_iter(cio->cui_iocb, cio->cui_iter); + if (result > 0) { if (result < cnt) io->ci_continue = 0; @@ -1162,10 +1158,9 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, * results." -- Single Unix Spec */ if (count == 0) result = 1; - else { + else cio->cui_tot_count = count; - cio->cui_tot_nrsegs = 0; - } + /* for read/write, we store the jobid in the inode, and * it'll be fetched by osc when building RPC. * diff --git a/include/linux/uio.h b/include/linux/uio.h index 66012352d333..e8a109a75de1 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -90,6 +90,15 @@ static inline void iov_iter_truncate(struct iov_iter *i, size_t count) i->count = count; } +/* + * reexpand a previously truncated iterator; count must be no more than how much + * we had shrunk it. + */ +static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) +{ + i->count = count; +} + int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len); int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len); -- cgit v1.2.3-71-gd317 From 6abd232274fd652e4a57f486d14e52ffee6f72e9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Apr 2014 14:20:57 -0400 Subject: bury generic_file_aio_{read,write} no callers left Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- mm/filemap.c | 43 ++++++++----------------------------------- 2 files changed, 8 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 99817c9e665e..a6448849dbce 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2411,10 +2411,8 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, unsigned long size, pgoff_t pgoff); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); -extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); -extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t); extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); diff --git a/mm/filemap.c b/mm/filemap.c index 7dcdb9db710d..2f724e3cdf24 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1663,6 +1663,14 @@ out: return written ? written : error; } +/** + * generic_file_read_iter - generic filesystem read routine + * @iocb: kernel I/O control block + * @iter: destination for the data read + * + * This is the "read_iter()" routine for all filesystems + * that can use the page cache directly. + */ ssize_t generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { @@ -1713,28 +1721,6 @@ out: } EXPORT_SYMBOL(generic_file_read_iter); -/** - * generic_file_aio_read - generic filesystem read routine - * @iocb: kernel I/O control block - * @iov: io vector request - * @nr_segs: number of segments in the iovec - * @pos: current file position - * - * This is the "read()" routine for all filesystems - * that can use the page cache directly. - */ -ssize_t -generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - size_t count = iov_length(iov, nr_segs); - struct iov_iter i; - - iov_iter_init(&i, READ, iov, nr_segs, count); - return generic_file_read_iter(iocb, &i); -} -EXPORT_SYMBOL(generic_file_aio_read); - #ifdef CONFIG_MMU /** * page_cache_read - adds requested page to the page cache if not already there @@ -2675,19 +2661,6 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } EXPORT_SYMBOL(generic_file_write_iter); -ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - size_t count = iov_length(iov, nr_segs); - struct iov_iter from; - - BUG_ON(iocb->ki_pos != pos); - - iov_iter_init(&from, WRITE, iov, nr_segs, count); - return generic_file_write_iter(iocb, &from); -} -EXPORT_SYMBOL(generic_file_aio_write); - /** * try_to_release_page() - release old fs-specific metadata on a page * -- cgit v1.2.3-71-gd317 From 62a8067a7f35dba2de501c9cb00e4cf36da90bc0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Apr 2014 23:12:29 -0400 Subject: bio_vec-backed iov_iter New variant of iov_iter - ITER_BVEC in iter->type, backed with bio_vec array instead of iovec one. Primitives taught to deal with such beasts, __swap_write() switched to using that kind of iov_iter. Note that bio_vec is just a triple - there's nothing block-specific about it. I've left the definition where it was, but took it from under ifdef CONFIG_BLOCK. Next target: ->splice_write()... Signed-off-by: Al Viro --- fs/fuse/file.c | 2 +- include/linux/blk_types.h | 4 +- include/linux/uio.h | 14 +- mm/iov_iter.c | 390 ++++++++++++++++++++++++++++++++++++++++++---- mm/page_io.c | 19 ++- 5 files changed, 385 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 7fbc803cf51d..b2dae9d1437c 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1288,7 +1288,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, size_t nbytes = 0; /* # bytes already packed in req */ /* Special case for kernel I/O: can copy directly into the buffer */ - if (ii->type & REQ_KERNEL) { + if (ii->type & ITER_KVEC) { unsigned long user_addr = fuse_get_user_addr(ii); size_t frag_size = fuse_get_frag_size(ii, *nbytesp); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index aa0eaa2d0bd8..86df13b97160 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -5,8 +5,6 @@ #ifndef __LINUX_BLK_TYPES_H #define __LINUX_BLK_TYPES_H -#ifdef CONFIG_BLOCK - #include struct bio_set; @@ -28,6 +26,8 @@ struct bio_vec { unsigned int bv_offset; }; +#ifdef CONFIG_BLOCK + struct bvec_iter { sector_t bi_sector; /* device address in 512 byte sectors */ diff --git a/include/linux/uio.h b/include/linux/uio.h index e8a109a75de1..e2231e47cec1 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -19,12 +19,21 @@ struct kvec { size_t iov_len; }; +enum { + ITER_IOVEC = 0, + ITER_KVEC = 2, + ITER_BVEC = 4, +}; + struct iov_iter { int type; - const struct iovec *iov; - unsigned long nr_segs; size_t iov_offset; size_t count; + union { + const struct iovec *iov; + const struct bio_vec *bvec; + }; + unsigned long nr_segs; }; /* @@ -54,6 +63,7 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) } #define iov_for_each(iov, iter, start) \ + if (!((start).type & ITER_BVEC)) \ for (iter = (start); \ (iter).count && \ ((iov = iov_iter_iovec(&(iter))), 1); \ diff --git a/mm/iov_iter.c b/mm/iov_iter.c index fcdaaab438b6..7b5dbd1517b5 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -4,7 +4,7 @@ #include #include -size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, +static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t skip, copy, left, wanted; @@ -84,9 +84,8 @@ done: i->iov_offset = skip; return wanted - bytes; } -EXPORT_SYMBOL(copy_page_to_iter); -size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, +static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t skip, copy, left, wanted; @@ -166,7 +165,6 @@ done: i->iov_offset = skip; return wanted - bytes; } -EXPORT_SYMBOL(copy_page_from_iter); static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) @@ -195,7 +193,7 @@ static size_t __iovec_copy_from_user_inatomic(char *vaddr, * were successfully copied. If a fault is encountered then return the number of * bytes which were copied. */ -size_t iov_iter_copy_from_user_atomic(struct page *page, +static size_t copy_from_user_atomic_iovec(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes) { char *kaddr; @@ -215,9 +213,8 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, return copied; } -EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); -void iov_iter_advance(struct iov_iter *i, size_t bytes) +static void advance_iovec(struct iov_iter *i, size_t bytes) { BUG_ON(i->count < bytes); @@ -252,7 +249,6 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes) i->nr_segs = nr_segs; } } -EXPORT_SYMBOL(iov_iter_advance); /* * Fault in the first iovec of the given iov_iter, to a maximum length @@ -265,26 +261,16 @@ EXPORT_SYMBOL(iov_iter_advance); */ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) { - char __user *buf = i->iov->iov_base + i->iov_offset; - bytes = min(bytes, i->iov->iov_len - i->iov_offset); - return fault_in_pages_readable(buf, bytes); + if (!(i->type & ITER_BVEC)) { + char __user *buf = i->iov->iov_base + i->iov_offset; + bytes = min(bytes, i->iov->iov_len - i->iov_offset); + return fault_in_pages_readable(buf, bytes); + } + return 0; } EXPORT_SYMBOL(iov_iter_fault_in_readable); -/* - * Return the count of just the current iov_iter segment. - */ -size_t iov_iter_single_seg_count(const struct iov_iter *i) -{ - const struct iovec *iov = i->iov; - if (i->nr_segs == 1) - return i->count; - else - return min(i->count, iov->iov_len - i->iov_offset); -} -EXPORT_SYMBOL(iov_iter_single_seg_count); - -unsigned long iov_iter_alignment(const struct iov_iter *i) +static unsigned long alignment_iovec(const struct iov_iter *i) { const struct iovec *iov = i->iov; unsigned long res; @@ -307,7 +293,6 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) res |= (unsigned long)iov->iov_base | size; return res; } -EXPORT_SYMBOL(iov_iter_alignment); void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, @@ -315,7 +300,7 @@ void iov_iter_init(struct iov_iter *i, int direction, { /* It will get better. Eventually... */ if (segment_eq(get_fs(), KERNEL_DS)) - direction |= REQ_KERNEL; + direction |= ITER_KVEC; i->type = direction; i->iov = iov; i->nr_segs = nr_segs; @@ -324,7 +309,7 @@ void iov_iter_init(struct iov_iter *i, int direction, } EXPORT_SYMBOL(iov_iter_init); -ssize_t iov_iter_get_pages(struct iov_iter *i, +static ssize_t get_pages_iovec(struct iov_iter *i, struct page **pages, size_t maxsize, size_t *start) { @@ -349,9 +334,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, return res; return (res == n ? len : res * PAGE_SIZE) - *start; } -EXPORT_SYMBOL(iov_iter_get_pages); -ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, +static ssize_t get_pages_alloc_iovec(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start) { @@ -387,9 +371,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, *pages = p; return (res == n ? len : res * PAGE_SIZE) - *start; } -EXPORT_SYMBOL(iov_iter_get_pages_alloc); -int iov_iter_npages(const struct iov_iter *i, int maxpages) +static int iov_iter_npages_iovec(const struct iov_iter *i, int maxpages) { size_t offset = i->iov_offset; size_t size = i->count; @@ -414,4 +397,347 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) } return min(npages, maxpages); } + +static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) +{ + char *from = kmap_atomic(page); + memcpy(to, from + offset, len); + kunmap_atomic(from); +} + +static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len) +{ + char *to = kmap_atomic(page); + memcpy(to + offset, from, len); + kunmap_atomic(to); +} + +static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t skip, copy, wanted; + const struct bio_vec *bvec; + void *kaddr, *from; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + bvec = i->bvec; + skip = i->iov_offset; + copy = min_t(size_t, bytes, bvec->bv_len - skip); + + kaddr = kmap_atomic(page); + from = kaddr + offset; + memcpy_to_page(bvec->bv_page, skip + bvec->bv_offset, from, copy); + skip += copy; + from += copy; + bytes -= copy; + while (bytes) { + bvec++; + copy = min(bytes, (size_t)bvec->bv_len); + memcpy_to_page(bvec->bv_page, bvec->bv_offset, from, copy); + skip = copy; + from += copy; + bytes -= copy; + } + kunmap_atomic(kaddr); + if (skip == bvec->bv_len) { + bvec++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= bvec - i->bvec; + i->bvec = bvec; + i->iov_offset = skip; + return wanted - bytes; +} + +static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t skip, copy, wanted; + const struct bio_vec *bvec; + void *kaddr, *to; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + bvec = i->bvec; + skip = i->iov_offset; + + kaddr = kmap_atomic(page); + + to = kaddr + offset; + + copy = min(bytes, bvec->bv_len - skip); + + memcpy_from_page(to, bvec->bv_page, bvec->bv_offset + skip, copy); + + to += copy; + skip += copy; + bytes -= copy; + + while (bytes) { + bvec++; + copy = min(bytes, (size_t)bvec->bv_len); + memcpy_from_page(to, bvec->bv_page, bvec->bv_offset, copy); + skip = copy; + to += copy; + bytes -= copy; + } + kunmap_atomic(kaddr); + if (skip == bvec->bv_len) { + bvec++; + skip = 0; + } + i->count -= wanted; + i->nr_segs -= bvec - i->bvec; + i->bvec = bvec; + i->iov_offset = skip; + return wanted; +} + +static size_t copy_from_user_bvec(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes) +{ + char *kaddr; + size_t left; + const struct bio_vec *bvec; + size_t base = i->iov_offset; + + kaddr = kmap_atomic(page); + for (left = bytes, bvec = i->bvec; left; bvec++, base = 0) { + size_t copy = min(left, bvec->bv_len - base); + if (!bvec->bv_len) + continue; + memcpy_from_page(kaddr + offset, bvec->bv_page, + bvec->bv_offset + base, copy); + offset += copy; + left -= copy; + } + kunmap_atomic(kaddr); + return bytes; +} + +static void advance_bvec(struct iov_iter *i, size_t bytes) +{ + BUG_ON(i->count < bytes); + + if (likely(i->nr_segs == 1)) { + i->iov_offset += bytes; + i->count -= bytes; + } else { + const struct bio_vec *bvec = i->bvec; + size_t base = i->iov_offset; + unsigned long nr_segs = i->nr_segs; + + /* + * The !iov->iov_len check ensures we skip over unlikely + * zero-length segments (without overruning the iovec). + */ + while (bytes || unlikely(i->count && !bvec->bv_len)) { + int copy; + + copy = min(bytes, bvec->bv_len - base); + BUG_ON(!i->count || i->count < copy); + i->count -= copy; + bytes -= copy; + base += copy; + if (bvec->bv_len == base) { + bvec++; + nr_segs--; + base = 0; + } + } + i->bvec = bvec; + i->iov_offset = base; + i->nr_segs = nr_segs; + } +} + +static unsigned long alignment_bvec(const struct iov_iter *i) +{ + const struct bio_vec *bvec = i->bvec; + unsigned long res; + size_t size = i->count; + size_t n; + + if (!size) + return 0; + + res = bvec->bv_offset + i->iov_offset; + n = bvec->bv_len - i->iov_offset; + if (n >= size) + return res | size; + size -= n; + res |= n; + while (size > (++bvec)->bv_len) { + res |= bvec->bv_offset | bvec->bv_len; + size -= bvec->bv_len; + } + res |= bvec->bv_offset | size; + return res; +} + +static ssize_t get_pages_bvec(struct iov_iter *i, + struct page **pages, size_t maxsize, + size_t *start) +{ + const struct bio_vec *bvec = i->bvec; + size_t len = bvec->bv_len - i->iov_offset; + if (len > i->count) + len = i->count; + if (len > maxsize) + len = maxsize; + *start = bvec->bv_offset + i->iov_offset; + + get_page(*pages = bvec->bv_page); + + return len; +} + +static ssize_t get_pages_alloc_bvec(struct iov_iter *i, + struct page ***pages, size_t maxsize, + size_t *start) +{ + const struct bio_vec *bvec = i->bvec; + size_t len = bvec->bv_len - i->iov_offset; + if (len > i->count) + len = i->count; + if (len > maxsize) + len = maxsize; + *start = bvec->bv_offset + i->iov_offset; + + *pages = kmalloc(sizeof(struct page *), GFP_KERNEL); + if (!*pages) + return -ENOMEM; + + get_page(**pages = bvec->bv_page); + + return len; +} + +static int iov_iter_npages_bvec(const struct iov_iter *i, int maxpages) +{ + size_t offset = i->iov_offset; + size_t size = i->count; + const struct bio_vec *bvec = i->bvec; + int npages = 0; + int n; + + for (n = 0; size && n < i->nr_segs; n++, bvec++) { + size_t len = bvec->bv_len - offset; + offset = 0; + if (unlikely(!len)) /* empty segment */ + continue; + if (len > size) + len = size; + npages++; + if (npages >= maxpages) /* don't bother going further */ + return maxpages; + size -= len; + offset = 0; + } + return min(npages, maxpages); +} + +size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + if (i->type & ITER_BVEC) + return copy_page_to_iter_bvec(page, offset, bytes, i); + else + return copy_page_to_iter_iovec(page, offset, bytes, i); +} +EXPORT_SYMBOL(copy_page_to_iter); + +size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + if (i->type & ITER_BVEC) + return copy_page_from_iter_bvec(page, offset, bytes, i); + else + return copy_page_from_iter_iovec(page, offset, bytes, i); +} +EXPORT_SYMBOL(copy_page_from_iter); + +size_t iov_iter_copy_from_user_atomic(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes) +{ + if (i->type & ITER_BVEC) + return copy_from_user_bvec(page, i, offset, bytes); + else + return copy_from_user_atomic_iovec(page, i, offset, bytes); +} +EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); + +void iov_iter_advance(struct iov_iter *i, size_t size) +{ + if (i->type & ITER_BVEC) + advance_bvec(i, size); + else + advance_iovec(i, size); +} +EXPORT_SYMBOL(iov_iter_advance); + +/* + * Return the count of just the current iov_iter segment. + */ +size_t iov_iter_single_seg_count(const struct iov_iter *i) +{ + if (i->nr_segs == 1) + return i->count; + else if (i->type & ITER_BVEC) + return min(i->count, i->iov->iov_len - i->iov_offset); + else + return min(i->count, i->bvec->bv_len - i->iov_offset); +} +EXPORT_SYMBOL(iov_iter_single_seg_count); + +unsigned long iov_iter_alignment(const struct iov_iter *i) +{ + if (i->type & ITER_BVEC) + return alignment_bvec(i); + else + return alignment_iovec(i); +} +EXPORT_SYMBOL(iov_iter_alignment); + +ssize_t iov_iter_get_pages(struct iov_iter *i, + struct page **pages, size_t maxsize, + size_t *start) +{ + if (i->type & ITER_BVEC) + return get_pages_bvec(i, pages, maxsize, start); + else + return get_pages_iovec(i, pages, maxsize, start); +} +EXPORT_SYMBOL(iov_iter_get_pages); + +ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, + struct page ***pages, size_t maxsize, + size_t *start) +{ + if (i->type & ITER_BVEC) + return get_pages_alloc_bvec(i, pages, maxsize, start); + else + return get_pages_alloc_iovec(i, pages, maxsize, start); +} +EXPORT_SYMBOL(iov_iter_get_pages_alloc); + +int iov_iter_npages(const struct iov_iter *i, int maxpages) +{ + if (i->type & ITER_BVEC) + return iov_iter_npages_bvec(i, maxpages); + else + return iov_iter_npages_iovec(i, maxpages); +} EXPORT_SYMBOL(iov_iter_npages); diff --git a/mm/page_io.c b/mm/page_io.c index 313bfedb75d1..33bb38c4aad7 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -259,23 +259,28 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, struct kiocb kiocb; struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; - struct iovec iov = { - .iov_base = kmap(page), - .iov_len = PAGE_SIZE, + struct bio_vec bv = { + .bv_page = page, + .bv_len = PAGE_SIZE, + .bv_offset = 0 + }; + struct iov_iter from = { + .type = ITER_BVEC | WRITE, + .count = PAGE_SIZE, + .iov_offset = 0, + .nr_segs = 1, + .bvec = &bv }; - struct iov_iter from; init_sync_kiocb(&kiocb, swap_file); kiocb.ki_pos = page_file_offset(page); kiocb.ki_nbytes = PAGE_SIZE; - iov_iter_init(&from, KERNEL_WRITE, &iov, 1, PAGE_SIZE); set_page_writeback(page); unlock_page(page); - ret = mapping->a_ops->direct_IO(KERNEL_WRITE, + ret = mapping->a_ops->direct_IO(ITER_BVEC | WRITE, &kiocb, &from, kiocb.ki_pos); - kunmap(page); if (ret == PAGE_SIZE) { count_vm_event(PSWPOUT); ret = 0; -- cgit v1.2.3-71-gd317 From 9c69de4c94fcb11db919160d5fa0b48f13d1757a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2014 19:37:13 +0200 Subject: nfsd: remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only real user of this header is fs/nfsd/nfsfh.h, so merge the two. Various lockѕ source files used it to indirectly get other sunrpc or nfs headers, so fix those up. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/lockd/clnt4xdr.c | 2 ++ fs/lockd/clntxdr.c | 2 ++ fs/lockd/svcsubs.c | 2 +- fs/lockd/xdr.c | 2 ++ fs/nfsd/nfsd.h | 1 + fs/nfsd/nfsfh.h | 59 ++++++++++++++++++++++++++++++++++++++---- fs/nfsd/state.h | 1 - include/linux/lockd/lockd.h | 2 +- include/linux/nfsd/export.h | 6 ++++- include/linux/nfsd/nfsfh.h | 63 --------------------------------------------- 10 files changed, 68 insertions(+), 72 deletions(-) delete mode 100644 include/linux/nfsd/nfsfh.h (limited to 'include') diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index 00ec0b9c94d1..d3e40db28930 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -14,6 +14,8 @@ #include #include +#include + #define NLMDBG_FACILITY NLMDBG_XDR #if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ) diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c index 9a55797a1cd4..3e9f7874b975 100644 --- a/fs/lockd/clntxdr.c +++ b/fs/lockd/clntxdr.c @@ -15,6 +15,8 @@ #include #include +#include + #define NLMDBG_FACILITY NLMDBG_XDR #if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ) diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index dc5c75930f0f..7ec6b1074d8c 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -14,12 +14,12 @@ #include #include #include -#include #include #include #include #include #include +#include #define NLMDBG_FACILITY NLMDBG_SVCSUBS diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 964666c68a86..9340e7e10ef6 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -16,6 +16,8 @@ #include #include +#include + #define NLMDBG_FACILITY NLMDBG_XDR diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 479eb681c27c..7d5c310678d0 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index ad67964d0bb1..2e89e70ac15c 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -1,9 +1,58 @@ -/* Copyright (C) 1995, 1996, 1997 Olaf Kirch */ +/* + * Copyright (C) 1995, 1996, 1997 Olaf Kirch + * + * This file describes the layout of the file handles as passed + * over the wire. + */ +#ifndef _LINUX_NFSD_NFSFH_H +#define _LINUX_NFSD_NFSFH_H + +#include +#include + +static inline __u32 ino_t_to_u32(ino_t ino) +{ + return (__u32) ino; +} + +static inline ino_t u32_to_ino_t(__u32 uino) +{ + return (ino_t) uino; +} -#ifndef _LINUX_NFSD_FH_INT_H -#define _LINUX_NFSD_FH_INT_H +/* + * This is the internal representation of an NFS handle used in knfsd. + * pre_mtime/post_version will be used to support wcc_attr's in NFSv3. + */ +typedef struct svc_fh { + struct knfsd_fh fh_handle; /* FH data */ + struct dentry * fh_dentry; /* validated dentry */ + struct svc_export * fh_export; /* export pointer */ + int fh_maxsize; /* max size for fh_handle */ + + unsigned char fh_locked; /* inode locked by us */ + unsigned char fh_want_write; /* remount protection taken */ + +#ifdef CONFIG_NFSD_V3 + unsigned char fh_post_saved; /* post-op attrs saved */ + unsigned char fh_pre_saved; /* pre-op attrs saved */ + + /* Pre-op attributes saved during fh_lock */ + __u64 fh_pre_size; /* size before operation */ + struct timespec fh_pre_mtime; /* mtime before oper */ + struct timespec fh_pre_ctime; /* ctime before oper */ + /* + * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode) + * to find out if it is valid. + */ + u64 fh_pre_change; + + /* Post-op attributes saved in fh_unlock */ + struct kstat fh_post_attr; /* full attrs after operation */ + u64 fh_post_change; /* nfsv4 change; see above */ +#endif /* CONFIG_NFSD_V3 */ -#include +} svc_fh; enum nfsd_fsid { FSID_DEV = 0, @@ -215,4 +264,4 @@ fh_unlock(struct svc_fh *fhp) } } -#endif /* _LINUX_NFSD_FH_INT_H */ +#endif /* _LINUX_NFSD_NFSFH_H */ diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 424d8f5f2317..5b3bbf24097c 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -37,7 +37,6 @@ #include #include -#include #include "nfsfh.h" typedef struct { diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index dcaad79f54ed..219d79627c05 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -17,13 +17,13 @@ #include #include #include -#include #include #include #ifdef CONFIG_LOCKD_V4 #include #endif #include +#include /* * Version string diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 7898c997dfea..b12c4e526ef2 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -9,9 +9,13 @@ #ifndef NFSD_EXPORT_H #define NFSD_EXPORT_H -# include +#include #include +struct knfsd_fh; +struct svc_fh; +struct svc_rqst; + /* * FS Locations */ diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h deleted file mode 100644 index a93593f1fa4e..000000000000 --- a/include/linux/nfsd/nfsfh.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * include/linux/nfsd/nfsfh.h - * - * This file describes the layout of the file handles as passed - * over the wire. - * - * Earlier versions of knfsd used to sign file handles using keyed MD5 - * or SHA. I've removed this code, because it doesn't give you more - * security than blocking external access to port 2049 on your firewall. - * - * Copyright (C) 1995, 1996, 1997 Olaf Kirch - */ -#ifndef _LINUX_NFSD_FH_H -#define _LINUX_NFSD_FH_H - -# include -#include - -static inline __u32 ino_t_to_u32(ino_t ino) -{ - return (__u32) ino; -} - -static inline ino_t u32_to_ino_t(__u32 uino) -{ - return (ino_t) uino; -} - -/* - * This is the internal representation of an NFS handle used in knfsd. - * pre_mtime/post_version will be used to support wcc_attr's in NFSv3. - */ -typedef struct svc_fh { - struct knfsd_fh fh_handle; /* FH data */ - struct dentry * fh_dentry; /* validated dentry */ - struct svc_export * fh_export; /* export pointer */ - int fh_maxsize; /* max size for fh_handle */ - - unsigned char fh_locked; /* inode locked by us */ - unsigned char fh_want_write; /* remount protection taken */ - -#ifdef CONFIG_NFSD_V3 - unsigned char fh_post_saved; /* post-op attrs saved */ - unsigned char fh_pre_saved; /* pre-op attrs saved */ - - /* Pre-op attributes saved during fh_lock */ - __u64 fh_pre_size; /* size before operation */ - struct timespec fh_pre_mtime; /* mtime before oper */ - struct timespec fh_pre_ctime; /* ctime before oper */ - /* - * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode) - * to find out if it is valid. - */ - u64 fh_pre_change; - - /* Post-op attributes saved in fh_unlock */ - struct kstat fh_post_attr; /* full attrs after operation */ - u64 fh_post_change; /* nfsv4 change; see above */ -#endif /* CONFIG_NFSD_V3 */ - -} svc_fh; - -#endif /* _LINUX_NFSD_FH_H */ -- cgit v1.2.3-71-gd317 From d430e8d530e900c923bf77718d72478b1c280592 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2014 19:37:14 +0200 Subject: nfsd: move to fs/nfsd There are no legitimate users outside of fs/nfsd, so move it there. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/lockd/svcsubs.c | 1 - fs/nfsd/export.h | 109 ++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/nfsd.h | 3 +- include/linux/nfsd/export.h | 114 -------------------------------------------- 4 files changed, 111 insertions(+), 116 deletions(-) create mode 100644 fs/nfsd/export.h delete mode 100644 include/linux/nfsd/export.h (limited to 'include') diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 7ec6b1074d8c..b6f3b84b6e99 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h new file mode 100644 index 000000000000..d7939a62a0ae --- /dev/null +++ b/fs/nfsd/export.h @@ -0,0 +1,109 @@ +/* + * Copyright (C) 1995-1997 Olaf Kirch + */ +#ifndef NFSD_EXPORT_H +#define NFSD_EXPORT_H + +#include +#include + +struct knfsd_fh; +struct svc_fh; +struct svc_rqst; + +/* + * FS Locations + */ + +#define MAX_FS_LOCATIONS 128 + +struct nfsd4_fs_location { + char *hosts; /* colon separated list of hosts */ + char *path; /* slash separated list of path components */ +}; + +struct nfsd4_fs_locations { + uint32_t locations_count; + struct nfsd4_fs_location *locations; +/* If we're not actually serving this data ourselves (only providing a + * list of replicas that do serve it) then we set "migrated": */ + int migrated; +}; + +/* + * We keep an array of pseudoflavors with the export, in order from most + * to least preferred. For the foreseeable future, we don't expect more + * than the eight pseudoflavors null, unix, krb5, krb5i, krb5p, skpm3, + * spkm3i, and spkm3p (and using all 8 at once should be rare). + */ +#define MAX_SECINFO_LIST 8 + +struct exp_flavor_info { + u32 pseudoflavor; + u32 flags; +}; + +struct svc_export { + struct cache_head h; + struct auth_domain * ex_client; + int ex_flags; + struct path ex_path; + kuid_t ex_anon_uid; + kgid_t ex_anon_gid; + int ex_fsid; + unsigned char * ex_uuid; /* 16 byte fsid */ + struct nfsd4_fs_locations ex_fslocs; + int ex_nflavors; + struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; + struct cache_detail *cd; +}; + +/* an "export key" (expkey) maps a filehandlefragement to an + * svc_export for a given client. There can be several per export, + * for the different fsid types. + */ +struct svc_expkey { + struct cache_head h; + + struct auth_domain * ek_client; + int ek_fsidtype; + u32 ek_fsid[6]; + + struct path ek_path; +}; + +#define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) +#define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE) +#define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES) + +int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp); +__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); + +/* + * Function declarations + */ +int nfsd_export_init(struct net *); +void nfsd_export_shutdown(struct net *); +void nfsd_export_flush(struct net *); +struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, + struct path *); +struct svc_export * rqst_exp_parent(struct svc_rqst *, + struct path *); +struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *); +int exp_rootfh(struct net *, struct auth_domain *, + char *path, struct knfsd_fh *, int maxsize); +__be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); +__be32 nfserrno(int errno); + +static inline void exp_put(struct svc_export *exp) +{ + cache_put(&exp->h, exp->cd); +} + +static inline void exp_get(struct svc_export *exp) +{ + cache_get(&exp->h); +} +struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *); + +#endif /* NFSD_EXPORT_H */ diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 7d5c310678d0..72004caad718 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -19,9 +19,10 @@ #include #include -#include #include +#include "export.h" + /* * nfsd version */ diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h deleted file mode 100644 index b12c4e526ef2..000000000000 --- a/include/linux/nfsd/export.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * include/linux/nfsd/export.h - * - * Public declarations for NFS exports. The definitions for the - * syscall interface are in nfsctl.h - * - * Copyright (C) 1995-1997 Olaf Kirch - */ -#ifndef NFSD_EXPORT_H -#define NFSD_EXPORT_H - -#include -#include - -struct knfsd_fh; -struct svc_fh; -struct svc_rqst; - -/* - * FS Locations - */ - -#define MAX_FS_LOCATIONS 128 - -struct nfsd4_fs_location { - char *hosts; /* colon separated list of hosts */ - char *path; /* slash separated list of path components */ -}; - -struct nfsd4_fs_locations { - uint32_t locations_count; - struct nfsd4_fs_location *locations; -/* If we're not actually serving this data ourselves (only providing a - * list of replicas that do serve it) then we set "migrated": */ - int migrated; -}; - -/* - * We keep an array of pseudoflavors with the export, in order from most - * to least preferred. For the foreseeable future, we don't expect more - * than the eight pseudoflavors null, unix, krb5, krb5i, krb5p, skpm3, - * spkm3i, and spkm3p (and using all 8 at once should be rare). - */ -#define MAX_SECINFO_LIST 8 - -struct exp_flavor_info { - u32 pseudoflavor; - u32 flags; -}; - -struct svc_export { - struct cache_head h; - struct auth_domain * ex_client; - int ex_flags; - struct path ex_path; - kuid_t ex_anon_uid; - kgid_t ex_anon_gid; - int ex_fsid; - unsigned char * ex_uuid; /* 16 byte fsid */ - struct nfsd4_fs_locations ex_fslocs; - int ex_nflavors; - struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; - struct cache_detail *cd; -}; - -/* an "export key" (expkey) maps a filehandlefragement to an - * svc_export for a given client. There can be several per export, - * for the different fsid types. - */ -struct svc_expkey { - struct cache_head h; - - struct auth_domain * ek_client; - int ek_fsidtype; - u32 ek_fsid[6]; - - struct path ek_path; -}; - -#define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) -#define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE) -#define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES) - -int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp); -__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); - -/* - * Function declarations - */ -int nfsd_export_init(struct net *); -void nfsd_export_shutdown(struct net *); -void nfsd_export_flush(struct net *); -struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, - struct path *); -struct svc_export * rqst_exp_parent(struct svc_rqst *, - struct path *); -struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *); -int exp_rootfh(struct net *, struct auth_domain *, - char *path, struct knfsd_fh *, int maxsize); -__be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); -__be32 nfserrno(int errno); - -static inline void exp_put(struct svc_export *exp) -{ - cache_put(&exp->h, exp->cd); -} - -static inline void exp_get(struct svc_export *exp) -{ - cache_get(&exp->h); -} -struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *); - -#endif /* NFSD_EXPORT_H */ -- cgit v1.2.3-71-gd317 From 7f94423e8fcc1e0f3416be76d3da0982f586d565 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2014 19:37:15 +0200 Subject: nfsd: move to fs/nfsd There are no legitimate users outside of fs/nfsd, so move it there. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsd.h | 2 +- fs/nfsd/stats.c | 1 - fs/nfsd/stats.h | 43 +++++++++++++++++++++++++++++++++++++++++++ include/linux/nfsd/stats.h | 45 --------------------------------------------- 4 files changed, 44 insertions(+), 47 deletions(-) create mode 100644 fs/nfsd/stats.h delete mode 100644 include/linux/nfsd/stats.h (limited to 'include') diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 72004caad718..7a07f9c6ee78 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -19,8 +19,8 @@ #include #include -#include +#include "stats.h" #include "export.h" /* diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 6d4521feb6e3..cd90878a76aa 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include "nfsd.h" diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h new file mode 100644 index 000000000000..a5c944b771c6 --- /dev/null +++ b/fs/nfsd/stats.h @@ -0,0 +1,43 @@ +/* + * Statistics for NFS server. + * + * Copyright (C) 1995, 1996 Olaf Kirch + */ +#ifndef _NFSD_STATS_H +#define _NFSD_STATS_H + +#include + + +struct nfsd_stats { + unsigned int rchits; /* repcache hits */ + unsigned int rcmisses; /* repcache hits */ + unsigned int rcnocache; /* uncached reqs */ + unsigned int fh_stale; /* FH stale error */ + unsigned int fh_lookup; /* dentry cached */ + unsigned int fh_anon; /* anon file dentry returned */ + unsigned int fh_nocache_dir; /* filehandle not found in dcache */ + unsigned int fh_nocache_nondir; /* filehandle not found in dcache */ + unsigned int io_read; /* bytes returned to read requests */ + unsigned int io_write; /* bytes passed in write requests */ + unsigned int th_cnt; /* number of available threads */ + unsigned int th_usage[10]; /* number of ticks during which n perdeciles + * of available threads were in use */ + unsigned int th_fullcnt; /* number of times last free thread was used */ + unsigned int ra_size; /* size of ra cache */ + unsigned int ra_depth[11]; /* number of times ra entry was found that deep + * in the cache (10percentiles). [10] = not found */ +#ifdef CONFIG_NFSD_V4 + unsigned int nfs4_opcount[LAST_NFS4_OP + 1]; /* count of individual nfsv4 operations */ +#endif + +}; + + +extern struct nfsd_stats nfsdstats; +extern struct svc_stat nfsd_svcstats; + +void nfsd_stat_init(void); +void nfsd_stat_shutdown(void); + +#endif /* _NFSD_STATS_H */ diff --git a/include/linux/nfsd/stats.h b/include/linux/nfsd/stats.h deleted file mode 100644 index e75b2544ff12..000000000000 --- a/include/linux/nfsd/stats.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * linux/include/linux/nfsd/stats.h - * - * Statistics for NFS server. - * - * Copyright (C) 1995, 1996 Olaf Kirch - */ -#ifndef LINUX_NFSD_STATS_H -#define LINUX_NFSD_STATS_H - -#include - - -struct nfsd_stats { - unsigned int rchits; /* repcache hits */ - unsigned int rcmisses; /* repcache hits */ - unsigned int rcnocache; /* uncached reqs */ - unsigned int fh_stale; /* FH stale error */ - unsigned int fh_lookup; /* dentry cached */ - unsigned int fh_anon; /* anon file dentry returned */ - unsigned int fh_nocache_dir; /* filehandle not found in dcache */ - unsigned int fh_nocache_nondir; /* filehandle not found in dcache */ - unsigned int io_read; /* bytes returned to read requests */ - unsigned int io_write; /* bytes passed in write requests */ - unsigned int th_cnt; /* number of available threads */ - unsigned int th_usage[10]; /* number of ticks during which n perdeciles - * of available threads were in use */ - unsigned int th_fullcnt; /* number of times last free thread was used */ - unsigned int ra_size; /* size of ra cache */ - unsigned int ra_depth[11]; /* number of times ra entry was found that deep - * in the cache (10percentiles). [10] = not found */ -#ifdef CONFIG_NFSD_V4 - unsigned int nfs4_opcount[LAST_NFS4_OP + 1]; /* count of individual nfsv4 operations */ -#endif - -}; - - -extern struct nfsd_stats nfsdstats; -extern struct svc_stat nfsd_svcstats; - -void nfsd_stat_init(void); -void nfsd_stat_shutdown(void); - -#endif /* LINUX_NFSD_STATS_H */ -- cgit v1.2.3-71-gd317 From 6f226e2ab1b895c8685e868af0a5f797fcaaaf57 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2014 19:37:16 +0200 Subject: nfsd: remove There is almost nothing left it in, just merge it into the only file that includes it. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsd.h | 9 ++++++++- include/linux/nfsd/debug.h | 19 ------------------- 2 files changed, 8 insertions(+), 20 deletions(-) delete mode 100644 include/linux/nfsd/debug.h (limited to 'include') diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 7a07f9c6ee78..e9f2fd42d184 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -18,11 +18,18 @@ #include #include -#include +#include #include "stats.h" #include "export.h" +#undef ifdebug +#ifdef NFSD_DEBUG +# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag) +#else +# define ifdebug(flag) if (0) +#endif + /* * nfsd version */ diff --git a/include/linux/nfsd/debug.h b/include/linux/nfsd/debug.h deleted file mode 100644 index 19ef8375b577..000000000000 --- a/include/linux/nfsd/debug.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * linux/include/linux/nfsd/debug.h - * - * Debugging-related stuff for nfsd - * - * Copyright (C) 1995 Olaf Kirch - */ -#ifndef LINUX_NFSD_DEBUG_H -#define LINUX_NFSD_DEBUG_H - -#include - -# undef ifdebug -# ifdef NFSD_DEBUG -# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag) -# else -# define ifdebug(flag) if (0) -# endif -#endif /* LINUX_NFSD_DEBUG_H */ -- cgit v1.2.3-71-gd317 From 6403eb1f646a49cc92f25c08f8716f8870a4a865 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 26 Apr 2014 19:59:52 +0800 Subject: f2fs: introduce help macro ADDRS_PER_PAGE() Introduce help macro ADDRS_PER_PAGE() to get the number of address pointers in direct node or inode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 ++---- fs/f2fs/file.c | 5 +---- fs/f2fs/recovery.c | 5 +---- include/linux/f2fs_fs.h | 3 +++ 4 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0147de7e3973..273fe1631af9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -652,8 +652,7 @@ static int get_data_block(struct inode *inode, sector_t iblock, goto put_out; } - end_offset = IS_INODE(dn.node_page) ? - ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; + end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); bh_result->b_size = (((size_t)1) << blkbits); dn.ofs_in_node++; pgofs++; @@ -675,8 +674,7 @@ get_next: if (dn.data_blkaddr == NEW_ADDR) goto put_out; - end_offset = IS_INODE(dn.node_page) ? - ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; + end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); } if (maxblocks > (bh_result->b_size >> blkbits)) { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 60e7d5448a1d..bb365c932555 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -288,10 +288,7 @@ int truncate_blocks(struct inode *inode, u64 from) return err; } - if (IS_INODE(dn.node_page)) - count = ADDRS_PER_INODE(F2FS_I(inode)); - else - count = ADDRS_PER_BLOCK; + count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); count -= dn.ofs_in_node; f2fs_bug_on(count < 0); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9eb6487f383d..be1e3e881725 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -301,10 +301,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, goto out; start = start_bidx_of_node(ofs_of_node(page), fi); - if (IS_INODE(page)) - end = start + ADDRS_PER_INODE(fi); - else - end = start + ADDRS_PER_BLOCK; + end = start + ADDRS_PER_PAGE(page, fi); f2fs_lock_op(sbi); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index df53e1753a76..8c03f71307c6 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -146,6 +146,9 @@ struct f2fs_extent { #define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ #define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ +#define ADDRS_PER_PAGE(page, fi) \ + (IS_INODE(page) ? ADDRS_PER_INODE(fi) : ADDRS_PER_BLOCK) + #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) #define NODE_IND1_BLOCK (DEF_ADDRS_PER_INODE + 3) -- cgit v1.2.3-71-gd317 From 62aed044ea4bef36ac3f6885611b013b11c9be2d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 6 May 2014 16:46:04 +0800 Subject: f2fs: add a tracepoint for f2fs_write_begin This patch adds a tracepoint for f2fs_write_begin to trace write op of user. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ include/trace/events/f2fs.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'include') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a7bb98f5831a..cde0d69969b9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -900,6 +900,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, struct dnode_of_data dn; int err = 0; + trace_f2fs_write_begin(inode, pos, len, flags); + f2fs_balance_fs(sbi); repeat: err = f2fs_convert_inline_data(inode, pos + len); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 67f38faac589..1e9375aa9340 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -659,6 +659,36 @@ DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio, TP_CONDITION(bio) ); +TRACE_EVENT(f2fs_write_begin, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, pos) + __field(unsigned int, len) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pos = pos; + __entry->len = len; + __entry->flags = flags; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pos = %llu, len = %u, flags = %u", + show_dev_ino(__entry), + (unsigned long long)__entry->pos, + __entry->len, + __entry->flags) +); + DECLARE_EVENT_CLASS(f2fs__page, TP_PROTO(struct page *page, int type), -- cgit v1.2.3-71-gd317 From dfb2bf38bf89e15a65f9996566b2945921388a2f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 6 May 2014 16:47:23 +0800 Subject: f2fs: add a tracepoint for f2fs_write_end This patch adds a tracepoint for f2fs_write_end to trace write op of user. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ include/trace/events/f2fs.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'include') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cde0d69969b9..ea58fb698ac6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -989,6 +989,8 @@ static int f2fs_write_end(struct file *file, { struct inode *inode = page->mapping->host; + trace_f2fs_write_end(inode, pos, len, copied); + SetPageUptodate(page); set_page_dirty(page); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 1e9375aa9340..483804b55742 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -689,6 +689,36 @@ TRACE_EVENT(f2fs_write_begin, __entry->flags) ); +TRACE_EVENT(f2fs_write_end, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), + + TP_ARGS(inode, pos, len, copied), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, pos) + __field(unsigned int, len) + __field(unsigned int, copied) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pos = pos; + __entry->len = len; + __entry->copied = copied; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pos = %llu, len = %u, copied = %u", + show_dev_ino(__entry), + (unsigned long long)__entry->pos, + __entry->len, + __entry->copied) +); + DECLARE_EVENT_CLASS(f2fs__page, TP_PROTO(struct page *page, int type), -- cgit v1.2.3-71-gd317 From ecda0de3430455378f1c02523bf3ad71d91d613a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 6 May 2014 16:48:26 +0800 Subject: f2fs: add a tracepoint for f2fs_write_{meta,node,data}_page This patch adds a tracepoint for f2fs_write_{meta,node,data}_page to trace when page is writting out. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 ++ fs/f2fs/data.c | 2 ++ fs/f2fs/node.c | 2 ++ include/trace/events/f2fs.h | 7 +++++++ 4 files changed, 13 insertions(+) (limited to 'include') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 4206f1664be5..c95d62281d7e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -157,6 +157,8 @@ static int f2fs_write_meta_page(struct page *page, struct inode *inode = page->mapping->host; struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + trace_f2fs_writepage(page, META); + if (unlikely(sbi->por_doing)) goto redirty_out; if (wbc->for_reclaim) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ea58fb698ac6..b997d552880e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -788,6 +788,8 @@ static int f2fs_write_data_page(struct page *page, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, }; + trace_f2fs_writepage(page, DATA); + if (page->index < end_index) goto write; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 059aaf5dda2b..49bdddbcadea 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1199,6 +1199,8 @@ static int f2fs_write_node_page(struct page *page, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, }; + trace_f2fs_writepage(page, NODE); + if (unlikely(sbi->por_doing)) goto redirty_out; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 483804b55742..d70991e69e58 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -751,6 +751,13 @@ DECLARE_EVENT_CLASS(f2fs__page, __entry->dirty) ); +DEFINE_EVENT(f2fs__page, f2fs_writepage, + + TP_PROTO(struct page *page, int type), + + TP_ARGS(page, type) +); + DEFINE_EVENT(f2fs__page, f2fs_set_page_dirty, TP_PROTO(struct page *page, int type), -- cgit v1.2.3-71-gd317 From e57484343898094bb8f72a2aa1a50929d27aa027 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 6 May 2014 16:51:24 +0800 Subject: f2fs: add a tracepoint for f2fs_write_{meta,node,data}_pages This patch adds a tracepoint for f2fs_write_{meta,node,data}_pages to trace when pages are fsyncing/flushing. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 ++ fs/f2fs/data.c | 2 ++ fs/f2fs/node.c | 2 ++ include/trace/events/f2fs.h | 64 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+) (limited to 'include') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index c95d62281d7e..fe968c7bfc90 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -186,6 +186,8 @@ static int f2fs_write_meta_pages(struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); long diff, written; + trace_f2fs_writepages(mapping->host, wbc, META); + /* collect a number of dirty meta pages and write together */ if (wbc->for_kupdate || get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b997d552880e..21bfafaafe83 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -860,6 +860,8 @@ static int f2fs_write_data_pages(struct address_space *mapping, int ret; long diff; + trace_f2fs_writepages(mapping->host, wbc, DATA); + /* deal with chardevs and other special file */ if (!mapping->a_ops->writepage) return 0; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 49bdddbcadea..3d60d3d34ed2 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1242,6 +1242,8 @@ static int f2fs_write_node_pages(struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); long diff; + trace_f2fs_writepages(mapping->host, wbc, NODE); + /* balancing f2fs's metadata in background */ f2fs_balance_fs_bg(sbi); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index d70991e69e58..91b1fcc5ec93 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -772,6 +772,70 @@ DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite, TP_ARGS(page, type) ); +TRACE_EVENT(f2fs_writepages, + + TP_PROTO(struct inode *inode, struct writeback_control *wbc, int type), + + TP_ARGS(inode, wbc, type), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, type) + __field(int, dir) + __field(long, nr_to_write) + __field(long, pages_skipped) + __field(loff_t, range_start) + __field(loff_t, range_end) + __field(pgoff_t, writeback_index) + __field(int, sync_mode) + __field(char, for_kupdate) + __field(char, for_background) + __field(char, tagged_writepages) + __field(char, for_reclaim) + __field(char, range_cyclic) + __field(char, for_sync) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->type = type; + __entry->dir = S_ISDIR(inode->i_mode); + __entry->nr_to_write = wbc->nr_to_write; + __entry->pages_skipped = wbc->pages_skipped; + __entry->range_start = wbc->range_start; + __entry->range_end = wbc->range_end; + __entry->writeback_index = inode->i_mapping->writeback_index; + __entry->sync_mode = wbc->sync_mode; + __entry->for_kupdate = wbc->for_kupdate; + __entry->for_background = wbc->for_background; + __entry->tagged_writepages = wbc->tagged_writepages; + __entry->for_reclaim = wbc->for_reclaim; + __entry->range_cyclic = wbc->range_cyclic; + __entry->for_sync = wbc->for_sync; + ), + + TP_printk("dev = (%d,%d), ino = %lu, %s, %s, nr_to_write %ld, " + "skipped %ld, start %lld, end %lld, wb_idx %lu, sync_mode %d, " + "kupdate %u background %u tagged %u reclaim %u cyclic %u sync %u", + show_dev_ino(__entry), + show_block_type(__entry->type), + show_file_type(__entry->dir), + __entry->nr_to_write, + __entry->pages_skipped, + __entry->range_start, + __entry->range_end, + (unsigned long)__entry->writeback_index, + __entry->sync_mode, + __entry->for_kupdate, + __entry->for_background, + __entry->tagged_writepages, + __entry->for_reclaim, + __entry->range_cyclic, + __entry->for_sync) +); + TRACE_EVENT(f2fs_submit_page_mbio, TP_PROTO(struct page *page, int rw, int type, block_t blk_addr), -- cgit v1.2.3-71-gd317 From c20e89cde669799eff62bf8c00ca9a4819c4e11f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 6 May 2014 16:53:08 +0800 Subject: f2fs: add a tracepoint for f2fs_read_data_page This patch adds a tracepoint for f2fs_read_data_page to trace when page is readed by user. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ include/trace/events/f2fs.h | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 21bfafaafe83..8c250a5d6f26 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -713,6 +713,8 @@ static int f2fs_read_data_page(struct file *file, struct page *page) struct inode *inode = page->mapping->host; int ret; + trace_f2fs_readpage(page, DATA); + /* If the file has inline data, try to read it directlly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 91b1fcc5ec93..b983990b4a9f 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -732,6 +732,7 @@ DECLARE_EVENT_CLASS(f2fs__page, __field(int, dir) __field(pgoff_t, index) __field(int, dirty) + __field(int, uptodate) ), TP_fast_assign( @@ -741,14 +742,17 @@ DECLARE_EVENT_CLASS(f2fs__page, __entry->dir = S_ISDIR(page->mapping->host->i_mode); __entry->index = page->index; __entry->dirty = PageDirty(page); + __entry->uptodate = PageUptodate(page); ), - TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, dirty = %d", + TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, " + "dirty = %d, uptodate = %d", show_dev_ino(__entry), show_block_type(__entry->type), show_file_type(__entry->dir), (unsigned long)__entry->index, - __entry->dirty) + __entry->dirty, + __entry->uptodate) ); DEFINE_EVENT(f2fs__page, f2fs_writepage, @@ -758,6 +762,13 @@ DEFINE_EVENT(f2fs__page, f2fs_writepage, TP_ARGS(page, type) ); +DEFINE_EVENT(f2fs__page, f2fs_readpage, + + TP_PROTO(struct page *page, int type), + + TP_ARGS(page, type) +); + DEFINE_EVENT(f2fs__page, f2fs_set_page_dirty, TP_PROTO(struct page *page, int type), -- cgit v1.2.3-71-gd317 From 257462dbf3ed233de0dc2e489dcc58579535b33f Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 27 Feb 2014 14:53:34 +0900 Subject: pwm-backlight: switch to gpiod interface Switch to the new gpiod interface, which allows to handle GPIO properties such as active low transparently and removes a whole bunch of code. There are still a couple of users of this driver that rely on passing the enable GPIO number through platform data, so a fallback mechanism using a GPIO number is still available to avoid breaking them. It will be removed once current users have switched to the GPIO lookup tables provided by the gpiod interface. Signed-off-by: Alexandre Courbot Signed-off-by: Thierry Reding --- drivers/video/backlight/pwm_bl.c | 69 +++++++++++++++++----------------------- include/linux/pwm_backlight.h | 5 +-- 2 files changed, 30 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index fa7f5c35b7fb..cc49347bbcc0 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -10,8 +10,8 @@ * published by the Free Software Foundation. */ +#include #include -#include #include #include #include @@ -32,8 +32,7 @@ struct pwm_bl_data { unsigned int *levels; bool enabled; struct regulator *power_supply; - int enable_gpio; - unsigned long enable_gpio_flags; + struct gpio_desc *enable_gpio; unsigned int scale; int (*notify)(struct device *, int brightness); @@ -54,12 +53,8 @@ static void pwm_backlight_power_on(struct pwm_bl_data *pb, int brightness) if (err < 0) dev_err(pb->dev, "failed to enable power supply\n"); - if (gpio_is_valid(pb->enable_gpio)) { - if (pb->enable_gpio_flags & PWM_BACKLIGHT_GPIO_ACTIVE_LOW) - gpio_set_value(pb->enable_gpio, 0); - else - gpio_set_value(pb->enable_gpio, 1); - } + if (pb->enable_gpio) + gpiod_set_value(pb->enable_gpio, 1); pwm_enable(pb->pwm); pb->enabled = true; @@ -73,12 +68,8 @@ static void pwm_backlight_power_off(struct pwm_bl_data *pb) pwm_config(pb->pwm, 0, pb->period); pwm_disable(pb->pwm); - if (gpio_is_valid(pb->enable_gpio)) { - if (pb->enable_gpio_flags & PWM_BACKLIGHT_GPIO_ACTIVE_LOW) - gpio_set_value(pb->enable_gpio, 1); - else - gpio_set_value(pb->enable_gpio, 0); - } + if (pb->enable_gpio) + gpiod_set_value(pb->enable_gpio, 0); regulator_disable(pb->power_supply); pb->enabled = false; @@ -148,7 +139,6 @@ static int pwm_backlight_parse_dt(struct device *dev, struct platform_pwm_backlight_data *data) { struct device_node *node = dev->of_node; - enum of_gpio_flags flags; struct property *prop; int length; u32 value; @@ -189,14 +179,6 @@ static int pwm_backlight_parse_dt(struct device *dev, data->max_brightness--; } - data->enable_gpio = of_get_named_gpio_flags(node, "enable-gpios", 0, - &flags); - if (data->enable_gpio == -EPROBE_DEFER) - return -EPROBE_DEFER; - - if (gpio_is_valid(data->enable_gpio) && (flags & OF_GPIO_ACTIVE_LOW)) - data->enable_gpio_flags |= PWM_BACKLIGHT_GPIO_ACTIVE_LOW; - return 0; } @@ -256,8 +238,6 @@ static int pwm_backlight_probe(struct platform_device *pdev) } else pb->scale = data->max_brightness; - pb->enable_gpio = data->enable_gpio; - pb->enable_gpio_flags = data->enable_gpio_flags; pb->notify = data->notify; pb->notify_after = data->notify_after; pb->check_fb = data->check_fb; @@ -265,26 +245,38 @@ static int pwm_backlight_probe(struct platform_device *pdev) pb->dev = &pdev->dev; pb->enabled = false; - if (gpio_is_valid(pb->enable_gpio)) { - unsigned long flags; - - if (pb->enable_gpio_flags & PWM_BACKLIGHT_GPIO_ACTIVE_LOW) - flags = GPIOF_OUT_INIT_HIGH; + pb->enable_gpio = devm_gpiod_get(&pdev->dev, "enable"); + if (IS_ERR(pb->enable_gpio)) { + ret = PTR_ERR(pb->enable_gpio); + if (ret == -ENOENT) + pb->enable_gpio = NULL; else - flags = GPIOF_OUT_INIT_LOW; + goto err_alloc; + } - ret = gpio_request_one(pb->enable_gpio, flags, "enable"); + /* + * Compatibility fallback for drivers still using the integer GPIO + * platform data. Must go away soon. + */ + if (!pb->enable_gpio && gpio_is_valid(data->enable_gpio)) { + ret = devm_gpio_request_one(&pdev->dev, data->enable_gpio, + GPIOF_OUT_INIT_HIGH, "enable"); if (ret < 0) { dev_err(&pdev->dev, "failed to request GPIO#%d: %d\n", - pb->enable_gpio, ret); + data->enable_gpio, ret); goto err_alloc; } + + pb->enable_gpio = gpio_to_desc(data->enable_gpio); } + if (pb->enable_gpio) + gpiod_direction_output(pb->enable_gpio, 1); + pb->power_supply = devm_regulator_get(&pdev->dev, "power"); if (IS_ERR(pb->power_supply)) { ret = PTR_ERR(pb->power_supply); - goto err_gpio; + goto err_alloc; } pb->pwm = devm_pwm_get(&pdev->dev, NULL); @@ -295,7 +287,7 @@ static int pwm_backlight_probe(struct platform_device *pdev) if (IS_ERR(pb->pwm)) { dev_err(&pdev->dev, "unable to request legacy PWM\n"); ret = PTR_ERR(pb->pwm); - goto err_gpio; + goto err_alloc; } } @@ -320,7 +312,7 @@ static int pwm_backlight_probe(struct platform_device *pdev) if (IS_ERR(bl)) { dev_err(&pdev->dev, "failed to register backlight\n"); ret = PTR_ERR(bl); - goto err_gpio; + goto err_alloc; } if (data->dft_brightness > data->max_brightness) { @@ -336,9 +328,6 @@ static int pwm_backlight_probe(struct platform_device *pdev) platform_set_drvdata(pdev, bl); return 0; -err_gpio: - if (gpio_is_valid(pb->enable_gpio)) - gpio_free(pb->enable_gpio); err_alloc: if (data->exit) data->exit(&pdev->dev); diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h index 2de2e275b2cb..efdd9227a49c 100644 --- a/include/linux/pwm_backlight.h +++ b/include/linux/pwm_backlight.h @@ -6,9 +6,6 @@ #include -/* TODO: convert to gpiod_*() API once it has been merged */ -#define PWM_BACKLIGHT_GPIO_ACTIVE_LOW (1 << 0) - struct platform_pwm_backlight_data { int pwm_id; unsigned int max_brightness; @@ -16,8 +13,8 @@ struct platform_pwm_backlight_data { unsigned int lth_brightness; unsigned int pwm_period_ns; unsigned int *levels; + /* TODO remove once all users are switched to gpiod_* API */ int enable_gpio; - unsigned long enable_gpio_flags; int (*init)(struct device *dev); int (*notify)(struct device *dev, int brightness); void (*notify_after)(struct device *dev, int brightness); -- cgit v1.2.3-71-gd317 From 2b53f41fa8604845f4f7c538723694a453088b15 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 7 May 2014 09:21:56 -0400 Subject: cgroup: remove unused CGRP_SANE_BEHAVIOR This cgroup flag has never been used. Only CGRP_ROOT_SANE_BEHAVIOR is used. Remove it. Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 2dfabb3b749a..f482f95c2c72 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -140,8 +140,6 @@ enum { * specified at mount time and thus is implemented here. */ CGRP_CPUSET_CLONE_CHILDREN, - /* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */ - CGRP_SANE_BEHAVIOR, }; struct cgroup { -- cgit v1.2.3-71-gd317 From 7c65bbc7dcface00b295bbd18bce82fe1db3d633 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 6 May 2014 09:26:30 -0400 Subject: tracing: Add trace__enabled() function There are some code paths in the kernel that need to do some preparations before it calls a tracepoint. As that code is worthless overhead when the tracepoint is not enabled, it would be prudent to have that code only run when the tracepoint is active. To accomplish this, all tracepoints now get a static inline function called "trace__enabled()" which returns true when the tracepoint is enabled and false otherwise. As an added bonus, that function uses the static_key of the tracepoint such that no branch is needed. if (trace_mytracepoint_enabled()) { arg = process_tp_arg(); trace_mytracepoint(arg); } Will keep the "process_tp_arg()" (which may be expensive to run) from being executed when the tracepoint isn't enabled. It's best to encapsulate the tracepoint itself in the if statement just to keep races. For example, if you had: if (trace_mytracepoint_enabled()) arg = process_tp_arg(); trace_mytracepoint(arg); There's a chance that the tracepoint could be enabled just after the if statement, and arg will be undefined when calling the tracepoint. Link: http://lkml.kernel.org/r/20140506094407.507b6435@gandalf.local.home Acked-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- Documentation/trace/tracepoints.txt | 24 ++++++++++++++++++++++++ include/linux/tracepoint.h | 10 ++++++++++ 2 files changed, 34 insertions(+) (limited to 'include') diff --git a/Documentation/trace/tracepoints.txt b/Documentation/trace/tracepoints.txt index 6b018b53177a..a3efac621c5a 100644 --- a/Documentation/trace/tracepoints.txt +++ b/Documentation/trace/tracepoints.txt @@ -115,6 +115,30 @@ If the tracepoint has to be used in kernel modules, an EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be used to export the defined tracepoints. +If you need to do a bit of work for a tracepoint parameter, and +that work is only used for the tracepoint, that work can be encapsulated +within an if statement with the following: + + if (trace_foo_bar_enabled()) { + int i; + int tot = 0; + + for (i = 0; i < count; i++) + tot += calculate_nuggets(); + + trace_foo_bar(tot); + } + +All trace_() calls have a matching trace__enabled() +function defined that returns true if the tracepoint is enabled and +false otherwise. The trace_() should always be within the +block of the if (trace__enabled()) to prevent races between +the tracepoint being enabled and the check being seen. + +The advantage of using the trace__enabled() is that it uses +the static_key of the tracepoint to allow the if statement to be implemented +with jump labels and avoid conditional branches. + Note: The convenience macro TRACE_EVENT provides an alternative way to define tracepoints. Check http://lwn.net/Articles/379903, http://lwn.net/Articles/381064 and http://lwn.net/Articles/383362 diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 9d30ee469c2a..2e2a5f7717e5 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -185,6 +185,11 @@ extern void syscall_unregfunc(void); static inline void \ check_trace_callback_type_##name(void (*cb)(data_proto)) \ { \ + } \ + static inline bool \ + trace_##name##_enabled(void) \ + { \ + return static_key_false(&__tracepoint_##name.key); \ } /* @@ -230,6 +235,11 @@ extern void syscall_unregfunc(void); } \ static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \ { \ + } \ + static inline bool \ + trace_##name##_enabled(void) \ + { \ + return false; \ } #define DEFINE_TRACE_FN(name, reg, unreg) -- cgit v1.2.3-71-gd317 From 69902c718c0b476e94ed7fccd3cf29ca39fe433a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 1 May 2014 10:56:44 +0530 Subject: kprobes: Ensure blacklist data is aligned ARC Linux (not supporting native unaligned access) was failing to boot because __start_kprobe_blacklist was not aligned. This was because per generated vmlinux.lds it was emitted right next to .rodata with strings etc hence could be randomly unaligned. Fix that by ensuring a word alignment. While 4 would suffice for 32bit arches and problem at hand, it is probably better to put 8. | Path: (null) CPU: 0 PID: 1 Comm: swapper Not tainted | 3.15.0-rc3-next-20140430 #2 | task: 8f044000 ti: 8f01e000 task.ti: 8f01e000 | | [ECR ]: 0x00230400 => Misaligned r/w from 0x800fb0d3 | [EFA ]: 0x800fb0d3 | [BLINK ]: do_one_initcall+0x86/0x1bc | [ERET ]: init_kprobes+0x52/0x120 Signed-off-by: Vineet Gupta Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: anton Kolesov Link: http://lkml.kernel.org/r/5361DB14.7010406@synopsys.com Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 40ceb3ceba79..8e0204a68c74 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -110,7 +110,8 @@ #endif #ifdef CONFIG_KPROBES -#define KPROBE_BLACKLIST() VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \ +#define KPROBE_BLACKLIST() . = ALIGN(8); \ + VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \ *(_kprobe_blacklist) \ VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .; #else -- cgit v1.2.3-71-gd317 From d28071d102f232d92e52af06d242d041074b54b6 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 4 May 2014 20:55:39 -0400 Subject: tunnel: fix RFC number in comment for INET_ECN_decapsulate() The quoted text and figure are from RFC 6040 ("Tunnelling of Explicit Congestion Notification"). Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/inet_ecn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 3bd22795c3e2..84b20835b736 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -150,7 +150,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb) } /* - * RFC 6080 4.2 + * RFC 6040 4.2 * To decapsulate the inner header at the tunnel egress, a compliant * tunnel egress MUST set the outgoing ECN field to the codepoint at the * intersection of the appropriate arriving inner header (row) and outer -- cgit v1.2.3-71-gd317 From 698365fa1874aa7635d51667a34a2842228e9837 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 5 May 2014 15:55:55 -0700 Subject: net: clean up snmp stats code commit 8f0ea0fe3a036a47767f9c80e (snmp: reduce percpu needs by 50%) reduced snmp array size to 1, so technically it doesn't have to be an array any more. What's more, after the following commit: commit 933393f58fef9963eac61db8093689544e29a600 Date: Thu Dec 22 11:58:51 2011 -0600 percpu: Remove irqsafe_cpu_xxx variants We simply say that regular this_cpu use must be safe regardless of preemption and interrupt state. That has no material change for x86 and s390 implementations of this_cpu operations. However, arches that do not provide their own implementation for this_cpu operations will now get code generated that disables interrupts instead of preemption. probably no arch wants to have SNMP_ARRAY_SZ == 2. At least after almost 3 years, no one complains. So, just convert the array to a single pointer and remove snmp_mib_init() and snmp_mib_free() as well. Cc: Christoph Lameter Cc: Eric Dumazet Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/ip.h | 18 ++-------- include/net/snmp.h | 32 ++++++++--------- net/dccp/proto.c | 9 ++--- net/ipv4/af_inet.c | 93 ++++++++++++++++-------------------------------- net/ipv4/proc.c | 24 ++++++------- net/ipv6/addrconf.c | 17 ++++----- net/ipv6/addrconf_core.c | 2 +- net/ipv6/af_inet6.c | 42 +++++++++------------- net/ipv6/proc.c | 6 ++-- net/sctp/protocol.c | 9 ++--- net/xfrm/xfrm_policy.c | 10 +++--- net/xfrm/xfrm_proc.c | 3 +- 12 files changed, 103 insertions(+), 162 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 1988cefdbb70..16146b667ddb 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -196,27 +196,15 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, #define NET_ADD_STATS_BH(net, field, adnd) SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd) #define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd) -unsigned long snmp_fold_field(void __percpu *mib[], int offt); +unsigned long snmp_fold_field(void __percpu *mib, int offt); #if BITS_PER_LONG==32 -u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t sync_off); +u64 snmp_fold_field64(void __percpu *mib, int offt, size_t sync_off); #else -static inline u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_off) +static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_off) { return snmp_fold_field(mib, offt); } #endif -int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align); - -static inline void snmp_mib_free(void __percpu *ptr[SNMP_ARRAY_SZ]) -{ - int i; - - BUG_ON(ptr == NULL); - for (i = 0; i < SNMP_ARRAY_SZ; i++) { - free_percpu(ptr[i]); - ptr[i] = NULL; - } -} void inet_get_local_port_range(struct net *net, int *low, int *high); diff --git a/include/net/snmp.h b/include/net/snmp.h index 71596261fa99..f1f27fdbb0d5 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -116,51 +116,49 @@ struct linux_xfrm_mib { unsigned long mibs[LINUX_MIB_XFRMMAX]; }; -#define SNMP_ARRAY_SZ 1 - #define DEFINE_SNMP_STAT(type, name) \ - __typeof__(type) __percpu *name[SNMP_ARRAY_SZ] + __typeof__(type) __percpu *name #define DEFINE_SNMP_STAT_ATOMIC(type, name) \ __typeof__(type) *name #define DECLARE_SNMP_STAT(type, name) \ - extern __typeof__(type) __percpu *name[SNMP_ARRAY_SZ] + extern __typeof__(type) __percpu *name #define SNMP_INC_STATS_BH(mib, field) \ - __this_cpu_inc(mib[0]->mibs[field]) + __this_cpu_inc(mib->mibs[field]) #define SNMP_INC_STATS_USER(mib, field) \ - this_cpu_inc(mib[0]->mibs[field]) + this_cpu_inc(mib->mibs[field]) #define SNMP_INC_STATS_ATOMIC_LONG(mib, field) \ atomic_long_inc(&mib->mibs[field]) #define SNMP_INC_STATS(mib, field) \ - this_cpu_inc(mib[0]->mibs[field]) + this_cpu_inc(mib->mibs[field]) #define SNMP_DEC_STATS(mib, field) \ - this_cpu_dec(mib[0]->mibs[field]) + this_cpu_dec(mib->mibs[field]) #define SNMP_ADD_STATS_BH(mib, field, addend) \ - __this_cpu_add(mib[0]->mibs[field], addend) + __this_cpu_add(mib->mibs[field], addend) #define SNMP_ADD_STATS_USER(mib, field, addend) \ - this_cpu_add(mib[0]->mibs[field], addend) + this_cpu_add(mib->mibs[field], addend) #define SNMP_ADD_STATS(mib, field, addend) \ - this_cpu_add(mib[0]->mibs[field], addend) + this_cpu_add(mib->mibs[field], addend) /* - * Use "__typeof__(*mib[0]) *ptr" instead of "__typeof__(mib[0]) ptr" + * Use "__typeof__(*mib) *ptr" instead of "__typeof__(mib) ptr" * to make @ptr a non-percpu pointer. */ #define SNMP_UPD_PO_STATS(mib, basefield, addend) \ do { \ - __typeof__(*mib[0]->mibs) *ptr = mib[0]->mibs; \ + __typeof__(*mib->mibs) *ptr = mib->mibs; \ this_cpu_inc(ptr[basefield##PKTS]); \ this_cpu_add(ptr[basefield##OCTETS], addend); \ } while (0) #define SNMP_UPD_PO_STATS_BH(mib, basefield, addend) \ do { \ - __typeof__(*mib[0]->mibs) *ptr = mib[0]->mibs; \ + __typeof__(*mib->mibs) *ptr = mib->mibs; \ __this_cpu_inc(ptr[basefield##PKTS]); \ __this_cpu_add(ptr[basefield##OCTETS], addend); \ } while (0) @@ -170,7 +168,7 @@ struct linux_xfrm_mib { #define SNMP_ADD_STATS64_BH(mib, field, addend) \ do { \ - __typeof__(*mib[0]) *ptr = __this_cpu_ptr((mib)[0]); \ + __typeof__(*mib) *ptr = __this_cpu_ptr(mib); \ u64_stats_update_begin(&ptr->syncp); \ ptr->mibs[field] += addend; \ u64_stats_update_end(&ptr->syncp); \ @@ -191,8 +189,8 @@ struct linux_xfrm_mib { #define SNMP_INC_STATS64(mib, field) SNMP_ADD_STATS64(mib, field, 1) #define SNMP_UPD_PO_STATS64_BH(mib, basefield, addend) \ do { \ - __typeof__(*mib[0]) *ptr; \ - ptr = __this_cpu_ptr((mib)[0]); \ + __typeof__(*mib) *ptr; \ + ptr = __this_cpu_ptr(mib); \ u64_stats_update_begin(&ptr->syncp); \ ptr->mibs[basefield##PKTS]++; \ ptr->mibs[basefield##OCTETS] += addend; \ diff --git a/net/dccp/proto.c b/net/dccp/proto.c index eb892b4f4814..de2c1e719305 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1084,14 +1084,15 @@ EXPORT_SYMBOL_GPL(dccp_shutdown); static inline int dccp_mib_init(void) { - return snmp_mib_init((void __percpu **)dccp_statistics, - sizeof(struct dccp_mib), - __alignof__(struct dccp_mib)); + dccp_statistics = alloc_percpu(struct dccp_mib); + if (!dccp_statistics) + return -ENOMEM; + return 0; } static inline void dccp_mib_exit(void) { - snmp_mib_free((void __percpu **)dccp_statistics); + free_percpu(dccp_statistics); } static int thash_entries; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8c54870db792..6765d91b8e75 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1476,22 +1476,20 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, } EXPORT_SYMBOL_GPL(inet_ctl_sock_create); -unsigned long snmp_fold_field(void __percpu *mib[], int offt) +unsigned long snmp_fold_field(void __percpu *mib, int offt) { unsigned long res = 0; - int i, j; + int i; - for_each_possible_cpu(i) { - for (j = 0; j < SNMP_ARRAY_SZ; j++) - res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt); - } + for_each_possible_cpu(i) + res += *(((unsigned long *) per_cpu_ptr(mib, i)) + offt); return res; } EXPORT_SYMBOL_GPL(snmp_fold_field); #if BITS_PER_LONG==32 -u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) +u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset) { u64 res = 0; int cpu; @@ -1502,7 +1500,7 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) u64 v; unsigned int start; - bhptr = per_cpu_ptr(mib[0], cpu); + bhptr = per_cpu_ptr(mib, cpu); syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); do { start = u64_stats_fetch_begin_irq(syncp); @@ -1516,25 +1514,6 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) EXPORT_SYMBOL_GPL(snmp_fold_field64); #endif -int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align) -{ - BUG_ON(ptr == NULL); - ptr[0] = __alloc_percpu(mibsize, align); - if (!ptr[0]) - return -ENOMEM; - -#if SNMP_ARRAY_SZ == 2 - ptr[1] = __alloc_percpu(mibsize, align); - if (!ptr[1]) { - free_percpu(ptr[0]); - ptr[0] = NULL; - return -ENOMEM; - } -#endif - return 0; -} -EXPORT_SYMBOL_GPL(snmp_mib_init); - #ifdef CONFIG_IP_MULTICAST static const struct net_protocol igmp_protocol = { .handler = igmp_rcv, @@ -1570,40 +1549,30 @@ static __net_init int ipv4_mib_init_net(struct net *net) { int i; - if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, - sizeof(struct tcp_mib), - __alignof__(struct tcp_mib)) < 0) + net->mib.tcp_statistics = alloc_percpu(struct tcp_mib); + if (!net->mib.tcp_statistics) goto err_tcp_mib; - if (snmp_mib_init((void __percpu **)net->mib.ip_statistics, - sizeof(struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) + net->mib.ip_statistics = alloc_percpu(struct ipstats_mib); + if (!net->mib.ip_statistics) goto err_ip_mib; for_each_possible_cpu(i) { struct ipstats_mib *af_inet_stats; - af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i); + af_inet_stats = per_cpu_ptr(net->mib.ip_statistics, i); u64_stats_init(&af_inet_stats->syncp); -#if SNMP_ARRAY_SZ == 2 - af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i); - u64_stats_init(&af_inet_stats->syncp); -#endif } - if (snmp_mib_init((void __percpu **)net->mib.net_statistics, - sizeof(struct linux_mib), - __alignof__(struct linux_mib)) < 0) + net->mib.net_statistics = alloc_percpu(struct linux_mib); + if (!net->mib.net_statistics) goto err_net_mib; - if (snmp_mib_init((void __percpu **)net->mib.udp_statistics, - sizeof(struct udp_mib), - __alignof__(struct udp_mib)) < 0) + net->mib.udp_statistics = alloc_percpu(struct udp_mib); + if (!net->mib.udp_statistics) goto err_udp_mib; - if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics, - sizeof(struct udp_mib), - __alignof__(struct udp_mib)) < 0) + net->mib.udplite_statistics = alloc_percpu(struct udp_mib); + if (!net->mib.udplite_statistics) goto err_udplite_mib; - if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics, - sizeof(struct icmp_mib), - __alignof__(struct icmp_mib)) < 0) + net->mib.icmp_statistics = alloc_percpu(struct icmp_mib); + if (!net->mib.icmp_statistics) goto err_icmp_mib; net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib), GFP_KERNEL); @@ -1614,17 +1583,17 @@ static __net_init int ipv4_mib_init_net(struct net *net) return 0; err_icmpmsg_mib: - snmp_mib_free((void __percpu **)net->mib.icmp_statistics); + free_percpu(net->mib.icmp_statistics); err_icmp_mib: - snmp_mib_free((void __percpu **)net->mib.udplite_statistics); + free_percpu(net->mib.udplite_statistics); err_udplite_mib: - snmp_mib_free((void __percpu **)net->mib.udp_statistics); + free_percpu(net->mib.udp_statistics); err_udp_mib: - snmp_mib_free((void __percpu **)net->mib.net_statistics); + free_percpu(net->mib.net_statistics); err_net_mib: - snmp_mib_free((void __percpu **)net->mib.ip_statistics); + free_percpu(net->mib.ip_statistics); err_ip_mib: - snmp_mib_free((void __percpu **)net->mib.tcp_statistics); + free_percpu(net->mib.tcp_statistics); err_tcp_mib: return -ENOMEM; } @@ -1632,12 +1601,12 @@ err_tcp_mib: static __net_exit void ipv4_mib_exit_net(struct net *net) { kfree(net->mib.icmpmsg_statistics); - snmp_mib_free((void __percpu **)net->mib.icmp_statistics); - snmp_mib_free((void __percpu **)net->mib.udplite_statistics); - snmp_mib_free((void __percpu **)net->mib.udp_statistics); - snmp_mib_free((void __percpu **)net->mib.net_statistics); - snmp_mib_free((void __percpu **)net->mib.ip_statistics); - snmp_mib_free((void __percpu **)net->mib.tcp_statistics); + free_percpu(net->mib.icmp_statistics); + free_percpu(net->mib.udplite_statistics); + free_percpu(net->mib.udp_statistics); + free_percpu(net->mib.net_statistics); + free_percpu(net->mib.ip_statistics); + free_percpu(net->mib.tcp_statistics); } static __net_initdata struct pernet_operations ipv4_mib_ops = { diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index ad737fad6d8b..ae0af9386f7c 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -345,15 +345,15 @@ static void icmp_put(struct seq_file *seq) for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS), - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); + snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), + snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), + snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); + snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), + snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); @@ -379,7 +379,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %llu", - snmp_fold_field64((void __percpu **)net->mib.ip_statistics, + snmp_fold_field64(net->mib.ip_statistics, snmp4_ipstats_list[i].entry, offsetof(struct ipstats_mib, syncp))); @@ -395,11 +395,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v) /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", - snmp_fold_field((void __percpu **)net->mib.tcp_statistics, + snmp_fold_field(net->mib.tcp_statistics, snmp4_tcp_list[i].entry)); else seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **)net->mib.tcp_statistics, + snmp_fold_field(net->mib.tcp_statistics, snmp4_tcp_list[i].entry)); } @@ -410,7 +410,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **)net->mib.udp_statistics, + snmp_fold_field(net->mib.udp_statistics, snmp4_udp_list[i].entry)); /* the UDP and UDP-Lite MIBs are the same */ @@ -421,7 +421,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **)net->mib.udplite_statistics, + snmp_fold_field(net->mib.udplite_statistics, snmp4_udp_list[i].entry)); seq_putc(seq, '\n'); @@ -458,7 +458,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nTcpExt:"); for (i = 0; snmp4_net_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **)net->mib.net_statistics, + snmp_fold_field(net->mib.net_statistics, snmp4_net_list[i].entry)); seq_puts(seq, "\nIpExt:"); @@ -468,7 +468,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nIpExt:"); for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) seq_printf(seq, " %llu", - snmp_fold_field64((void __percpu **)net->mib.ip_statistics, + snmp_fold_field64(net->mib.ip_statistics, snmp4_ipextstats_list[i].entry, offsetof(struct ipstats_mib, syncp))); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1ac13c0300b7..5667b3003af9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -275,19 +275,14 @@ static int snmp6_alloc_dev(struct inet6_dev *idev) { int i; - if (snmp_mib_init((void __percpu **)idev->stats.ipv6, - sizeof(struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) + idev->stats.ipv6 = alloc_percpu(struct ipstats_mib); + if (!idev->stats.ipv6) goto err_ip; for_each_possible_cpu(i) { struct ipstats_mib *addrconf_stats; - addrconf_stats = per_cpu_ptr(idev->stats.ipv6[0], i); + addrconf_stats = per_cpu_ptr(idev->stats.ipv6, i); u64_stats_init(&addrconf_stats->syncp); -#if SNMP_ARRAY_SZ == 2 - addrconf_stats = per_cpu_ptr(idev->stats.ipv6[1], i); - u64_stats_init(&addrconf_stats->syncp); -#endif } @@ -305,7 +300,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev) err_icmpmsg: kfree(idev->stats.icmpv6dev); err_icmp: - snmp_mib_free((void __percpu **)idev->stats.ipv6); + free_percpu(idev->stats.ipv6); err_ip: return -ENOMEM; } @@ -4363,7 +4358,7 @@ static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, memset(&stats[items], 0, pad); } -static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib, +static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, int items, int bytes, size_t syncpoff) { int i; @@ -4383,7 +4378,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, { switch (attrtype) { case IFLA_INET6_STATS: - __snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6, + __snmp6_fill_stats64(stats, idev->stats.ipv6, IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 4c11cbcf8308..e6960457f625 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -123,7 +123,7 @@ static void snmp6_free_dev(struct inet6_dev *idev) { kfree(idev->stats.icmpv6msgdev); kfree(idev->stats.icmpv6dev); - snmp_mib_free((void __percpu **)idev->stats.ipv6); + free_percpu(idev->stats.ipv6); } /* Nobody refers to this device, we may destroy it. */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d935889f1008..dc47cc757b80 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -715,33 +715,25 @@ static int __net_init ipv6_init_mibs(struct net *net) { int i; - if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, - sizeof(struct udp_mib), - __alignof__(struct udp_mib)) < 0) + net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib); + if (!net->mib.udp_stats_in6) return -ENOMEM; - if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6, - sizeof(struct udp_mib), - __alignof__(struct udp_mib)) < 0) + net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib); + if (!net->mib.udplite_stats_in6) goto err_udplite_mib; - if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics, - sizeof(struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) + net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib); + if (!net->mib.ipv6_statistics) goto err_ip_mib; for_each_possible_cpu(i) { struct ipstats_mib *af_inet6_stats; - af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[0], i); + af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i); u64_stats_init(&af_inet6_stats->syncp); -#if SNMP_ARRAY_SZ == 2 - af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[1], i); - u64_stats_init(&af_inet6_stats->syncp); -#endif } - if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, - sizeof(struct icmpv6_mib), - __alignof__(struct icmpv6_mib)) < 0) + net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib); + if (!net->mib.icmpv6_statistics) goto err_icmp_mib; net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib), GFP_KERNEL); @@ -750,22 +742,22 @@ static int __net_init ipv6_init_mibs(struct net *net) return 0; err_icmpmsg_mib: - snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); + free_percpu(net->mib.icmpv6_statistics); err_icmp_mib: - snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); + free_percpu(net->mib.ipv6_statistics); err_ip_mib: - snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); + free_percpu(net->mib.udplite_stats_in6); err_udplite_mib: - snmp_mib_free((void __percpu **)net->mib.udp_stats_in6); + free_percpu(net->mib.udp_stats_in6); return -ENOMEM; } static void ipv6_cleanup_mibs(struct net *net) { - snmp_mib_free((void __percpu **)net->mib.udp_stats_in6); - snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); - snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); - snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); + free_percpu(net->mib.udp_stats_in6); + free_percpu(net->mib.udplite_stats_in6); + free_percpu(net->mib.ipv6_statistics); + free_percpu(net->mib.icmpv6_statistics); kfree(net->mib.icmpv6msg_statistics); } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 091d066a57b3..69cb47625df7 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -201,7 +201,7 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib, } } -static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib, +static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, size_t syncpoff) { int i; @@ -215,7 +215,7 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) { struct net *net = (struct net *)seq->private; - snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics, + snmp6_seq_show_item64(seq, net->mib.ipv6_statistics, snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics, NULL, snmp6_icmp6_list); @@ -245,7 +245,7 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v) struct inet6_dev *idev = (struct inet6_dev *)seq->private; seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); - snmp6_seq_show_item64(seq, (void __percpu **)idev->stats.ipv6, + snmp6_seq_show_item64(seq, idev->stats.ipv6, snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs, snmp6_icmp6_list); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index c09757fbf803..074b60e2faab 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1100,14 +1100,15 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family) static inline int init_sctp_mibs(struct net *net) { - return snmp_mib_init((void __percpu **)net->sctp.sctp_statistics, - sizeof(struct sctp_mib), - __alignof__(struct sctp_mib)); + net->sctp.sctp_statistics = alloc_percpu(struct sctp_mib); + if (!net->sctp.sctp_statistics) + return -ENOMEM; + return 0; } static inline void cleanup_sctp_mibs(struct net *net) { - snmp_mib_free((void __percpu **)net->sctp.sctp_statistics); + free_percpu(net->sctp.sctp_statistics); } static void sctp_v4_pf_init(void) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c08fbd11ceff..e63f242ae03e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2783,21 +2783,19 @@ static struct notifier_block xfrm_dev_notifier = { static int __net_init xfrm_statistics_init(struct net *net) { int rv; - - if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics, - sizeof(struct linux_xfrm_mib), - __alignof__(struct linux_xfrm_mib)) < 0) + net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib); + if (!net->mib.xfrm_statistics) return -ENOMEM; rv = xfrm_proc_init(net); if (rv < 0) - snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); + free_percpu(net->mib.xfrm_statistics); return rv; } static void xfrm_statistics_fini(struct net *net) { xfrm_proc_fini(net); - snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); + free_percpu(net->mib.xfrm_statistics); } #else static int __net_init xfrm_statistics_init(struct net *net) diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index fc5abd0b456f..9c4fbd8935f4 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -54,8 +54,7 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) int i; for (i = 0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - snmp_fold_field((void __percpu **) - net->mib.xfrm_statistics, + snmp_fold_field(net->mib.xfrm_statistics, xfrm_mib_list[i].entry)); return 0; } -- cgit v1.2.3-71-gd317 From 552a515707a5caf9fa9f3620d68fc28146c6b1b9 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Wed, 7 May 2014 09:28:31 +0200 Subject: ath9k: Allow platform override without EEPROM override Add a new platform data flag "use_eeprom" that indicates that the eeprom found on the card itself should be used instead of the one present in the platform data. This allows to override the MAC address of a PCI card while preserving the eeprom data from the card itself. The default behavior is preserved. Signed-off-by: Helmut Schaa Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/init.c | 2 +- include/linux/ath9k_platform.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index a6e273a2c44b..bcc7cfb1866d 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -508,7 +508,7 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc, sc->tx99_power = MAX_RATE_POWER + 1; init_waitqueue_head(&sc->tx_wait); - if (!pdata) { + if (!pdata || pdata->use_eeprom) { ah->ah_flags |= AH_USE_EEPROM; sc->sc_ah->led_pin = -1; } else { diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h index 8598f8eacb20..a495a959e8a7 100644 --- a/include/linux/ath9k_platform.h +++ b/include/linux/ath9k_platform.h @@ -36,6 +36,8 @@ struct ath9k_platform_data { int (*get_mac_revision)(void); int (*external_reset)(void); + + bool use_eeprom; }; #endif /* _LINUX_ATH9K_PLATFORM_H */ -- cgit v1.2.3-71-gd317 From db6d8cc00773d8ef5a8b421b42a5ded235307b10 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Wed, 26 Mar 2014 02:27:02 -0700 Subject: dell-led: add mic mute led interface This patch provides similar led functional of 420f973 thinkpad-acpi: Add mute and mic-mute LED functionality Signed-off-by: Alex Hung Signed-off-by: Bryan Wu --- drivers/leds/dell-led.c | 171 +++++++++++++++++++++++++++++++++++++++++++++-- include/linux/dell-led.h | 10 +++ 2 files changed, 174 insertions(+), 7 deletions(-) create mode 100644 include/linux/dell-led.h (limited to 'include') diff --git a/drivers/leds/dell-led.c b/drivers/leds/dell-led.c index e5c57389efd6..c36acaf566a6 100644 --- a/drivers/leds/dell-led.c +++ b/drivers/leds/dell-led.c @@ -15,12 +15,15 @@ #include #include #include +#include +#include MODULE_AUTHOR("Louis Davis/Jim Dailey"); MODULE_DESCRIPTION("Dell LED Control Driver"); MODULE_LICENSE("GPL"); #define DELL_LED_BIOS_GUID "F6E4FE6E-909D-47cb-8BAB-C9F6F2F8D396" +#define DELL_APP_GUID "A80593CE-A997-11DA-B012-B622A1EF5492" MODULE_ALIAS("wmi:" DELL_LED_BIOS_GUID); /* Error Result Codes: */ @@ -39,6 +42,149 @@ MODULE_ALIAS("wmi:" DELL_LED_BIOS_GUID); #define CMD_LED_OFF 17 #define CMD_LED_BLINK 18 +struct app_wmi_args { + u16 class; + u16 selector; + u32 arg1; + u32 arg2; + u32 arg3; + u32 arg4; + u32 res1; + u32 res2; + u32 res3; + u32 res4; + char dummy[92]; +}; + +#define GLOBAL_MIC_MUTE_ENABLE 0x364 +#define GLOBAL_MIC_MUTE_DISABLE 0x365 + +struct dell_bios_data_token { + u16 tokenid; + u16 location; + u16 value; +}; + +struct __attribute__ ((__packed__)) dell_bios_calling_interface { + struct dmi_header header; + u16 cmd_io_addr; + u8 cmd_io_code; + u32 supported_cmds; + struct dell_bios_data_token damap[]; +}; + +static struct dell_bios_data_token dell_mic_tokens[2]; + +static int dell_wmi_perform_query(struct app_wmi_args *args) +{ + struct app_wmi_args *bios_return; + union acpi_object *obj; + struct acpi_buffer input; + struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; + acpi_status status; + u32 rc = -EINVAL; + + input.length = 128; + input.pointer = args; + + status = wmi_evaluate_method(DELL_APP_GUID, 0, 1, &input, &output); + if (!ACPI_SUCCESS(status)) + goto err_out0; + + obj = output.pointer; + if (!obj) + goto err_out0; + + if (obj->type != ACPI_TYPE_BUFFER) + goto err_out1; + + bios_return = (struct app_wmi_args *)obj->buffer.pointer; + rc = bios_return->res1; + if (rc) + goto err_out1; + + memcpy(args, bios_return, sizeof(struct app_wmi_args)); + rc = 0; + + err_out1: + kfree(obj); + err_out0: + return rc; +} + +static void __init find_micmute_tokens(const struct dmi_header *dm, void *dummy) +{ + struct dell_bios_calling_interface *calling_interface; + struct dell_bios_data_token *token; + int token_size = sizeof(struct dell_bios_data_token); + int i = 0; + + if (dm->type == 0xda && dm->length > 17) { + calling_interface = container_of(dm, + struct dell_bios_calling_interface, header); + + token = &calling_interface->damap[i]; + while (token->tokenid != 0xffff) { + if (token->tokenid == GLOBAL_MIC_MUTE_DISABLE) + memcpy(&dell_mic_tokens[0], token, token_size); + else if (token->tokenid == GLOBAL_MIC_MUTE_ENABLE) + memcpy(&dell_mic_tokens[1], token, token_size); + + i++; + token = &calling_interface->damap[i]; + } + } +} + +static int dell_micmute_led_set(int state) +{ + struct app_wmi_args args; + struct dell_bios_data_token *token; + + if (!wmi_has_guid(DELL_APP_GUID)) + return -ENODEV; + + if (state == 0 || state == 1) + token = &dell_mic_tokens[state]; + else + return -EINVAL; + + memset(&args, 0, sizeof(struct app_wmi_args)); + + args.class = 1; + args.arg1 = token->location; + args.arg2 = token->value; + + dell_wmi_perform_query(&args); + + return state; +} + +int dell_app_wmi_led_set(int whichled, int on) +{ + int state = 0; + + switch (whichled) { + case DELL_LED_MICMUTE: + state = dell_micmute_led_set(on); + break; + default: + pr_warn("led type %x is not supported\n", whichled); + break; + } + + return state; +} +EXPORT_SYMBOL_GPL(dell_app_wmi_led_set); + +static int __init dell_micmute_led_init(void) +{ + memset(dell_mic_tokens, 0, sizeof(struct dell_bios_data_token) * 2); + dmi_walk(find_micmute_tokens, NULL); + + return 0; +} + struct bios_args { unsigned char length; unsigned char result_code; @@ -181,21 +327,32 @@ static int __init dell_led_init(void) { int error = 0; - if (!wmi_has_guid(DELL_LED_BIOS_GUID)) + if (!wmi_has_guid(DELL_LED_BIOS_GUID) && !wmi_has_guid(DELL_APP_GUID)) return -ENODEV; - error = led_off(); - if (error != 0) - return -ENODEV; + if (wmi_has_guid(DELL_APP_GUID)) + error = dell_micmute_led_init(); - return led_classdev_register(NULL, &dell_led); + if (wmi_has_guid(DELL_LED_BIOS_GUID)) { + error = led_off(); + if (error != 0) + return -ENODEV; + + error = led_classdev_register(NULL, &dell_led); + } + + return error; } static void __exit dell_led_exit(void) { - led_classdev_unregister(&dell_led); + int error = 0; - led_off(); + if (wmi_has_guid(DELL_LED_BIOS_GUID)) { + error = led_off(); + if (error == 0) + led_classdev_unregister(&dell_led); + } } module_init(dell_led_init); diff --git a/include/linux/dell-led.h b/include/linux/dell-led.h new file mode 100644 index 000000000000..7009b8bec77b --- /dev/null +++ b/include/linux/dell-led.h @@ -0,0 +1,10 @@ +#ifndef __DELL_LED_H__ +#define __DELL_LED_H__ + +enum { + DELL_LED_MICMUTE, +}; + +int dell_app_wmi_led_set(int whichled, int on); + +#endif -- cgit v1.2.3-71-gd317 From 2ce112f1e788a695630e2c275f47d57a801673d3 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sat, 5 Apr 2014 20:16:34 -0700 Subject: leds: pca9685: Remove leds-pca9685 driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver is replaced by pwm-pca9685 driver and there is no user uses this driver in current tree. So remove it. Signed-off-by: Axel Lin Acked-by: Steffen Trumtrar Acked-by: Maximilian Güntner Signed-off-by: Bryan Wu --- drivers/leds/Kconfig | 10 -- drivers/leds/Makefile | 1 - drivers/leds/leds-pca9685.c | 213 ----------------------------- include/linux/platform_data/leds-pca9685.h | 35 ----- 4 files changed, 259 deletions(-) delete mode 100644 drivers/leds/leds-pca9685.c delete mode 100644 include/linux/platform_data/leds-pca9685.h (limited to 'include') diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 6de9dfbf61c1..6713dbb6bfda 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -300,16 +300,6 @@ config LEDS_PCA963X LED driver chip accessed via the I2C bus. Supported devices include PCA9633 and PCA9634 -config LEDS_PCA9685 - tristate "LED support for PCA9685 I2C chip" - depends on LEDS_CLASS - depends on I2C - help - This option enables support for LEDs connected to the PCA9685 - LED driver chip accessed via the I2C bus. - The PCA9685 offers 12-bit PWM (4095 levels of brightness) on - 16 individual channels. - config LEDS_WM831X_STATUS tristate "LED support for status LEDs on WM831x PMICs" depends on LEDS_CLASS diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 3cd76dbd9be2..8979b0b2c85e 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -36,7 +36,6 @@ obj-$(CONFIG_LEDS_OT200) += leds-ot200.o obj-$(CONFIG_LEDS_FSG) += leds-fsg.o obj-$(CONFIG_LEDS_PCA955X) += leds-pca955x.o obj-$(CONFIG_LEDS_PCA963X) += leds-pca963x.o -obj-$(CONFIG_LEDS_PCA9685) += leds-pca9685.o obj-$(CONFIG_LEDS_DA903X) += leds-da903x.o obj-$(CONFIG_LEDS_DA9052) += leds-da9052.o obj-$(CONFIG_LEDS_WM831X_STATUS) += leds-wm831x-status.o diff --git a/drivers/leds/leds-pca9685.c b/drivers/leds/leds-pca9685.c deleted file mode 100644 index 6e1ef3a9d6ef..000000000000 --- a/drivers/leds/leds-pca9685.c +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright 2013 Maximilian Güntner - * - * This file is subject to the terms and conditions of version 2 of - * the GNU General Public License. See the file COPYING in the main - * directory of this archive for more details. - * - * Based on leds-pca963x.c driver by - * Peter Meerwald - * - * Driver for the NXP PCA9685 12-Bit PWM LED driver chip. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* Register Addresses */ -#define PCA9685_MODE1 0x00 -#define PCA9685_MODE2 0x01 -#define PCA9685_LED0_ON_L 0x06 -#define PCA9685_ALL_LED_ON_L 0xFA - -/* MODE1 Register */ -#define PCA9685_ALLCALL 0x00 -#define PCA9685_SLEEP 0x04 -#define PCA9685_AI 0x05 - -/* MODE2 Register */ -#define PCA9685_INVRT 0x04 -#define PCA9685_OUTDRV 0x02 - -static const struct i2c_device_id pca9685_id[] = { - { "pca9685", 0 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, pca9685_id); - -struct pca9685_led { - struct i2c_client *client; - struct work_struct work; - u16 brightness; - struct led_classdev led_cdev; - int led_num; /* 0-15 */ - char name[32]; -}; - -static void pca9685_write_msg(struct i2c_client *client, u8 *buf, u8 len) -{ - struct i2c_msg msg = { - .addr = client->addr, - .flags = 0x00, - .len = len, - .buf = buf - }; - i2c_transfer(client->adapter, &msg, 1); -} - -static void pca9685_all_off(struct i2c_client *client) -{ - u8 i2c_buffer[5] = {PCA9685_ALL_LED_ON_L, 0x00, 0x00, 0x00, 0x10}; - pca9685_write_msg(client, i2c_buffer, 5); -} - -static void pca9685_led_work(struct work_struct *work) -{ - struct pca9685_led *pca9685; - u8 i2c_buffer[5]; - - pca9685 = container_of(work, struct pca9685_led, work); - i2c_buffer[0] = PCA9685_LED0_ON_L + 4 * pca9685->led_num; - /* - * 4095 is the maximum brightness, so we set the ON time to 0x1000 - * which disables the PWM generator for that LED - */ - if (pca9685->brightness == 4095) - *((__le16 *)(i2c_buffer+1)) = cpu_to_le16(0x1000); - else - *((__le16 *)(i2c_buffer+1)) = 0x0000; - - if (pca9685->brightness == 0) - *((__le16 *)(i2c_buffer+3)) = cpu_to_le16(0x1000); - else if (pca9685->brightness == 4095) - *((__le16 *)(i2c_buffer+3)) = 0x0000; - else - *((__le16 *)(i2c_buffer+3)) = cpu_to_le16(pca9685->brightness); - - pca9685_write_msg(pca9685->client, i2c_buffer, 5); -} - -static void pca9685_led_set(struct led_classdev *led_cdev, - enum led_brightness value) -{ - struct pca9685_led *pca9685; - pca9685 = container_of(led_cdev, struct pca9685_led, led_cdev); - pca9685->brightness = value; - - schedule_work(&pca9685->work); -} - -static int pca9685_probe(struct i2c_client *client, - const struct i2c_device_id *id) -{ - struct pca9685_led *pca9685; - struct pca9685_platform_data *pdata; - int err; - u8 i; - - pdata = dev_get_platdata(&client->dev); - if (pdata) { - if (pdata->leds.num_leds < 1 || pdata->leds.num_leds > 15) { - dev_err(&client->dev, "board info must claim 1-16 LEDs"); - return -EINVAL; - } - } - - pca9685 = devm_kzalloc(&client->dev, 16 * sizeof(*pca9685), GFP_KERNEL); - if (!pca9685) - return -ENOMEM; - - i2c_set_clientdata(client, pca9685); - pca9685_all_off(client); - - for (i = 0; i < 16; i++) { - pca9685[i].client = client; - pca9685[i].led_num = i; - pca9685[i].name[0] = '\0'; - if (pdata && i < pdata->leds.num_leds) { - if (pdata->leds.leds[i].name) - strncpy(pca9685[i].name, - pdata->leds.leds[i].name, - sizeof(pca9685[i].name)-1); - if (pdata->leds.leds[i].default_trigger) - pca9685[i].led_cdev.default_trigger = - pdata->leds.leds[i].default_trigger; - } - if (strlen(pca9685[i].name) == 0) { - /* - * Write adapter and address to the name as well. - * Otherwise multiple chips attached to one host would - * not work. - */ - snprintf(pca9685[i].name, sizeof(pca9685[i].name), - "pca9685:%d:x%.2x:%d", - client->adapter->nr, client->addr, i); - } - pca9685[i].led_cdev.name = pca9685[i].name; - pca9685[i].led_cdev.max_brightness = 0xfff; - pca9685[i].led_cdev.brightness_set = pca9685_led_set; - - INIT_WORK(&pca9685[i].work, pca9685_led_work); - err = led_classdev_register(&client->dev, &pca9685[i].led_cdev); - if (err < 0) - goto exit; - } - - if (pdata) - i2c_smbus_write_byte_data(client, PCA9685_MODE2, - pdata->outdrv << PCA9685_OUTDRV | - pdata->inverted << PCA9685_INVRT); - else - i2c_smbus_write_byte_data(client, PCA9685_MODE2, - PCA9685_TOTEM_POLE << PCA9685_OUTDRV); - /* Enable Auto-Increment, enable oscillator, ALLCALL/SUBADDR disabled */ - i2c_smbus_write_byte_data(client, PCA9685_MODE1, BIT(PCA9685_AI)); - - return 0; - -exit: - while (i--) { - led_classdev_unregister(&pca9685[i].led_cdev); - cancel_work_sync(&pca9685[i].work); - } - return err; -} - -static int pca9685_remove(struct i2c_client *client) -{ - struct pca9685_led *pca9685 = i2c_get_clientdata(client); - u8 i; - - for (i = 0; i < 16; i++) { - led_classdev_unregister(&pca9685[i].led_cdev); - cancel_work_sync(&pca9685[i].work); - } - pca9685_all_off(client); - return 0; -} - -static struct i2c_driver pca9685_driver = { - .driver = { - .name = "leds-pca9685", - .owner = THIS_MODULE, - }, - .probe = pca9685_probe, - .remove = pca9685_remove, - .id_table = pca9685_id, -}; - -module_i2c_driver(pca9685_driver); - -MODULE_AUTHOR("Maximilian Güntner "); -MODULE_DESCRIPTION("PCA9685 LED Driver"); -MODULE_LICENSE("GPL v2"); diff --git a/include/linux/platform_data/leds-pca9685.h b/include/linux/platform_data/leds-pca9685.h deleted file mode 100644 index 778e9e4249cc..000000000000 --- a/include/linux/platform_data/leds-pca9685.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2013 Maximilian Güntner - * - * This file is subject to the terms and conditions of version 2 of - * the GNU General Public License. See the file COPYING in the main - * directory of this archive for more details. - * - * Based on leds-pca963x.h by Peter Meerwald - * - * LED driver for the NXP PCA9685 PWM chip - * - */ - -#ifndef __LINUX_PCA9685_H -#define __LINUX_PCA9685_H - -#include - -enum pca9685_outdrv { - PCA9685_OPEN_DRAIN, - PCA9685_TOTEM_POLE, -}; - -enum pca9685_inverted { - PCA9685_NOT_INVERTED, - PCA9685_INVERTED, -}; - -struct pca9685_platform_data { - struct led_platform_data leds; - enum pca9685_outdrv outdrv; - enum pca9685_inverted inverted; -}; - -#endif /* __LINUX_PCA9685_H */ -- cgit v1.2.3-71-gd317 From c3d620362dfe9218c7637354c7bce344ea771a31 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 7 May 2014 19:07:05 +0300 Subject: cfg80211: fix docbook warning When trying to generate documentation, at least xmldocs, we get the following warning: Warning(include/net/cfg80211.h:461): No description found for parameter 'nl80211_iftype' Fix it by adding the iftype argument name to the cfg80211_chandef_dfs_required() function declaration. Reported-and-tested-by: Masanari Iida Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0631230b01eb..28f6f1a5b445 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -458,7 +458,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, */ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, - enum nl80211_iftype); + enum nl80211_iftype iftype); /** * ieee80211_chandef_rate_flags - returns rate flags for a channel -- cgit v1.2.3-71-gd317 From 5ae76a94150c86a6e0ee84eb74e7f7e1909b8d39 Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Thu, 8 May 2014 15:32:08 +0200 Subject: Bluetooth: Store RSSI for connection This patch adds support to store RSSI for connection when reply for HCI_Read_RSSI is received. Signed-off-by: Andrzej Kaczmarek Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 10 ++++++++++ include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_event.c | 23 +++++++++++++++++++++++ 3 files changed, 34 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 4261a67682c0..ad2ecc92380d 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1064,6 +1064,16 @@ struct hci_rp_read_page_scan_type { #define PAGE_SCAN_TYPE_STANDARD 0x00 #define PAGE_SCAN_TYPE_INTERLACED 0x01 +#define HCI_OP_READ_RSSI 0x1405 +struct hci_cp_read_rssi { + __le16 handle; +} __packed; +struct hci_rp_read_rssi { + __u8 status; + __le16 handle; + __s8 rssi; +} __packed; + #define HCI_OP_READ_LOCAL_AMP_INFO 0x1409 struct hci_rp_read_local_amp_info { __u8 status; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d73f41855ada..0318d5263837 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -374,6 +374,7 @@ struct hci_conn { __u16 setting; __u16 le_conn_min_interval; __u16 le_conn_max_interval; + __s8 rssi; unsigned long flags; __u8 remote_cap; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 07c37d0cecb2..2bb0053d4c45 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1245,6 +1245,25 @@ static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev, amp_write_rem_assoc_continue(hdev, rp->phy_handle); } +static void hci_cc_read_rssi(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_rssi *rp = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); + if (conn) + conn->rssi = rp->rssi; + + hci_dev_unlock(hdev); +} + static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { BT_DBG("%s status 0x%2.2x", hdev->name, status); @@ -2637,6 +2656,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_write_remote_amp_assoc(hdev, skb); break; + case HCI_OP_READ_RSSI: + hci_cc_read_rssi(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); break; -- cgit v1.2.3-71-gd317 From 5409e46f1bcf960c651f3fff35f2f25e539655cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 May 2014 13:49:44 +0200 Subject: nfsd: clean up fh_auth usage Use fh_fsid when reffering to the fsid part of the filehandle. The variable length auth field envisioned in nfsfh wasn't ever implemented. Also clean up some lose ends around this and document the file handle format better. Btw, why do we even export nfsfh.h to userspace? The file handle very much is kernel private, and nothing in nfs-utils include the header either. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsfh.c | 20 +++++++++----------- include/uapi/linux/nfsd/nfsfh.h | 32 +++++++------------------------- 2 files changed, 16 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 3c37b160dcad..a337106d6875 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -169,8 +169,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) data_left -= len; if (data_left < 0) return error; - exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth); - fid = (struct fid *)(fh->fh_auth + len); + exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); + fid = (struct fid *)(fh->fh_fsid + len); } else { __u32 tfh[2]; dev_t xdev; @@ -385,7 +385,7 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, { if (dentry != exp->ex_path.dentry) { struct fid *fid = (struct fid *) - (fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1); + (fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1); int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK); @@ -513,7 +513,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, */ struct inode * inode = dentry->d_inode; - __u32 *datap; dev_t ex_dev = exp_sb(exp)->s_dev; dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n", @@ -557,17 +556,16 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, if (inode) _fh_update_old(dentry, exp, &fhp->fh_handle); } else { - int len; + fhp->fh_handle.fh_size = + key_len(fhp->fh_handle.fh_fsid_type) + 4; fhp->fh_handle.fh_auth_type = 0; - datap = fhp->fh_handle.fh_auth+0; - mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev, + + mk_fsid(fhp->fh_handle.fh_fsid_type, + fhp->fh_handle.fh_fsid, + ex_dev, exp->ex_path.dentry->d_inode->i_ino, exp->ex_fsid, exp->ex_uuid); - len = key_len(fhp->fh_handle.fh_fsid_type); - datap += len/4; - fhp->fh_handle.fh_size = 4 + len; - if (inode) _fh_update(fhp, exp, dentry); if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h index 616e3b396476..20391235d088 100644 --- a/include/uapi/linux/nfsd/nfsfh.h +++ b/include/uapi/linux/nfsd/nfsfh.h @@ -1,13 +1,7 @@ /* - * include/linux/nfsd/nfsfh.h - * * This file describes the layout of the file handles as passed * over the wire. * - * Earlier versions of knfsd used to sign file handles using keyed MD5 - * or SHA. I've removed this code, because it doesn't give you more - * security than blocking external access to port 2049 on your firewall. - * * Copyright (C) 1995, 1996, 1997 Olaf Kirch */ @@ -37,7 +31,7 @@ struct nfs_fhbase_old { }; /* - * This is the new flexible, extensible style NFSv2/v3 file handle. + * This is the new flexible, extensible style NFSv2/v3/v4 file handle. * by Neil Brown - March 2000 * * The file handle starts with a sequence of four-byte words. @@ -47,14 +41,7 @@ struct nfs_fhbase_old { * * All four-byte values are in host-byte-order. * - * The auth_type field specifies how the filehandle can be authenticated - * This might allow a file to be confirmed to be in a writable part of a - * filetree without checking the path from it up to the root. - * Current values: - * 0 - No authentication. fb_auth is 0 bytes long - * Possible future values: - * 1 - 4 bytes taken from MD5 hash of the remainer of the file handle - * prefixed by a secret and with the important export flags. + * The auth_type field is deprecated and must be set to 0. * * The fsid_type identifies how the filesystem (or export point) is * encoded. @@ -71,14 +58,9 @@ struct nfs_fhbase_old { * 7 - 8 byte inode number and 16 byte uuid * * The fileid_type identified how the file within the filesystem is encoded. - * This is (will be) passed to, and set by, the underlying filesystem if it supports - * filehandle operations. The filesystem must not use the value '0' or '0xff' and may - * only use the values 1 and 2 as defined below: - * Current values: - * 0 - The root, or export point, of the filesystem. fb_fileid is 0 bytes. - * 1 - 32bit inode number, 32 bit generation number. - * 2 - 32bit inode number, 32 bit generation number, 32 bit parent directory inode number. - * + * The values for this field are filesystem specific, exccept that + * filesystems must not use the values '0' or '0xff'. 'See enum fid_type' + * in include/linux/exportfs.h for currently registered values. */ struct nfs_fhbase_new { __u8 fb_version; /* == 1, even => nfs_fhbase_old */ @@ -114,9 +96,9 @@ struct knfsd_fh { #define fh_fsid_type fh_base.fh_new.fb_fsid_type #define fh_auth_type fh_base.fh_new.fb_auth_type #define fh_fileid_type fh_base.fh_new.fb_fileid_type -#define fh_auth fh_base.fh_new.fb_auth #define fh_fsid fh_base.fh_new.fb_auth - +/* Do not use, provided for userspace compatiblity. */ +#define fh_auth fh_base.fh_new.fb_auth #endif /* _UAPI_LINUX_NFSD_FH_H */ -- cgit v1.2.3-71-gd317 From f6837ba8c98afcf28ec25f6863a8597274aeefd6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 30 Apr 2014 14:19:04 +0200 Subject: mac80211: handle failed restart/resume better When the driver fails during HW restart or resume, the whole stack goes into a very confused state with interfaces being up while the hardware is down etc. Address this by shutting down everything; we'll run into a lot of warnings in the process but that's better than having the whole stack get messed up. Reviewed-by: Arik Nemtsov Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 14 ++++++ net/mac80211/driver-ops.h | 108 +++++++++++++++++++++++++++++---------------- net/mac80211/ieee80211_i.h | 1 + net/mac80211/scan.c | 15 ++++--- net/mac80211/util.c | 46 +++++++++++++++++-- net/wireless/core.c | 20 ++++++--- 6 files changed, 153 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 28f6f1a5b445..5c7169b0ac57 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4771,6 +4771,20 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, gfp_t gfp); +/** + * cfg80211_shutdown_all_interfaces - shut down all interfaces for a wiphy + * @wiphy: the wiphy to shut down + * + * This function shuts down all interfaces belonging to this wiphy by + * calling dev_close() (and treating non-netdev interfaces as needed). + * It shouldn't really be used unless there are some fatal device errors + * that really can't be recovered in any other way. + * + * Callers must hold the RTNL and be able to deal with callbacks into + * the driver while the function is running. + */ +void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 5331582a2c81..df1d50291344 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -5,11 +5,11 @@ #include "ieee80211_i.h" #include "trace.h" -static inline void check_sdata_in_driver(struct ieee80211_sub_if_data *sdata) +static inline bool check_sdata_in_driver(struct ieee80211_sub_if_data *sdata) { - WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER), - "%s: Failed check-sdata-in-driver check, flags: 0x%x\n", - sdata->dev ? sdata->dev->name : sdata->name, sdata->flags); + return !WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER), + "%s: Failed check-sdata-in-driver check, flags: 0x%x\n", + sdata->dev ? sdata->dev->name : sdata->name, sdata->flags); } static inline struct ieee80211_sub_if_data * @@ -168,7 +168,8 @@ static inline int drv_change_interface(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_change_interface(local, sdata, type, p2p); ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p); @@ -181,7 +182,8 @@ static inline void drv_remove_interface(struct ieee80211_local *local, { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_remove_interface(local, sdata); local->ops->remove_interface(&local->hw, &sdata->vif); @@ -219,7 +221,8 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, sdata->vif.type == NL80211_IFTYPE_MONITOR)) return; - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_bss_info_changed(local, sdata, info, changed); if (local->ops->bss_info_changed) @@ -278,7 +281,8 @@ static inline int drv_set_key(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_set_key(local, cmd, sdata, sta, key); ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); @@ -298,7 +302,8 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, ista = &sta->sta; sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_update_tkip_key(local, sdata, conf, ista, iv32); if (local->ops->update_tkip_key) @@ -315,7 +320,8 @@ static inline int drv_hw_scan(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_hw_scan(local, sdata); ret = local->ops->hw_scan(&local->hw, &sdata->vif, req); @@ -328,7 +334,8 @@ static inline void drv_cancel_hw_scan(struct ieee80211_local *local, { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_cancel_hw_scan(local, sdata); local->ops->cancel_hw_scan(&local->hw, &sdata->vif); @@ -345,7 +352,8 @@ drv_sched_scan_start(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_sched_scan_start(local, sdata); ret = local->ops->sched_scan_start(&local->hw, &sdata->vif, @@ -361,7 +369,8 @@ static inline int drv_sched_scan_stop(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_sched_scan_stop(local, sdata); ret = local->ops->sched_scan_stop(&local->hw, &sdata->vif); @@ -462,7 +471,8 @@ static inline void drv_sta_notify(struct ieee80211_local *local, struct ieee80211_sta *sta) { sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_sta_notify(local, sdata, cmd, sta); if (local->ops->sta_notify) @@ -479,7 +489,8 @@ static inline int drv_sta_add(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_sta_add(local, sdata, sta); if (local->ops->sta_add) @@ -497,7 +508,8 @@ static inline void drv_sta_remove(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_sta_remove(local, sdata, sta); if (local->ops->sta_remove) @@ -515,7 +527,8 @@ static inline void drv_sta_add_debugfs(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; if (local->ops->sta_add_debugfs) local->ops->sta_add_debugfs(&local->hw, &sdata->vif, @@ -545,7 +558,8 @@ static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_sta_pre_rcu_remove(local, sdata, &sta->sta); if (local->ops->sta_pre_rcu_remove) @@ -566,7 +580,8 @@ int drv_sta_state(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_sta_state(local, sdata, &sta->sta, old_state, new_state); if (local->ops->sta_state) { @@ -590,7 +605,8 @@ static inline void drv_sta_rc_update(struct ieee80211_local *local, struct ieee80211_sta *sta, u32 changed) { sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED && (sdata->vif.type != NL80211_IFTYPE_ADHOC && @@ -612,7 +628,8 @@ static inline int drv_conf_tx(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_conf_tx(local, sdata, ac, params); if (local->ops->conf_tx) @@ -629,7 +646,8 @@ static inline u64 drv_get_tsf(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return ret; trace_drv_get_tsf(local, sdata); if (local->ops->get_tsf) @@ -644,7 +662,8 @@ static inline void drv_set_tsf(struct ieee80211_local *local, { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_set_tsf(local, sdata, tsf); if (local->ops->set_tsf) @@ -657,7 +676,8 @@ static inline void drv_reset_tsf(struct ieee80211_local *local, { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_reset_tsf(local, sdata); if (local->ops->reset_tsf) @@ -689,7 +709,8 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size); @@ -733,8 +754,8 @@ static inline void drv_flush(struct ieee80211_local *local, might_sleep(); - if (sdata) - check_sdata_in_driver(sdata); + if (sdata && !check_sdata_in_driver(sdata)) + return; trace_drv_flush(local, queues, drop); if (local->ops->flush) @@ -854,7 +875,8 @@ static inline int drv_set_bitrate_mask(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_set_bitrate_mask(local, sdata, mask); if (local->ops->set_bitrate_mask) @@ -869,7 +891,8 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct cfg80211_gtk_rekey_data *data) { - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_set_rekey_data(local, sdata, data); if (local->ops->set_rekey_data) @@ -937,7 +960,8 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local, { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION); trace_drv_mgd_prepare_tx(local, sdata); @@ -964,6 +988,9 @@ static inline int drv_add_chanctx(struct ieee80211_local *local, static inline void drv_remove_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { + if (WARN_ON(!ctx->driver_present)) + return; + trace_drv_remove_chanctx(local, ctx); if (local->ops->remove_chanctx) local->ops->remove_chanctx(&local->hw, &ctx->conf); @@ -989,7 +1016,8 @@ static inline int drv_assign_vif_chanctx(struct ieee80211_local *local, { int ret = 0; - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_assign_vif_chanctx(local, sdata, ctx); if (local->ops->assign_vif_chanctx) { @@ -1007,7 +1035,8 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx *ctx) { - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_unassign_vif_chanctx(local, sdata, ctx); if (local->ops->unassign_vif_chanctx) { @@ -1024,7 +1053,8 @@ static inline int drv_start_ap(struct ieee80211_local *local, { int ret = 0; - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_start_ap(local, sdata, &sdata->vif.bss_conf); if (local->ops->start_ap) @@ -1036,7 +1066,8 @@ static inline int drv_start_ap(struct ieee80211_local *local, static inline void drv_stop_ap(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_stop_ap(local, sdata); if (local->ops->stop_ap) @@ -1059,7 +1090,8 @@ drv_set_default_unicast_key(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, int key_idx) { - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; WARN_ON_ONCE(key_idx < -1 || key_idx > 3); @@ -1101,7 +1133,8 @@ static inline int drv_join_ibss(struct ieee80211_local *local, int ret = 0; might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_join_ibss(local, sdata, &sdata->vif.bss_conf); if (local->ops->join_ibss) @@ -1114,7 +1147,8 @@ static inline void drv_leave_ibss(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_leave_ibss(local, sdata); if (local->ops->leave_ibss) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f4ba0c4a4314..4668ce9a1d3f 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1459,6 +1459,7 @@ __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req); int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata); +void ieee80211_sched_scan_end(struct ieee80211_local *local); void ieee80211_sched_scan_stopped_work(struct work_struct *work); /* off-channel helpers */ diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 28185c8dc19a..f40661eb75b5 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -1076,12 +1076,8 @@ void ieee80211_sched_scan_results(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_sched_scan_results); -void ieee80211_sched_scan_stopped_work(struct work_struct *work) +void ieee80211_sched_scan_end(struct ieee80211_local *local) { - struct ieee80211_local *local = - container_of(work, struct ieee80211_local, - sched_scan_stopped_work); - mutex_lock(&local->mtx); if (!rcu_access_pointer(local->sched_scan_sdata)) { @@ -1099,6 +1095,15 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work) cfg80211_sched_scan_stopped(local->hw.wiphy); } +void ieee80211_sched_scan_stopped_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, + sched_scan_stopped_work); + + ieee80211_sched_scan_end(local); +} + void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ad058759e85e..7e0dd4be8097 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1457,6 +1457,44 @@ void ieee80211_stop_device(struct ieee80211_local *local) drv_stop(local); } +static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_chanctx *ctx; + + /* + * We get here if during resume the device can't be restarted properly. + * We might also get here if this happens during HW reset, which is a + * slightly different situation and we need to drop all connections in + * the latter case. + * + * Ask cfg80211 to turn off all interfaces, this will result in more + * warnings but at least we'll then get into a clean stopped state. + */ + + local->resuming = false; + local->suspended = false; + local->started = false; + + /* scheduled scan clearly can't be running any more, but tell + * cfg80211 and clear local state + */ + ieee80211_sched_scan_end(local); + + list_for_each_entry(sdata, &local->interfaces, list) + sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; + + /* Mark channel contexts as not being in the driver any more to avoid + * removing them from the driver during the shutdown process... + */ + mutex_lock(&local->chanctx_mtx); + list_for_each_entry(ctx, &local->chanctx_list, list) + ctx->driver_present = false; + mutex_unlock(&local->chanctx_mtx); + + cfg80211_shutdown_all_interfaces(local->hw.wiphy); +} + static void ieee80211_assign_chanctx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { @@ -1520,9 +1558,11 @@ int ieee80211_reconfig(struct ieee80211_local *local) */ res = drv_start(local); if (res) { - WARN(local->suspended, "Hardware became unavailable " - "upon resume. This could be a software issue " - "prior to suspend or a hardware issue.\n"); + if (local->suspended) + WARN(1, "Hardware became unavailable upon resume. This could be a software issue prior to suspend or a hardware issue.\n"); + else + WARN(1, "Hardware became unavailable during restart.\n"); + ieee80211_handle_reconfig_failure(local); return res; } diff --git a/net/wireless/core.c b/net/wireless/core.c index 7e023b74f009..39788711ce6e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -210,15 +210,12 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, } } -static int cfg80211_rfkill_set_block(void *data, bool blocked) +void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = data; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct wireless_dev *wdev; - if (!blocked) - return 0; - - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->netdev) { @@ -234,7 +231,18 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) break; } } +} +EXPORT_SYMBOL_GPL(cfg80211_shutdown_all_interfaces); +static int cfg80211_rfkill_set_block(void *data, bool blocked) +{ + struct cfg80211_registered_device *rdev = data; + + if (!blocked) + return 0; + + rtnl_lock(); + cfg80211_shutdown_all_interfaces(&rdev->wiphy); rtnl_unlock(); return 0; -- cgit v1.2.3-71-gd317 From 2070d50e1cbe3d7f157cbf8e63279c893f375d7f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 9 May 2014 15:11:53 -0400 Subject: percpu-refcount: rename percpu_ref_tryget() to percpu_ref_tryget_live() percpu_ref_tryget() is different from the usual tryget semantics in that it fails if the refcnt is in its dying stage even if the refcnt hasn't reached zero yet. We're about to introduce the more conventional tryget and the current one has only one user. Let's rename it to percpu_ref_tryget_live() so that it explicitly signifies the peculiarities of its semantics. This is pure rename. Signed-off-by: Tejun Heo Acked-by: Kent Overstreet --- include/linux/cgroup.h | 2 +- include/linux/percpu-refcount.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c2515851c1aa..549aed8de32b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -101,7 +101,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css) { if (css->flags & CSS_ROOT) return true; - return percpu_ref_tryget(&css->refcnt); + return percpu_ref_tryget_live(&css->refcnt); } /** diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 95961f0bf62d..e22d15597cc3 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -118,7 +118,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) } /** - * percpu_ref_tryget - try to increment a percpu refcount + * percpu_ref_tryget_live - try to increment a live percpu refcount * @ref: percpu_ref to try-get * * Increment a percpu refcount unless it has already been killed. Returns @@ -129,7 +129,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) * used. After the confirm_kill callback is invoked, it's guaranteed that * no new reference will be given out by percpu_ref_tryget(). */ -static inline bool percpu_ref_tryget(struct percpu_ref *ref) +static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) { unsigned __percpu *pcpu_count; int ret = false; -- cgit v1.2.3-71-gd317 From 4fb6e25049cb6fa0accc7f1b7c192b952fad7ac8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 9 May 2014 15:11:53 -0400 Subject: percpu-refcount: implement percpu_ref_tryget() Implement percpu_ref_tryget() which fails if the refcnt already reached zero. Note that this is different from the recently renamed percpu_ref_tryget_live() which fails if the refcnt has been killed and is draining the remaining references. percpu_ref_tryget() succeeds on a killed refcnt as long as its current refcnt is above zero. Signed-off-by: Tejun Heo Acked-by: Kent Overstreet --- include/linux/percpu-refcount.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index e22d15597cc3..dba35c411e8c 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -117,6 +117,36 @@ static inline void percpu_ref_get(struct percpu_ref *ref) rcu_read_unlock_sched(); } +/** + * percpu_ref_tryget - try to increment a percpu refcount + * @ref: percpu_ref to try-get + * + * Increment a percpu refcount unless its count already reached zero. + * Returns %true on success; %false on failure. + * + * The caller is responsible for ensuring that @ref stays accessible. + */ +static inline bool percpu_ref_tryget(struct percpu_ref *ref) +{ + unsigned __percpu *pcpu_count; + int ret = false; + + rcu_read_lock_sched(); + + pcpu_count = ACCESS_ONCE(ref->pcpu_count); + + if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) { + __this_cpu_inc(*pcpu_count); + ret = true; + } else { + ret = atomic_inc_not_zero(&ref->count); + } + + rcu_read_unlock_sched(); + + return ret; +} + /** * percpu_ref_tryget_live - try to increment a live percpu refcount * @ref: percpu_ref to try-get @@ -128,6 +158,8 @@ static inline void percpu_ref_get(struct percpu_ref *ref) * will fail. For such guarantee, percpu_ref_kill_and_confirm() should be * used. After the confirm_kill callback is invoked, it's guaranteed that * no new reference will be given out by percpu_ref_tryget(). + * + * The caller is responsible for ensuring that @ref stays accessible. */ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) { -- cgit v1.2.3-71-gd317 From 49c50b97b5522a987b80fbbf9d9869deee8d23b0 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 6 May 2014 16:02:19 -0700 Subject: mtd: nand: refactor erase_cmd() to return chip status The nand_chip::erase_cmd callback previously served a dual purpose; for one, it allowed a per-flash-chip override, so that AG-AND devices could use a different erase command than other NAND. These AND devices were dropped in commit 14c6578683367b1e7af0c3c09e872b45a45183a7 (mtd: nand: remove AG-AND support). On the other hand, some drivers (denali and doc-g4) need to use this sort of callback to implement controller-specific erase operations. To make the latter operation easier for some drivers (e.g., ST's new BCH NAND driver), it helps if the command dispatch and wait functions can be lumped together, rather than called separately. This patch does two things: 1. Pull the call to chip->waitfunc() into chip->erase_cmd(), and return the status from this callback 2. Rename erase_cmd() to just erase(), since this callback does a little more than just send a command Signed-off-by: Brian Norris Tested-by: Lee Jones --- drivers/mtd/nand/denali.c | 7 +++---- drivers/mtd/nand/docg4.c | 6 ++++-- drivers/mtd/nand/nand_base.c | 14 +++++++------- include/linux/mtd/nand.h | 5 ++--- 4 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c index c07cd573ad3a..9f2012a3e764 100644 --- a/drivers/mtd/nand/denali.c +++ b/drivers/mtd/nand/denali.c @@ -1233,7 +1233,7 @@ static int denali_waitfunc(struct mtd_info *mtd, struct nand_chip *chip) return status; } -static void denali_erase(struct mtd_info *mtd, int page) +static int denali_erase(struct mtd_info *mtd, int page) { struct denali_nand_info *denali = mtd_to_denali(mtd); @@ -1250,8 +1250,7 @@ static void denali_erase(struct mtd_info *mtd, int page) irq_status = wait_for_irq(denali, INTR_STATUS__ERASE_COMP | INTR_STATUS__ERASE_FAIL); - denali->status = (irq_status & INTR_STATUS__ERASE_FAIL) ? - NAND_STATUS_FAIL : PASS; + return (irq_status & INTR_STATUS__ERASE_FAIL) ? NAND_STATUS_FAIL : PASS; } static void denali_cmdfunc(struct mtd_info *mtd, unsigned int cmd, int col, @@ -1584,7 +1583,7 @@ int denali_init(struct denali_nand_info *denali) denali->nand.ecc.write_page_raw = denali_write_page_raw; denali->nand.ecc.read_oob = denali_read_oob; denali->nand.ecc.write_oob = denali_write_oob; - denali->nand.erase_cmd = denali_erase; + denali->nand.erase = denali_erase; if (nand_scan_tail(&denali->mtd)) { ret = -ENXIO; diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c index 1b0265e85a06..ce24637e14f1 100644 --- a/drivers/mtd/nand/docg4.c +++ b/drivers/mtd/nand/docg4.c @@ -872,7 +872,7 @@ static int docg4_read_oob(struct mtd_info *mtd, struct nand_chip *nand, return 0; } -static void docg4_erase_block(struct mtd_info *mtd, int page) +static int docg4_erase_block(struct mtd_info *mtd, int page) { struct nand_chip *nand = mtd->priv; struct docg4_priv *doc = nand->priv; @@ -916,6 +916,8 @@ static void docg4_erase_block(struct mtd_info *mtd, int page) write_nop(docptr); poll_status(doc); write_nop(docptr); + + return nand->waitfunc(mtd, nand); } static int write_page(struct mtd_info *mtd, struct nand_chip *nand, @@ -1236,7 +1238,7 @@ static void __init init_mtd_structs(struct mtd_info *mtd) nand->block_markbad = docg4_block_markbad; nand->read_buf = docg4_read_buf; nand->write_buf = docg4_write_buf16; - nand->erase_cmd = docg4_erase_block; + nand->erase = docg4_erase_block; nand->ecc.read_page = docg4_read_page; nand->ecc.write_page = docg4_write_page; nand->ecc.read_page_raw = docg4_read_page_raw; diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index f6c5685b79a6..7853b9b0a05e 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2617,18 +2617,20 @@ out: } /** - * single_erase_cmd - [GENERIC] NAND standard block erase command function + * single_erase - [GENERIC] NAND standard block erase command function * @mtd: MTD device structure * @page: the page address of the block which will be erased * - * Standard erase command for NAND chips. + * Standard erase command for NAND chips. Returns NAND status. */ -static void single_erase_cmd(struct mtd_info *mtd, int page) +static int single_erase(struct mtd_info *mtd, int page) { struct nand_chip *chip = mtd->priv; /* Send commands to erase a block */ chip->cmdfunc(mtd, NAND_CMD_ERASE1, -1, page); chip->cmdfunc(mtd, NAND_CMD_ERASE2, -1, -1); + + return chip->waitfunc(mtd, chip); } /** @@ -2709,9 +2711,7 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, (page + pages_per_block)) chip->pagebuf = -1; - chip->erase_cmd(mtd, page & chip->pagemask); - - status = chip->waitfunc(mtd, chip); + status = chip->erase(mtd, page & chip->pagemask); /* * See if operation failed and additional status checks are @@ -3684,7 +3684,7 @@ ident_done: } chip->badblockbits = 8; - chip->erase_cmd = single_erase_cmd; + chip->erase = single_erase; /* Do not replace user supplied command function! */ if (mtd->writesize > 512 && chip->cmdfunc == nand_command) diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 450d61ec7f06..7a922e6c4e4b 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -552,8 +552,7 @@ struct nand_buffers { * @ecc: [BOARDSPECIFIC] ECC control structure * @buffers: buffer structure for read/write * @hwcontrol: platform-specific hardware control structure - * @erase_cmd: [INTERN] erase command write function, selectable due - * to AND support. + * @erase: [REPLACEABLE] erase function * @scan_bbt: [REPLACEABLE] function to scan bad block table * @chip_delay: [BOARDSPECIFIC] chip dependent delay for transferring * data from array to read regs (tR). @@ -637,7 +636,7 @@ struct nand_chip { void (*cmdfunc)(struct mtd_info *mtd, unsigned command, int column, int page_addr); int(*waitfunc)(struct mtd_info *mtd, struct nand_chip *this); - void (*erase_cmd)(struct mtd_info *mtd, int page); + int (*erase)(struct mtd_info *mtd, int page); int (*scan_bbt)(struct mtd_info *mtd); int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state, int status, int page); -- cgit v1.2.3-71-gd317 From 5a134faeef82b46ff4ad244d11d8c6be41679834 Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Fri, 9 May 2014 21:35:28 +0200 Subject: Bluetooth: Store TX power level for connection This patch adds support to store local TX power level for connection when reply for HCI_Read_Transmit_Power_Level is received. Signed-off-by: Andrzej Kaczmarek Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 11 +++++++++++ include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_conn.c | 1 + net/bluetooth/hci_event.c | 28 ++++++++++++++++++++++++++++ 4 files changed, 41 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index ad2ecc92380d..16587dcd6a91 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1054,6 +1054,17 @@ struct hci_cp_write_page_scan_activity { __le16 window; } __packed; +#define HCI_OP_READ_TX_POWER 0x0c2d +struct hci_cp_read_tx_power { + __le16 handle; + __u8 type; +} __packed; +struct hci_rp_read_tx_power { + __u8 status; + __le16 handle; + __s8 tx_power; +} __packed; + #define HCI_OP_READ_PAGE_SCAN_TYPE 0x0c46 struct hci_rp_read_page_scan_type { __u8 status; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 0318d5263837..211bad6a3366 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -375,6 +375,7 @@ struct hci_conn { __u16 le_conn_min_interval; __u16 le_conn_max_interval; __s8 rssi; + __s8 tx_power; unsigned long flags; __u8 remote_cap; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 55a174317925..74b368bfe102 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -407,6 +407,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) conn->io_capability = hdev->io_capability; conn->remote_auth = 0xff; conn->key_type = 0xff; + conn->tx_power = HCI_TX_POWER_INVALID; set_bit(HCI_CONN_POWER_SAVE, &conn->flags); conn->disc_timeout = HCI_DISCONN_TIMEOUT; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 2bb0053d4c45..fa614e3430a7 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1264,6 +1264,30 @@ static void hci_cc_read_rssi(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_unlock(hdev); } +static void hci_cc_read_tx_power(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_cp_read_tx_power *sent; + struct hci_rp_read_tx_power *rp = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + sent = hci_sent_cmd_data(hdev, HCI_OP_READ_TX_POWER); + if (!sent) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); + if (conn && sent->type == 0x00) + conn->tx_power = rp->tx_power; + + hci_dev_unlock(hdev); +} + static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { BT_DBG("%s status 0x%2.2x", hdev->name, status); @@ -2660,6 +2684,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_read_rssi(hdev, skb); break; + case HCI_OP_READ_TX_POWER: + hci_cc_read_tx_power(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); break; -- cgit v1.2.3-71-gd317 From f2727f7eb9132803d06309839a95de3dad82d237 Mon Sep 17 00:00:00 2001 From: Dimitri John Ledkov Date: Wed, 7 May 2014 20:55:30 +0100 Subject: NVMe: Update namespace and controller identify structures to the 1.1a spec Controller: add CNTLID, AVSCC, APSTA, NVSCC, ACWU, SGLS fields. Namespace: add NMIC, RESCAP, EUI64 fields. EUI64 is specifically interesting, since it can be used to construct an UEFI NVMe device path for a boot entry. As per NVM Express 1.1a spec: http://www.nvmexpress.org/wp-content/uploads/NVM-Express-1_1a.pdf Signed-off-by: Dimitri John Ledkov Signed-off-by: Matthew Wilcox --- include/uapi/linux/nvme.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index f090336d5bad..e74da782d69c 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -45,7 +45,8 @@ struct nvme_id_ctrl { __u8 ieee[3]; __u8 mic; __u8 mdts; - __u8 rsvd78[178]; + __u16 cntlid; + __u8 rsvd80[176]; __le16 oacs; __u8 acl; __u8 aerl; @@ -53,7 +54,9 @@ struct nvme_id_ctrl { __u8 lpa; __u8 elpe; __u8 npss; - __u8 rsvd264[248]; + __u8 avscc; + __u8 apsta; + __u8 rsvd266[246]; __u8 sqes; __u8 cqes; __u8 rsvd514[2]; @@ -64,7 +67,12 @@ struct nvme_id_ctrl { __u8 vwc; __le16 awun; __le16 awupf; - __u8 rsvd530[1518]; + __u8 nvscc; + __u8 rsvd531; + __le16 acwu; + __u8 rsvd534[2]; + __le32 sgls; + __u8 rsvd540[1508]; struct nvme_id_power_state psd[32]; __u8 vs[1024]; }; @@ -92,7 +100,10 @@ struct nvme_id_ns { __u8 mc; __u8 dpc; __u8 dps; - __u8 rsvd30[98]; + __u8 nmic; + __u8 rescap; + __u8 rsvd32[88]; + __u8 eui64[8]; struct nvme_lbaf lbaf[16]; __u8 rsvd192[192]; __u8 vs[3712]; -- cgit v1.2.3-71-gd317 From 9739eef13c926645fbf88bcb77e66442fa75d688 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 8 May 2014 14:10:51 -0700 Subject: net: filter: make BPF conversion more readable Introduce BPF helper macros to define instructions (similar to old BPF_STMT/BPF_JUMP macros) Use them while converting classic BPF to internal and in BPF testsuite later. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 51 ++++++++++++++++++ net/core/filter.c | 142 +++++++++++++++++-------------------------------- 2 files changed, 101 insertions(+), 92 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index ed1efab10b8f..4457b383961c 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -79,6 +79,57 @@ enum { /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 +/* bpf_add|sub|...: a += x, bpf_mov: a = x */ +#define BPF_ALU64_REG(op, a, x) \ + ((struct sock_filter_int) {BPF_ALU64|BPF_OP(op)|BPF_X, a, x, 0, 0}) +#define BPF_ALU32_REG(op, a, x) \ + ((struct sock_filter_int) {BPF_ALU|BPF_OP(op)|BPF_X, a, x, 0, 0}) + +/* bpf_add|sub|...: a += imm, bpf_mov: a = imm */ +#define BPF_ALU64_IMM(op, a, imm) \ + ((struct sock_filter_int) {BPF_ALU64|BPF_OP(op)|BPF_K, a, 0, 0, imm}) +#define BPF_ALU32_IMM(op, a, imm) \ + ((struct sock_filter_int) {BPF_ALU|BPF_OP(op)|BPF_K, a, 0, 0, imm}) + +/* R0 = *(uint *) (skb->data + off) */ +#define BPF_LD_ABS(size, off) \ + ((struct sock_filter_int) {BPF_LD|BPF_SIZE(size)|BPF_ABS, 0, 0, 0, off}) + +/* R0 = *(uint *) (skb->data + x + off) */ +#define BPF_LD_IND(size, x, off) \ + ((struct sock_filter_int) {BPF_LD|BPF_SIZE(size)|BPF_IND, 0, x, 0, off}) + +/* a = *(uint *) (x + off) */ +#define BPF_LDX_MEM(sz, a, x, off) \ + ((struct sock_filter_int) {BPF_LDX|BPF_SIZE(sz)|BPF_MEM, a, x, off, 0}) + +/* if (a 'op' x) goto pc+off */ +#define BPF_JMP_REG(op, a, x, off) \ + ((struct sock_filter_int) {BPF_JMP|BPF_OP(op)|BPF_X, a, x, off, 0}) + +/* if (a 'op' imm) goto pc+off */ +#define BPF_JMP_IMM(op, a, imm, off) \ + ((struct sock_filter_int) {BPF_JMP|BPF_OP(op)|BPF_K, a, 0, off, imm}) + +#define BPF_EXIT_INSN() \ + ((struct sock_filter_int) {BPF_JMP|BPF_EXIT, 0, 0, 0, 0}) + +static inline int size_to_bpf(int size) +{ + switch (size) { + case 1: + return BPF_B; + case 2: + return BPF_H; + case 4: + return BPF_W; + case 8: + return BPF_DW; + default: + return -EINVAL; + } +} + /* Macro to invoke filter function. */ #define SK_RUN_FILTER(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) diff --git a/net/core/filter.c b/net/core/filter.c index eb020a7d6f55..9aaa05ad8fe3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -668,10 +668,9 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_PROTOCOL: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); - insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_CTX; - insn->off = offsetof(struct sk_buff, protocol); + /* A = *(u16 *) (ctx + offsetof(protocol)) */ + *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, protocol)); insn++; /* A = ntohs(A) [emitting a nop or swap16] */ @@ -681,37 +680,27 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_PKTTYPE: - insn->code = BPF_LDX | BPF_MEM | BPF_B; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_CTX; - insn->off = pkt_type_offset(); + *insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, + pkt_type_offset()); if (insn->off < 0) return false; insn++; - insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = BPF_REG_A; - insn->imm = PKT_TYPE_MAX; + *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX); break; case SKF_AD_OFF + SKF_AD_IFINDEX: case SKF_AD_OFF + SKF_AD_HATYPE: - if (FIELD_SIZEOF(struct sk_buff, dev) == 8) - insn->code = BPF_LDX | BPF_MEM | BPF_DW; - else - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = BPF_REG_TMP; - insn->x_reg = BPF_REG_CTX; - insn->off = offsetof(struct sk_buff, dev); + *insn = BPF_LDX_MEM(size_to_bpf(FIELD_SIZEOF(struct sk_buff, dev)), + BPF_REG_TMP, BPF_REG_CTX, + offsetof(struct sk_buff, dev)); insn++; - insn->code = BPF_JMP | BPF_JNE | BPF_K; - insn->a_reg = BPF_REG_TMP; - insn->imm = 0; - insn->off = 1; + /* if (tmp != 0) goto pc+1 */ + *insn = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1); insn++; - insn->code = BPF_JMP | BPF_EXIT; + *insn = BPF_EXIT_INSN(); insn++; BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); @@ -732,55 +721,45 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_MARK: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_CTX; - insn->off = offsetof(struct sk_buff, mark); + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, mark)); break; case SKF_AD_OFF + SKF_AD_RXHASH: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_CTX; - insn->off = offsetof(struct sk_buff, hash); + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, hash)); break; case SKF_AD_OFF + SKF_AD_QUEUE: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); - insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_CTX; - insn->off = offsetof(struct sk_buff, queue_mapping); + *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, queue_mapping)); break; case SKF_AD_OFF + SKF_AD_VLAN_TAG: case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); - insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_CTX; - insn->off = offsetof(struct sk_buff, vlan_tci); + /* A = *(u16 *) (ctx + offsetof(vlan_tci)) */ + *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, vlan_tci)); insn++; BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { - insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = BPF_REG_A; - insn->imm = ~VLAN_TAG_PRESENT; + *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, + ~VLAN_TAG_PRESENT); } else { - insn->code = BPF_ALU | BPF_RSH | BPF_K; - insn->a_reg = BPF_REG_A; - insn->imm = 12; + /* A >>= 12 */ + *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12); insn++; - insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = BPF_REG_A; - insn->imm = 1; + /* A &= 1 */ + *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1); } break; @@ -790,21 +769,15 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_CPU: case SKF_AD_OFF + SKF_AD_RANDOM: /* arg1 = ctx */ - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = BPF_REG_ARG1; - insn->x_reg = BPF_REG_CTX; + *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG1, BPF_REG_CTX); insn++; /* arg2 = A */ - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = BPF_REG_ARG2; - insn->x_reg = BPF_REG_A; + *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG2, BPF_REG_A); insn++; /* arg3 = X */ - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = BPF_REG_ARG3; - insn->x_reg = BPF_REG_X; + *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG3, BPF_REG_X); insn++; /* Emit call(ctx, arg2=A, arg3=X) */ @@ -829,9 +802,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_ALU_XOR_X: - insn->code = BPF_ALU | BPF_XOR | BPF_X; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_X; + /* A ^= X */ + *insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X); break; default: @@ -897,9 +869,7 @@ do_pass: fp = prog; if (new_insn) { - new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - new_insn->a_reg = BPF_REG_CTX; - new_insn->x_reg = BPF_REG_ARG1; + *new_insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_CTX, BPF_REG_ARG1); } new_insn++; @@ -1027,34 +997,28 @@ do_pass: /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */ case BPF_LDX | BPF_MSH | BPF_B: - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = BPF_REG_TMP; - insn->x_reg = BPF_REG_A; + /* tmp = A */ + *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_TMP, BPF_REG_A); insn++; - insn->code = BPF_LD | BPF_ABS | BPF_B; - insn->a_reg = BPF_REG_A; - insn->imm = fp->k; + /* A = R0 = *(u8 *) (skb->data + K) */ + *insn = BPF_LD_ABS(BPF_B, fp->k); insn++; - insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = BPF_REG_A; - insn->imm = 0xf; + /* A &= 0xf */ + *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); insn++; - insn->code = BPF_ALU | BPF_LSH | BPF_K; - insn->a_reg = BPF_REG_A; - insn->imm = 2; + /* A <<= 2 */ + *insn = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); insn++; - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = BPF_REG_X; - insn->x_reg = BPF_REG_A; + /* X = A */ + *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A); insn++; - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_TMP; + /* A = tmp */ + *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_TMP); break; /* RET_K, RET_A are remaped into 2 insns. */ @@ -1068,7 +1032,7 @@ do_pass: insn->imm = fp->k; insn++; - insn->code = BPF_JMP | BPF_EXIT; + *insn = BPF_EXIT_INSN(); break; /* Store to stack. */ @@ -1102,16 +1066,12 @@ do_pass: /* X = A */ case BPF_MISC | BPF_TAX: - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = BPF_REG_X; - insn->x_reg = BPF_REG_A; + *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A); break; /* A = X */ case BPF_MISC | BPF_TXA: - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_X; + *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_X); break; /* A = skb->len or X = skb->len */ @@ -1126,10 +1086,8 @@ do_pass: /* access seccomp_data fields */ case BPF_LDX | BPF_ABS | BPF_W: - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_CTX; - insn->off = fp->k; + /* A = *(u32 *) (ctx + K) */ + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k); break; default: -- cgit v1.2.3-71-gd317 From 7d87a7f709650bde4d7d63117f25ee1c095da5dd Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 9 Apr 2014 18:19:04 +0300 Subject: srm/i915/chv: Add Cherryview PCI IDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: Update to also fill in the new num_pipes field. v3: Rebase on top of the pciid extraction. v4: Switch from info->has*ring to info->ring mask. Also add VEBOX support whiel at it. v5: s/CHV_PCI_IDS/CHV_IDS/, and drop the trailing '\' Signed-off-by: Ville Syrjälä Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 12 +++++++++++- include/drm/i915_pciids.h | 6 ++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 4024e16ae63d..6868bc0eabd3 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -299,6 +299,15 @@ static const struct intel_device_info intel_broadwell_gt3m_info = { GEN_DEFAULT_PIPEOFFSETS, }; +static const struct intel_device_info intel_cherryview_info = { + .is_preliminary = 1, + .gen = 8, .num_pipes = 2, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .is_valleyview = 1, + .display_mmio_offset = VLV_DISPLAY_BASE, +}; + /* * Make sure any device matches here are from most specific to most * general. For example, since the Quanta match is based on the subsystem @@ -334,7 +343,8 @@ static const struct intel_device_info intel_broadwell_gt3m_info = { INTEL_BDW_GT12M_IDS(&intel_broadwell_m_info), \ INTEL_BDW_GT12D_IDS(&intel_broadwell_d_info), \ INTEL_BDW_GT3M_IDS(&intel_broadwell_gt3m_info), \ - INTEL_BDW_GT3D_IDS(&intel_broadwell_gt3d_info) + INTEL_BDW_GT3D_IDS(&intel_broadwell_gt3d_info), \ + INTEL_CHV_IDS(&intel_cherryview_info) static const struct pci_device_id pciidlist[] = { /* aka */ INTEL_PCI_IDS, diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 24f3cad045db..d18f31a77987 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -245,4 +245,10 @@ INTEL_BDW_GT12D_IDS(info), \ INTEL_BDW_GT3D_IDS(info) +#define INTEL_CHV_IDS(info) \ + INTEL_VGA_DEVICE(0x22b0, info), \ + INTEL_VGA_DEVICE(0x22b1, info), \ + INTEL_VGA_DEVICE(0x22b2, info), \ + INTEL_VGA_DEVICE(0x22b3, info) + #endif /* _I915_PCIIDS_H */ -- cgit v1.2.3-71-gd317 From 60ff746739bf805a912484643c720b6124826140 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 4 May 2014 16:39:18 -0700 Subject: net: rename local_df to ignore_df MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As suggested by several people, rename local_df to ignore_df, since it means "ignore df bit if it is set". Cc: Maciej Żenczykowski Cc: Florian Westphal Cc: David S. Miller Cc: Eric Dumazet Signed-off-by: Cong Wang Acked-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- include/net/ip.h | 6 +++--- include/net/ip6_route.h | 2 +- net/core/skbuff.c | 4 ++-- net/ipv4/ip_forward.c | 2 +- net/ipv4/ip_output.c | 12 ++++++------ net/ipv4/netfilter/nf_defrag_ipv4.c | 2 +- net/ipv4/xfrm4_output.c | 2 +- net/ipv6/ip6_output.c | 12 ++++++------ net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/xfrm6_output.c | 6 +++--- net/l2tp/l2tp_core.c | 2 +- net/netfilter/ipvs/ip_vs_xmit.c | 20 ++++++++++---------- net/openvswitch/vport-gre.c | 2 +- net/openvswitch/vport-vxlan.c | 2 +- net/sctp/ipv6.c | 2 +- net/sctp/output.c | 2 +- 17 files changed, 42 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3ca0dda5a42e..7a9beeb1c458 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -426,7 +426,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @csum_start: Offset from skb->head where checksumming should start * @csum_offset: Offset from csum_start where checksum should be stored * @priority: Packet queueing priority - * @local_df: allow local fragmentation + * @ignore_df: allow local fragmentation * @cloned: Head may be cloned (check refcnt to be sure) * @ip_summed: Driver fed us an IP checksum * @nohdr: Payload reference only, must not modify header @@ -514,7 +514,7 @@ struct sk_buff { }; __u32 priority; kmemcheck_bitfield_begin(flags1); - __u8 local_df:1, + __u8 ignore_df:1, cloned:1, ip_summed:2, nohdr:1, diff --git a/include/net/ip.h b/include/net/ip.h index 16146b667ddb..55752985c144 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -269,7 +269,7 @@ static inline bool ip_sk_use_pmtu(const struct sock *sk) return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE; } -static inline bool ip_sk_local_df(const struct sock *sk) +static inline bool ip_sk_ignore_df(const struct sock *sk) { return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO || inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT; @@ -304,7 +304,7 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s { struct iphdr *iph = ip_hdr(skb); - if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) { + if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { /* This is only to work around buggy Windows95/2000 * VJ compression implementations. If the ID field * does not change, they drop every other packet in @@ -320,7 +320,7 @@ static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *d { struct iphdr *iph = ip_hdr(skb); - if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) { + if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { if (sk && inet_sk(sk)->inet_daddr) { iph->id = htons(inet_sk(sk)->inet_id); inet_sk(sk)->inet_id += 1 + more; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 6c4f5eac98e7..38e41e4d0998 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -185,7 +185,7 @@ static inline bool ip6_sk_accept_pmtu(const struct sock *sk) inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT; } -static inline bool ip6_sk_local_df(const struct sock *sk) +static inline bool ip6_sk_ignore_df(const struct sock *sk) { return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO || inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1b62343f5837..3d74530ae82b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -694,7 +694,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif memcpy(new->cb, old->cb, sizeof(old->cb)); new->csum = old->csum; - new->local_df = old->local_df; + new->ignore_df = old->ignore_df; new->pkt_type = old->pkt_type; new->ip_summed = old->ip_summed; skb_copy_queue_mapping(new, old); @@ -3913,7 +3913,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; - skb->local_df = 0; + skb->ignore_df = 0; skb_dst_drop(skb); skb->mark = 0; secpath_reset(skb); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 6f111e48e11c..3a83ce5efa80 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -42,7 +42,7 @@ static bool ip_may_fragment(const struct sk_buff *skb) { return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || - skb->local_df; + skb->ignore_df; } static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a52f50187b54..6aa4380fde1a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -415,7 +415,7 @@ packet_routed: skb_reset_network_header(skb); iph = ip_hdr(skb); *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); - if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df) + if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; @@ -501,7 +501,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) iph = ip_hdr(skb); mtu = ip_skb_dst_mtu(skb); - if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) || + if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size > mtu))) { IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); @@ -866,7 +866,7 @@ static int __ip_append_data(struct sock *sk, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu; + maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu; if (cork->length + length > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -1189,7 +1189,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu; + maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu; if (cork->length + size > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -1350,10 +1350,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk, * to fragment the frame generated here. No matter, what transforms * how transforms change size of the packet, it will come out. */ - skb->local_df = ip_sk_local_df(sk); + skb->ignore_df = ip_sk_ignore_df(sk); /* DF bit is set when we want to see DF on outgoing frames. - * If local_df is set too, we still allow to fragment this frame + * If ignore_df is set too, we still allow to fragment this frame * locally. */ if (inet->pmtudisc == IP_PMTUDISC_DO || inet->pmtudisc == IP_PMTUDISC_PROBE || diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index f40f321b41fc..b8f6381c7d0b 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -34,7 +34,7 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) if (!err) { ip_send_check(ip_hdr(skb)); - skb->local_df = 1; + skb->ignore_df = 1; } return err; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 40e701f2e1e0..8e8c018d9d2d 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -25,7 +25,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) goto out; - if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df) + if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df) goto out; mtu = dst_mtu(skb_dst(skb)); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 31a38bde69ef..ab0cc57f779c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -219,7 +219,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, skb->mark = sk->sk_mark; mtu = dst_mtu(dst); - if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { + if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, @@ -347,11 +347,11 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) if (skb->len <= mtu) return false; - /* ipv6 conntrack defrag sets max_frag_size + local_df */ + /* ipv6 conntrack defrag sets max_frag_size + ignore_df */ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) return true; - if (skb->local_df) + if (skb->ignore_df) return false; if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) @@ -559,7 +559,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ - if (unlikely(!skb->local_df && skb->len > mtu) || + if (unlikely(!skb->ignore_df && skb->len > mtu) || (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { if (skb->sk && dst_allfrag(skb_dst(skb))) @@ -1234,7 +1234,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; - if (ip6_sk_local_df(sk)) + if (ip6_sk_ignore_df(sk)) maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; else maxnonfragsize = mtu; @@ -1544,7 +1544,7 @@ int ip6_push_pending_frames(struct sock *sk) } /* Allow local fragmentation. */ - skb->local_df = ip6_sk_local_df(sk); + skb->ignore_df = ip6_sk_ignore_df(sk); *final_dst = fl6->daddr; __skb_pull(skb, skb_network_header_len(skb)); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 767ab8da8218..0d5279fd852a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -451,7 +451,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) } sub_frag_mem_limit(&fq->q, head->truesize); - head->local_df = 1; + head->ignore_df = 1; head->next = NULL; head->dev = dev; head->tstamp = fq->q.stamp; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 19ef329bdbf8..f47c8b153dd3 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -78,7 +78,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (!skb->local_df && skb->len > mtu) { + if (!skb->ignore_df && skb->len > mtu) { skb->dev = dst->dev; if (xfrm6_local_dontfrag(skb)) @@ -120,7 +120,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) #endif skb->protocol = htons(ETH_P_IPV6); - skb->local_df = 1; + skb->ignore_df = 1; return x->outer_mode->output2(x, skb); } @@ -150,7 +150,7 @@ static int __xfrm6_output(struct sk_buff *skb) if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); return -EMSGSIZE; - } else if (!skb->local_df && skb->len > mtu && skb->sk) { + } else if (!skb->ignore_df && skb->len > mtu && skb->sk) { xfrm_local_error(skb, mtu); return -EMSGSIZE; } diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index aa1a9d44c107..ed0716a075ba 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1073,7 +1073,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, } /* Queue the packet to IP for output */ - skb->local_df = 1; + skb->ignore_df = 1; #if IS_ENABLED(CONFIG_IPV6) if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped) error = inet6_csk_xmit(tunnel->sock, skb, NULL); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index c47444e4cf8c..487b55e04337 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -562,7 +562,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_send_check(iph); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); rcu_read_unlock(); @@ -590,7 +590,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); rcu_read_unlock(); @@ -684,7 +684,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, MTU problem. */ /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); rcu_read_unlock(); @@ -774,7 +774,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, MTU problem. */ /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); rcu_read_unlock(); @@ -886,7 +886,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_select_ident(skb, &rt->dst, NULL); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) @@ -974,7 +974,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, iph->hop_limit = old_iph->hop_limit; /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) @@ -1023,7 +1023,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_send_check(ip_hdr(skb)); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); rcu_read_unlock(); @@ -1060,7 +1060,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); rcu_read_unlock(); @@ -1157,7 +1157,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_vs_nat_icmp(skb, pp, cp, 0); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); rcu_read_unlock(); @@ -1249,7 +1249,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ip_vs_nat_icmp_v6(skb, pp, cp, 0); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); rcu_read_unlock(); diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index ebb6e2442554..0856f014d7a9 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -172,7 +172,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; - skb->local_df = 1; + skb->ignore_df = 1; return iptunnel_xmit(skb->sk, rt, skb, fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 21cceb3bdf78..a93efa3f64c3 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -170,7 +170,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; - skb->local_df = 1; + skb->ignore_df = 1; inet_get_local_port_range(net, &port_min, &port_max); src_port = vxlan_src_port(port_min, port_max, skb); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 2b1738ef9394..4dc5d9e08311 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -216,7 +216,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) IP6_ECN_flow_xmit(sk, fl6->flowlabel); if (!(transport->param_flags & SPP_PMTUD_ENABLE)) - skb->local_df = 1; + skb->ignore_df = 1; SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); diff --git a/net/sctp/output.c b/net/sctp/output.c index 0f4d15fc2627..01ab8e0723f0 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -591,7 +591,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len); - nskb->local_df = packet->ipfragok; + nskb->ignore_df = packet->ipfragok; tp->af_specific->sctp_xmit(nskb, tp); out: -- cgit v1.2.3-71-gd317 From f06c7f9f92295faf701a9628b383156c4efb6119 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 9 May 2014 14:58:05 +0800 Subject: vlan: rename __vlan_find_dev_deep() to __vlan_find_dev_deep_rcu() The __vlan_find_dev_deep should always called in RCU, according David's suggestion, rename to __vlan_find_dev_deep_rcu looks more reasonable. Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 2 +- drivers/net/usb/cdc_mbim.c | 4 ++-- drivers/s390/net/qeth_l3_main.c | 10 +++++----- include/linux/if_vlan.h | 4 ++-- net/8021q/vlan_core.c | 6 +++--- net/bridge/br_netfilter.c | 2 +- 8 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c index c0a9dd55f4e5..b0cbb2b7fd48 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c @@ -185,7 +185,7 @@ static struct net_device *get_iff_from_mac(struct adapter *adapter, if (ether_addr_equal(dev->dev_addr, mac)) { rcu_read_lock(); if (vlan && vlan != VLAN_VID_MASK) { - dev = __vlan_find_dev_deep(dev, htons(ETH_P_8021Q), vlan); + dev = __vlan_find_dev_deep_rcu(dev, htons(ETH_P_8021Q), vlan); } else if (netif_is_bond_slave(dev)) { struct net_device *upper_dev; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 8efeed3325b5..0f1e886d89e3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4068,7 +4068,7 @@ static int update_root_dev_clip(struct net_device *dev) /* Parse all bond and vlan devices layered on top of the physical dev */ for (i = 0; i < VLAN_N_VID; i++) { - root_dev = __vlan_find_dev_deep(dev, htons(ETH_P_8021Q), i); + root_dev = __vlan_find_dev_deep_rcu(dev, htons(ETH_P_8021Q), i); if (!root_dev) continue; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index d2e18b52caba..8a2aeb85e320 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -4146,7 +4146,7 @@ void qlcnic_restore_indev_addr(struct net_device *netdev, unsigned long event) rcu_read_lock(); for_each_set_bit(vid, adapter->vlans, VLAN_N_VID) { - dev = __vlan_find_dev_deep(netdev, htons(ETH_P_8021Q), vid); + dev = __vlan_find_dev_deep_rcu(netdev, htons(ETH_P_8021Q), vid); if (!dev) continue; qlcnic_config_indev_addr(adapter, dev, event); diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 2e025ddcef21..0ab79fca822c 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -223,8 +223,8 @@ static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci) /* need to send the NA on the VLAN dev, if any */ rcu_read_lock(); if (tci) { - netdev = __vlan_find_dev_deep(dev->net, htons(ETH_P_8021Q), - tci); + netdev = __vlan_find_dev_deep_rcu(dev->net, htons(ETH_P_8021Q), + tci); if (!netdev) { rcu_read_unlock(); return; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index c8d91d797ec8..bc2499a24884 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1659,7 +1659,7 @@ static void qeth_l3_add_vlan_mc(struct qeth_card *card) for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) { struct net_device *netdev; - netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), + netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid); if (netdev == NULL || !(netdev->flags & IFF_UP)) @@ -1721,7 +1721,7 @@ static void qeth_l3_add_vlan_mc6(struct qeth_card *card) for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) { struct net_device *netdev; - netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), + netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid); if (netdev == NULL || !(netdev->flags & IFF_UP)) @@ -1766,7 +1766,7 @@ static void qeth_l3_free_vlan_addresses4(struct qeth_card *card, QETH_CARD_TEXT(card, 4, "frvaddr4"); - netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), vid); + netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid); if (!netdev) return; in_dev = in_dev_get(netdev); @@ -1796,7 +1796,7 @@ static void qeth_l3_free_vlan_addresses6(struct qeth_card *card, QETH_CARD_TEXT(card, 4, "frvaddr6"); - netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), vid); + netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid); if (!netdev) return; in6_dev = in6_dev_get(netdev); @@ -2089,7 +2089,7 @@ static int qeth_l3_verify_vlan_dev(struct net_device *dev, struct net_device *netdev; rcu_read_lock(); - netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), + netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid); rcu_read_unlock(); if (netdev == dev) { diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 13bbbde00e68..8c0fb7f3a9a5 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -106,7 +106,7 @@ struct vlan_pcpu_stats { #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) -extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, +extern struct net_device *__vlan_find_dev_deep_rcu(struct net_device *real_dev, __be16 vlan_proto, u16 vlan_id); extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); extern u16 vlan_dev_vlan_id(const struct net_device *dev); @@ -199,7 +199,7 @@ extern void vlan_vids_del_by_dev(struct net_device *dev, extern bool vlan_uses_dev(const struct net_device *dev); #else static inline struct net_device * -__vlan_find_dev_deep(struct net_device *real_dev, +__vlan_find_dev_deep_rcu(struct net_device *real_dev, __be16 vlan_proto, u16 vlan_id) { return NULL; diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 3c32bd257b73..9012b1c922b6 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -63,7 +63,7 @@ bool vlan_do_receive(struct sk_buff **skbp) } /* Must be invoked with rcu_read_lock. */ -struct net_device *__vlan_find_dev_deep(struct net_device *dev, +struct net_device *__vlan_find_dev_deep_rcu(struct net_device *dev, __be16 vlan_proto, u16 vlan_id) { struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info); @@ -81,13 +81,13 @@ struct net_device *__vlan_find_dev_deep(struct net_device *dev, upper_dev = netdev_master_upper_dev_get_rcu(dev); if (upper_dev) - return __vlan_find_dev_deep(upper_dev, + return __vlan_find_dev_deep_rcu(upper_dev, vlan_proto, vlan_id); } return NULL; } -EXPORT_SYMBOL(__vlan_find_dev_deep); +EXPORT_SYMBOL(__vlan_find_dev_deep_rcu); struct net_device *vlan_dev_real_dev(const struct net_device *dev) { diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 2acf7fa1fec6..a615264cf01a 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -535,7 +535,7 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) return br; - vlan = __vlan_find_dev_deep(br, skb->vlan_proto, + vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, vlan_tx_tag_get(skb) & VLAN_VID_MASK); return vlan ? vlan : br; -- cgit v1.2.3-71-gd317 From 907abd51012b9b0b0b687e97d5ebadbddbc682fe Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Mon, 3 Mar 2014 11:36:43 +0900 Subject: mmc: dw_mmc: remove unused member variable. Since using the device-tree, didn't use the callback pointer. So removed the unused callback pointer. When the set_power callback is used, it should be added in future. Signed-off-by: Jaehoon Chung Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/host/dw_mmc.c | 33 +++------------------------------ include/linux/mmc/dw_mmc.h | 14 -------------- 2 files changed, 3 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 55c34cb702d4..81991eca5671 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -850,8 +850,6 @@ static void __dw_mci_start_request(struct dw_mci *host, u32 cmdflags; mrq = slot->mrq; - if (host->pdata->select_slot) - host->pdata->select_slot(slot->id); host->cur_slot = slot; host->mrq = mrq; @@ -985,17 +983,11 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) switch (ios->power_mode) { case MMC_POWER_UP: set_bit(DW_MMC_CARD_NEED_INIT, &slot->flags); - /* Power up slot */ - if (slot->host->pdata->setpower) - slot->host->pdata->setpower(slot->id, mmc->ocr_avail); regs = mci_readl(slot->host, PWREN); regs |= (1 << slot->id); mci_writel(slot->host, PWREN, regs); break; case MMC_POWER_OFF: - /* Power down slot */ - if (slot->host->pdata->setpower) - slot->host->pdata->setpower(slot->id, 0); regs = mci_readl(slot->host, PWREN); regs &= ~(1 << slot->id); mci_writel(slot->host, PWREN, regs); @@ -1009,13 +1001,10 @@ static int dw_mci_get_ro(struct mmc_host *mmc) { int read_only; struct dw_mci_slot *slot = mmc_priv(mmc); - struct dw_mci_board *brd = slot->host->pdata; /* Use platform get_ro function, else try on board write protect */ if (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT) read_only = 0; - else if (brd->get_ro) - read_only = brd->get_ro(slot->id); else if (gpio_is_valid(slot->wp_gpio)) read_only = gpio_get_value(slot->wp_gpio); else @@ -1039,8 +1028,6 @@ static int dw_mci_get_cd(struct mmc_host *mmc) /* Use platform get_cd function, else try onboard card detect */ if (brd->quirks & DW_MCI_QUIRK_BROKEN_CARD_DETECTION) present = 1; - else if (brd->get_cd) - present = !brd->get_cd(slot->id); else if (!IS_ERR_VALUE(gpio_cd)) present = gpio_cd; else @@ -2138,17 +2125,7 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id) mmc->f_max = freq[1]; } - if (host->pdata->get_ocr) - mmc->ocr_avail = host->pdata->get_ocr(id); - else - mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; - - /* - * Start with slot power disabled, it will be enabled when a card - * is detected. - */ - if (host->pdata->setpower) - host->pdata->setpower(id, 0); + mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; if (host->pdata->caps) mmc->caps = host->pdata->caps; @@ -2217,10 +2194,6 @@ err_setup_bus: static void dw_mci_cleanup_slot(struct dw_mci_slot *slot, unsigned int id) { - /* Shutdown detect IRQ */ - if (slot->host->pdata->exit) - slot->host->pdata->exit(id); - /* Debugfs stuff is cleaned up by mmc core */ mmc_remove_host(slot->mmc); slot->host->slot[id] = NULL; @@ -2395,9 +2368,9 @@ int dw_mci_probe(struct dw_mci *host) } } - if (!host->pdata->select_slot && host->pdata->num_slots > 1) { + if (host->pdata->num_slots > 1) { dev_err(host->dev, - "Platform data must supply select_slot function\n"); + "Platform data must supply num_slots.\n"); return -ENODEV; } diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 6ce7d2cd3c7a..babaea93bca6 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -248,20 +248,6 @@ struct dw_mci_board { /* delay in mS before detecting cards after interrupt */ u32 detect_delay_ms; - int (*init)(u32 slot_id, irq_handler_t , void *); - int (*get_ro)(u32 slot_id); - int (*get_cd)(u32 slot_id); - int (*get_ocr)(u32 slot_id); - int (*get_bus_wd)(u32 slot_id); - /* - * Enable power to selected slot and set voltage to desired level. - * Voltage levels are specified using MMC_VDD_xxx defines defined - * in linux/mmc/host.h file. - */ - void (*setpower)(u32 slot_id, u32 volt); - void (*exit)(u32 slot_id); - void (*select_slot)(u32 slot_id); - struct dw_mci_dma_ops *dma_ops; struct dma_pdata *data; struct block_settings *blk_settings; -- cgit v1.2.3-71-gd317 From cdc991790c51c693d0c347a5286af017826a5d01 Mon Sep 17 00:00:00 2001 From: Seungwon Jeon Date: Wed, 23 Apr 2014 17:07:35 +0900 Subject: mmc: drop the speed mode of card's state Timing mode identifier has same role and can take the place of speed mode. This change removes all related speed mode. Signed-off-by: Seungwon Jeon Tested-by: Jaehoon Chung Acked-by: Jaehoon Chung Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/core/bus.c | 8 ++++---- drivers/mmc/core/core.c | 3 +-- drivers/mmc/core/mmc.c | 11 +++-------- drivers/mmc/core/sd.c | 16 +++------------- drivers/mmc/core/sd.h | 1 - drivers/mmc/core/sdio.c | 8 ++------ drivers/net/wireless/rsi/rsi_91x_sdio.c | 4 +--- include/linux/mmc/card.h | 23 ++++++----------------- include/linux/mmc/host.h | 23 +++++++++++++++++++++++ 9 files changed, 43 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index 824644875d41..f37e9d6af84a 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -341,16 +341,16 @@ int mmc_add_card(struct mmc_card *card) if (mmc_host_is_spi(card->host)) { pr_info("%s: new %s%s%s card on SPI\n", mmc_hostname(card->host), - mmc_card_highspeed(card) ? "high speed " : "", - mmc_card_ddr_mode(card) ? "DDR " : "", + mmc_card_hs(card) ? "high speed " : "", + mmc_card_ddr52(card) ? "DDR " : "", type); } else { pr_info("%s: new %s%s%s%s%s card at address %04x\n", mmc_hostname(card->host), mmc_card_uhs(card) ? "ultra high speed " : - (mmc_card_highspeed(card) ? "high speed " : ""), + (mmc_card_hs(card) ? "high speed " : ""), (mmc_card_hs200(card) ? "HS200 " : ""), - mmc_card_ddr_mode(card) ? "DDR " : "", + mmc_card_ddr52(card) ? "DDR " : "", uhs_bus_speed_mode, type, card->rca); } diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index d97dff5fab62..02baa30653fa 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2192,7 +2192,7 @@ int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen) { struct mmc_command cmd = {0}; - if (mmc_card_blockaddr(card) || mmc_card_ddr_mode(card)) + if (mmc_card_blockaddr(card) || mmc_card_ddr52(card)) return 0; cmd.opcode = MMC_SET_BLOCKLEN; @@ -2272,7 +2272,6 @@ static int mmc_do_hw_reset(struct mmc_host *host, int check) } } - host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_DDR); if (mmc_host_is_spi(host)) { host->ios.chip_select = MMC_CS_HIGH; host->ios.bus_mode = MMC_BUSMODE_PUSHPULL; diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index fbcf93d81858..31220529e171 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1083,11 +1083,9 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, } else { if (card->ext_csd.hs_max_dtr > 52000000 && host->caps2 & MMC_CAP2_HS200) { - mmc_card_set_hs200(card); mmc_set_timing(card->host, MMC_TIMING_MMC_HS200); } else { - mmc_card_set_highspeed(card); mmc_set_timing(card->host, MMC_TIMING_MMC_HS); } } @@ -1098,10 +1096,10 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, */ max_dtr = (unsigned int)-1; - if (mmc_card_highspeed(card) || mmc_card_hs200(card)) { + if (mmc_card_hs(card) || mmc_card_hs200(card)) { if (max_dtr > card->ext_csd.hs_max_dtr) max_dtr = card->ext_csd.hs_max_dtr; - if (mmc_card_highspeed(card) && (max_dtr > 52000000)) + if (mmc_card_hs(card) && (max_dtr > 52000000)) max_dtr = 52000000; } else if (max_dtr > card->csd.max_dtr) { max_dtr = card->csd.max_dtr; @@ -1112,7 +1110,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, /* * Indicate DDR mode (if supported). */ - if (mmc_card_highspeed(card)) { + if (mmc_card_hs(card)) { if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_8V) && (host->caps & MMC_CAP_1_8V_DDR)) ddr = MMC_1_8V_DDR_MODE; @@ -1255,7 +1253,6 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, if (err) goto err; } - mmc_card_set_ddr_mode(card); mmc_set_timing(card->host, MMC_TIMING_MMC_DDR52); mmc_set_bus_width(card->host, bus_width); } @@ -1499,7 +1496,6 @@ static int _mmc_suspend(struct mmc_host *host, bool is_suspend) err = mmc_sleep(host); else if (!mmc_host_is_spi(host)) err = mmc_deselect_cards(host); - host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200); if (!err) { mmc_power_off(host); @@ -1629,7 +1625,6 @@ static int mmc_power_restore(struct mmc_host *host) { int ret; - host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200); mmc_claim_host(host); ret = mmc_init_card(host, host->card->ocr, host->card); mmc_release_host(host); diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index aef515755e5b..0c44510bf717 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -887,7 +887,7 @@ unsigned mmc_sd_get_max_clock(struct mmc_card *card) { unsigned max_dtr = (unsigned int)-1; - if (mmc_card_highspeed(card)) { + if (mmc_card_hs(card)) { if (max_dtr > card->sw_caps.hs_max_dtr) max_dtr = card->sw_caps.hs_max_dtr; } else if (max_dtr > card->csd.max_dtr) { @@ -897,12 +897,6 @@ unsigned mmc_sd_get_max_clock(struct mmc_card *card) return max_dtr; } -void mmc_sd_go_highspeed(struct mmc_card *card) -{ - mmc_card_set_highspeed(card); - mmc_set_timing(card->host, MMC_TIMING_SD_HS); -} - /* * Handle the detection and initialisation of a card. * @@ -977,16 +971,13 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, err = mmc_sd_init_uhs_card(card); if (err) goto free_card; - - /* Card is an ultra-high-speed card */ - mmc_card_set_uhs(card); } else { /* * Attempt to change to high-speed (if supported) */ err = mmc_sd_switch_hs(card); if (err > 0) - mmc_sd_go_highspeed(card); + mmc_set_timing(card->host, MMC_TIMING_SD_HS); else if (err) goto free_card; @@ -1081,7 +1072,7 @@ static int _mmc_sd_suspend(struct mmc_host *host) if (!mmc_host_is_spi(host)) err = mmc_deselect_cards(host); - host->card->state &= ~MMC_STATE_HIGHSPEED; + if (!err) { mmc_power_off(host); mmc_card_set_suspended(host->card); @@ -1190,7 +1181,6 @@ static int mmc_sd_power_restore(struct mmc_host *host) { int ret; - host->card->state &= ~MMC_STATE_HIGHSPEED; mmc_claim_host(host); ret = mmc_sd_init_card(host, host->card->ocr, host->card); mmc_release_host(host); diff --git a/drivers/mmc/core/sd.h b/drivers/mmc/core/sd.h index 4b34b24f3f76..aab824a9a7f3 100644 --- a/drivers/mmc/core/sd.h +++ b/drivers/mmc/core/sd.h @@ -12,6 +12,5 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card, bool reinit); unsigned mmc_sd_get_max_clock(struct mmc_card *card); int mmc_sd_switch_hs(struct mmc_card *card); -void mmc_sd_go_highspeed(struct mmc_card *card); #endif diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 9933e426bc36..e636d9e99e4a 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -363,7 +363,7 @@ static unsigned mmc_sdio_get_max_clock(struct mmc_card *card) { unsigned max_dtr; - if (mmc_card_highspeed(card)) { + if (mmc_card_hs(card)) { /* * The SDIO specification doesn't mention how * the CIS transfer speed register relates to @@ -733,7 +733,6 @@ try_again: mmc_set_clock(host, card->cis.max_dtr); if (card->cccr.high_speed) { - mmc_card_set_highspeed(card); mmc_set_timing(card->host, MMC_TIMING_SD_HS); } @@ -792,16 +791,13 @@ try_again: err = mmc_sdio_init_uhs_card(card); if (err) goto remove; - - /* Card is an ultra-high-speed card */ - mmc_card_set_uhs(card); } else { /* * Switch to high-speed (if supported). */ err = sdio_enable_hs(card); if (err > 0) - mmc_sd_go_highspeed(card); + mmc_set_timing(card->host, MMC_TIMING_SD_HS); else if (err) goto remove; diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c index 2e39d38d6a9e..46e7af446f01 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c @@ -285,7 +285,6 @@ static void rsi_reset_card(struct sdio_func *pfunction) if (err) { rsi_dbg(ERR_ZONE, "%s: CCCR speed reg read failed: %d\n", __func__, err); - card->state &= ~MMC_STATE_HIGHSPEED; } else { err = rsi_cmd52writebyte(card, SDIO_CCCR_SPEED, @@ -296,14 +295,13 @@ static void rsi_reset_card(struct sdio_func *pfunction) __func__, err); return; } - mmc_card_set_highspeed(card); host->ios.timing = MMC_TIMING_SD_HS; host->ops->set_ios(host, &host->ios); } } /* Set clock */ - if (mmc_card_highspeed(card)) + if (mmc_card_hs(card)) clock = 50000000; else clock = card->cis.max_dtr; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index aa7e57f60fb2..aadeaf155d0e 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -194,6 +194,7 @@ struct sdio_cis { }; struct mmc_host; +struct mmc_ios; struct sdio_func; struct sdio_func_tuple; @@ -250,15 +251,11 @@ struct mmc_card { unsigned int state; /* (our) card state */ #define MMC_STATE_PRESENT (1<<0) /* present in sysfs */ #define MMC_STATE_READONLY (1<<1) /* card is read-only */ -#define MMC_STATE_HIGHSPEED (1<<2) /* card is in high speed mode */ -#define MMC_STATE_BLOCKADDR (1<<3) /* card uses block-addressing */ -#define MMC_STATE_HIGHSPEED_DDR (1<<4) /* card is in high speed mode */ -#define MMC_STATE_ULTRAHIGHSPEED (1<<5) /* card is in ultra high speed mode */ -#define MMC_CARD_SDXC (1<<6) /* card is SDXC */ -#define MMC_CARD_REMOVED (1<<7) /* card has been removed */ -#define MMC_STATE_HIGHSPEED_200 (1<<8) /* card is in HS200 mode */ -#define MMC_STATE_DOING_BKOPS (1<<10) /* card is doing BKOPS */ -#define MMC_STATE_SUSPENDED (1<<11) /* card is suspended */ +#define MMC_STATE_BLOCKADDR (1<<2) /* card uses block-addressing */ +#define MMC_CARD_SDXC (1<<3) /* card is SDXC */ +#define MMC_CARD_REMOVED (1<<4) /* card has been removed */ +#define MMC_STATE_DOING_BKOPS (1<<5) /* card is doing BKOPS */ +#define MMC_STATE_SUSPENDED (1<<6) /* card is suspended */ unsigned int quirks; /* card quirks */ #define MMC_QUIRK_LENIENT_FN0 (1<<0) /* allow SDIO FN0 writes outside of the VS CCCR range */ #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1) /* use func->cur_blksize */ @@ -418,11 +415,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data) #define mmc_card_present(c) ((c)->state & MMC_STATE_PRESENT) #define mmc_card_readonly(c) ((c)->state & MMC_STATE_READONLY) -#define mmc_card_highspeed(c) ((c)->state & MMC_STATE_HIGHSPEED) -#define mmc_card_hs200(c) ((c)->state & MMC_STATE_HIGHSPEED_200) #define mmc_card_blockaddr(c) ((c)->state & MMC_STATE_BLOCKADDR) -#define mmc_card_ddr_mode(c) ((c)->state & MMC_STATE_HIGHSPEED_DDR) -#define mmc_card_uhs(c) ((c)->state & MMC_STATE_ULTRAHIGHSPEED) #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC) #define mmc_card_removed(c) ((c) && ((c)->state & MMC_CARD_REMOVED)) #define mmc_card_doing_bkops(c) ((c)->state & MMC_STATE_DOING_BKOPS) @@ -430,11 +423,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data) #define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT) #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY) -#define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED) -#define mmc_card_set_hs200(c) ((c)->state |= MMC_STATE_HIGHSPEED_200) #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR) -#define mmc_card_set_ddr_mode(c) ((c)->state |= MMC_STATE_HIGHSPEED_DDR) -#define mmc_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED) #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC) #define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED) #define mmc_card_set_doing_bkops(c) ((c)->state |= MMC_STATE_DOING_BKOPS) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 0cf705c83998..a43853779799 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -17,6 +17,7 @@ #include #include +#include #include struct mmc_ios { @@ -478,4 +479,26 @@ static inline unsigned int mmc_host_clk_rate(struct mmc_host *host) return host->ios.clock; } #endif + +static inline int mmc_card_hs(struct mmc_card *card) +{ + return card->host->ios.timing == MMC_TIMING_SD_HS || + card->host->ios.timing == MMC_TIMING_MMC_HS; +} + +static inline int mmc_card_uhs(struct mmc_card *card) +{ + return card->host->ios.timing >= MMC_TIMING_UHS_SDR12 && + card->host->ios.timing <= MMC_TIMING_UHS_DDR50; +} + +static inline bool mmc_card_hs200(struct mmc_card *card) +{ + return card->host->ios.timing == MMC_TIMING_MMC_HS200; +} + +static inline bool mmc_card_ddr52(struct mmc_card *card) +{ + return card->host->ios.timing == MMC_TIMING_MMC_DDR52; +} #endif /* LINUX_MMC_HOST_H */ -- cgit v1.2.3-71-gd317 From 2415c0ef618b3cd95581c7f633cbab78b29b7ab0 Mon Sep 17 00:00:00 2001 From: Seungwon Jeon Date: Wed, 23 Apr 2014 17:07:58 +0900 Subject: mmc: identify available device type to select Device types which are supported by both host and device can be identified when EXT_CSD is read. There is no need to check host's capability anymore. Signed-off-by: Seungwon Jeon Tested-by: Jaehoon Chung Acked-by: Jaehoon Chung Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/core/mmc.c | 72 ++++++++++++++++++++++++++---------------------- include/linux/mmc/card.h | 2 +- include/linux/mmc/host.h | 6 ---- include/linux/mmc/mmc.h | 12 +++++--- 4 files changed, 48 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 31220529e171..b5691fee9629 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -243,28 +243,46 @@ static void mmc_select_card_type(struct mmc_card *card) u8 card_type = card->ext_csd.raw_card_type & EXT_CSD_CARD_TYPE_MASK; u32 caps = host->caps, caps2 = host->caps2; unsigned int hs_max_dtr = 0; + unsigned int avail_type = 0; - if (card_type & EXT_CSD_CARD_TYPE_26) + if (caps & MMC_CAP_MMC_HIGHSPEED && + card_type & EXT_CSD_CARD_TYPE_HS_26) { hs_max_dtr = MMC_HIGH_26_MAX_DTR; + avail_type |= EXT_CSD_CARD_TYPE_HS_26; + } if (caps & MMC_CAP_MMC_HIGHSPEED && - card_type & EXT_CSD_CARD_TYPE_52) + card_type & EXT_CSD_CARD_TYPE_HS_52) { hs_max_dtr = MMC_HIGH_52_MAX_DTR; + avail_type |= EXT_CSD_CARD_TYPE_HS_52; + } - if ((caps & MMC_CAP_1_8V_DDR && - card_type & EXT_CSD_CARD_TYPE_DDR_1_8V) || - (caps & MMC_CAP_1_2V_DDR && - card_type & EXT_CSD_CARD_TYPE_DDR_1_2V)) + if (caps & MMC_CAP_1_8V_DDR && + card_type & EXT_CSD_CARD_TYPE_DDR_1_8V) { hs_max_dtr = MMC_HIGH_DDR_MAX_DTR; + avail_type |= EXT_CSD_CARD_TYPE_DDR_1_8V; + } + + if (caps & MMC_CAP_1_2V_DDR && + card_type & EXT_CSD_CARD_TYPE_DDR_1_2V) { + hs_max_dtr = MMC_HIGH_DDR_MAX_DTR; + avail_type |= EXT_CSD_CARD_TYPE_DDR_1_2V; + } - if ((caps2 & MMC_CAP2_HS200_1_8V_SDR && - card_type & EXT_CSD_CARD_TYPE_SDR_1_8V) || - (caps2 & MMC_CAP2_HS200_1_2V_SDR && - card_type & EXT_CSD_CARD_TYPE_SDR_1_2V)) + if (caps2 & MMC_CAP2_HS200_1_8V_SDR && + card_type & EXT_CSD_CARD_TYPE_HS200_1_8V) { hs_max_dtr = MMC_HS200_MAX_DTR; + avail_type |= EXT_CSD_CARD_TYPE_HS200_1_8V; + } + + if (caps2 & MMC_CAP2_HS200_1_2V_SDR && + card_type & EXT_CSD_CARD_TYPE_HS200_1_2V) { + hs_max_dtr = MMC_HS200_MAX_DTR; + avail_type |= EXT_CSD_CARD_TYPE_HS200_1_2V; + } card->ext_csd.hs_max_dtr = hs_max_dtr; - card->ext_csd.card_type = card_type; + card->mmc_avail_type = avail_type; } /* @@ -800,12 +818,10 @@ static int mmc_select_hs200(struct mmc_card *card) host = card->host; - if (card->ext_csd.card_type & EXT_CSD_CARD_TYPE_SDR_1_2V && - host->caps2 & MMC_CAP2_HS200_1_2V_SDR) + if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_2V) err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120); - if (err && card->ext_csd.card_type & EXT_CSD_CARD_TYPE_SDR_1_8V && - host->caps2 & MMC_CAP2_HS200_1_8V_SDR) + if (err && card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_8V) err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180); /* If fails try again during next card power cycle */ @@ -1064,10 +1080,9 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, */ if (card->ext_csd.hs_max_dtr != 0) { err = 0; - if (card->ext_csd.hs_max_dtr > 52000000 && - host->caps2 & MMC_CAP2_HS200) + if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200) err = mmc_select_hs200(card); - else if (host->caps & MMC_CAP_MMC_HIGHSPEED) + else if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS) err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_HS_TIMING, 1, card->ext_csd.generic_cmd6_time, @@ -1081,13 +1096,11 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, mmc_hostname(card->host)); err = 0; } else { - if (card->ext_csd.hs_max_dtr > 52000000 && - host->caps2 & MMC_CAP2_HS200) { + if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200) mmc_set_timing(card->host, MMC_TIMING_MMC_HS200); - } else { + else mmc_set_timing(card->host, MMC_TIMING_MMC_HS); - } } } @@ -1110,14 +1123,8 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, /* * Indicate DDR mode (if supported). */ - if (mmc_card_hs(card)) { - if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_8V) - && (host->caps & MMC_CAP_1_8V_DDR)) - ddr = MMC_1_8V_DDR_MODE; - else if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_2V) - && (host->caps & MMC_CAP_1_2V_DDR)) - ddr = MMC_1_2V_DDR_MODE; - } + if (mmc_card_hs(card)) + ddr = card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_52; /* * Indicate HS200 SDR mode (if supported). @@ -1137,8 +1144,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, * 3. set the clock to > 52Mhz <=200MHz and * 4. execute tuning for HS200 */ - if ((host->caps2 & MMC_CAP2_HS200) && - card->host->ops->execute_tuning) { + if (card->host->ops->execute_tuning) { mmc_host_clk_hold(card->host); err = card->host->ops->execute_tuning(card->host, MMC_SEND_TUNING_BLOCK_HS200); @@ -1247,7 +1253,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, * * WARNING: eMMC rules are NOT the same as SD DDR */ - if (ddr == MMC_1_2V_DDR_MODE) { + if (ddr & EXT_CSD_CARD_TYPE_DDR_1_2V) { err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120); if (err) diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index aadeaf155d0e..fe31f8d89a03 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -68,7 +68,6 @@ struct mmc_ext_csd { #define MMC_HIGH_DDR_MAX_DTR 52000000 #define MMC_HS200_MAX_DTR 200000000 unsigned int sectors; - unsigned int card_type; unsigned int hc_erase_size; /* In sectors */ unsigned int hc_erase_timeout; /* In milliseconds */ unsigned int sec_trim_mult; /* Secure trim multiplier */ @@ -298,6 +297,7 @@ struct mmc_card { struct sdio_func_tuple *tuples; /* unknown common tuples */ unsigned int sd_bus_speed; /* Bus Speed Mode set for the card */ + unsigned int mmc_avail_type; /* supported device type by both host and card */ struct dentry *debugfs_root; struct mmc_part part[MMC_NUM_PHY_PARTITION]; /* physical partitions */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index a43853779799..6b1e9ee6ca10 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -62,12 +62,6 @@ struct mmc_ios { #define MMC_TIMING_MMC_DDR52 8 #define MMC_TIMING_MMC_HS200 9 -#define MMC_SDR_MODE 0 -#define MMC_1_2V_DDR_MODE 1 -#define MMC_1_8V_DDR_MODE 2 -#define MMC_1_2V_SDR_MODE 3 -#define MMC_1_8V_SDR_MODE 4 - unsigned char signal_voltage; /* signalling voltage (1.8V or 3.3V) */ #define MMC_SIGNAL_VOLTAGE_330 0 diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 50bcde3677ca..f734c0c64575 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -354,18 +354,22 @@ struct _mmc_csd { #define EXT_CSD_CMD_SET_SECURE (1<<1) #define EXT_CSD_CMD_SET_CPSECURE (1<<2) -#define EXT_CSD_CARD_TYPE_26 (1<<0) /* Card can run at 26MHz */ -#define EXT_CSD_CARD_TYPE_52 (1<<1) /* Card can run at 52MHz */ #define EXT_CSD_CARD_TYPE_MASK 0x3F /* Mask out reserved bits */ +#define EXT_CSD_CARD_TYPE_HS_26 (1<<0) /* Card can run at 26MHz */ +#define EXT_CSD_CARD_TYPE_HS_52 (1<<1) /* Card can run at 52MHz */ +#define EXT_CSD_CARD_TYPE_HS (EXT_CSD_CARD_TYPE_HS_26 | \ + EXT_CSD_CARD_TYPE_HS_52) #define EXT_CSD_CARD_TYPE_DDR_1_8V (1<<2) /* Card can run at 52MHz */ /* DDR mode @1.8V or 3V I/O */ #define EXT_CSD_CARD_TYPE_DDR_1_2V (1<<3) /* Card can run at 52MHz */ /* DDR mode @1.2V I/O */ #define EXT_CSD_CARD_TYPE_DDR_52 (EXT_CSD_CARD_TYPE_DDR_1_8V \ | EXT_CSD_CARD_TYPE_DDR_1_2V) -#define EXT_CSD_CARD_TYPE_SDR_1_8V (1<<4) /* Card can run at 200MHz */ -#define EXT_CSD_CARD_TYPE_SDR_1_2V (1<<5) /* Card can run at 200MHz */ +#define EXT_CSD_CARD_TYPE_HS200_1_8V (1<<4) /* Card can run at 200MHz */ +#define EXT_CSD_CARD_TYPE_HS200_1_2V (1<<5) /* Card can run at 200MHz */ /* SDR mode @1.2V I/O */ +#define EXT_CSD_CARD_TYPE_HS200 (EXT_CSD_CARD_TYPE_HS200_1_8V | \ + EXT_CSD_CARD_TYPE_HS200_1_2V) #define EXT_CSD_BUS_WIDTH_1 0 /* Card is in 1 bit mode */ #define EXT_CSD_BUS_WIDTH_4 1 /* Card is in 4 bit mode */ -- cgit v1.2.3-71-gd317 From 577fb13199b11d8cd75609183649be4b5561243f Mon Sep 17 00:00:00 2001 From: Seungwon Jeon Date: Wed, 23 Apr 2014 17:08:44 +0900 Subject: mmc: rework selection of bus speed mode Current implementation for bus speed mode selection is too complicated. This patch is to simplify the codes and remove some duplicate parts. The following changes are including: * Adds functions for each mode selection(HS, HS-DDR, HS200 and etc) * Rearranged the mode selection sequence with supported device type * Adds maximum speed for HS200 mode(hs200_max_dtr) * Adds field definition for HS_TIMING of EXT_CSD Signed-off-by: Seungwon Jeon Tested-by: Jaehoon Chung Acked-by: Jaehoon Chung Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/core/debugfs.c | 2 +- drivers/mmc/core/mmc.c | 431 ++++++++++++++++++++++++--------------------- include/linux/mmc/card.h | 1 + include/linux/mmc/mmc.h | 4 + 4 files changed, 238 insertions(+), 200 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c index 509229b48b55..1f730dbfaeea 100644 --- a/drivers/mmc/core/debugfs.c +++ b/drivers/mmc/core/debugfs.c @@ -139,7 +139,7 @@ static int mmc_ios_show(struct seq_file *s, void *data) str = "mmc DDR52"; break; case MMC_TIMING_MMC_HS200: - str = "mmc high-speed SDR200"; + str = "mmc HS200"; break; default: str = "invalid"; diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 4538541ac5ab..bec6786efd19 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -242,7 +242,7 @@ static void mmc_select_card_type(struct mmc_card *card) struct mmc_host *host = card->host; u8 card_type = card->ext_csd.raw_card_type & EXT_CSD_CARD_TYPE_MASK; u32 caps = host->caps, caps2 = host->caps2; - unsigned int hs_max_dtr = 0; + unsigned int hs_max_dtr = 0, hs200_max_dtr = 0; unsigned int avail_type = 0; if (caps & MMC_CAP_MMC_HIGHSPEED && @@ -271,17 +271,18 @@ static void mmc_select_card_type(struct mmc_card *card) if (caps2 & MMC_CAP2_HS200_1_8V_SDR && card_type & EXT_CSD_CARD_TYPE_HS200_1_8V) { - hs_max_dtr = MMC_HS200_MAX_DTR; + hs200_max_dtr = MMC_HS200_MAX_DTR; avail_type |= EXT_CSD_CARD_TYPE_HS200_1_8V; } if (caps2 & MMC_CAP2_HS200_1_2V_SDR && card_type & EXT_CSD_CARD_TYPE_HS200_1_2V) { - hs_max_dtr = MMC_HS200_MAX_DTR; + hs200_max_dtr = MMC_HS200_MAX_DTR; avail_type |= EXT_CSD_CARD_TYPE_HS200_1_2V; } card->ext_csd.hs_max_dtr = hs_max_dtr; + card->ext_csd.hs200_max_dtr = hs200_max_dtr; card->mmc_avail_type = avail_type; } @@ -825,37 +826,46 @@ static int mmc_select_powerclass(struct mmc_card *card) } /* - * Selects the desired buswidth and switch to the HS200 mode - * if bus width set without error + * Set the bus speed for the selected speed mode. */ -static int mmc_select_hs200(struct mmc_card *card) +static void mmc_set_bus_speed(struct mmc_card *card) +{ + unsigned int max_dtr = (unsigned int)-1; + + if (mmc_card_hs200(card) && max_dtr > card->ext_csd.hs200_max_dtr) + max_dtr = card->ext_csd.hs200_max_dtr; + else if (mmc_card_hs(card) && max_dtr > card->ext_csd.hs_max_dtr) + max_dtr = card->ext_csd.hs_max_dtr; + else if (max_dtr > card->csd.max_dtr) + max_dtr = card->csd.max_dtr; + + mmc_set_clock(card->host, max_dtr); +} + +/* + * Select the bus width amoung 4-bit and 8-bit(SDR). + * If the bus width is changed successfully, return the selected width value. + * Zero is returned instead of error value if the wide width is not supported. + */ +static int mmc_select_bus_width(struct mmc_card *card) { - int idx, err = -EINVAL; - struct mmc_host *host; static unsigned ext_csd_bits[] = { - EXT_CSD_BUS_WIDTH_4, EXT_CSD_BUS_WIDTH_8, + EXT_CSD_BUS_WIDTH_4, }; static unsigned bus_widths[] = { - MMC_BUS_WIDTH_4, MMC_BUS_WIDTH_8, + MMC_BUS_WIDTH_4, }; + struct mmc_host *host = card->host; + unsigned idx, bus_width = 0; + int err = 0; - BUG_ON(!card); - - host = card->host; - - if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_2V) - err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120); - - if (err && card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_8V) - err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180); - - /* If fails try again during next card power cycle */ - if (err) - goto err; + if ((card->csd.mmca_vsn < CSD_SPEC_VER_4) && + !(host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA))) + return 0; - idx = (host->caps & MMC_CAP_8_BIT_DATA) ? 1 : 0; + idx = (host->caps & MMC_CAP_8_BIT_DATA) ? 0 : 1; /* * Unlike SD, MMC cards dont have a configuration register to notify @@ -863,8 +873,7 @@ static int mmc_select_hs200(struct mmc_card *card) * the supported bus width or compare the ext csd values of current * bus width and ext csd values of 1 bit mode read earlier. */ - for (; idx >= 0; idx--) { - + for (; idx < ARRAY_SIZE(bus_widths); idx++) { /* * Host is capable of 8bit transfer, then switch * the device to work in 8bit transfer mode. If the @@ -879,26 +888,201 @@ static int mmc_select_hs200(struct mmc_card *card) if (err) continue; - mmc_set_bus_width(card->host, bus_widths[idx]); + bus_width = bus_widths[idx]; + mmc_set_bus_width(host, bus_width); + /* + * If controller can't handle bus width test, + * compare ext_csd previously read in 1 bit mode + * against ext_csd at new bus width + */ if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST)) - err = mmc_compare_ext_csds(card, bus_widths[idx]); + err = mmc_compare_ext_csds(card, bus_width); else - err = mmc_bus_test(card, bus_widths[idx]); - if (!err) + err = mmc_bus_test(card, bus_width); + + if (!err) { + err = bus_width; break; + } else { + pr_warn("%s: switch to bus width %d failed\n", + mmc_hostname(host), ext_csd_bits[idx]); + } } - /* switch to HS200 mode if bus width set successfully */ + return err; +} + +/* + * Switch to the high-speed mode + */ +static int mmc_select_hs(struct mmc_card *card) +{ + int err; + + err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS, + card->ext_csd.generic_cmd6_time, + true, true, true); if (!err) + mmc_set_timing(card->host, MMC_TIMING_MMC_HS); + + return err; +} + +/* + * Activate wide bus and DDR if supported. + */ +static int mmc_select_hs_ddr(struct mmc_card *card) +{ + struct mmc_host *host = card->host; + u32 bus_width, ext_csd_bits; + int err = 0; + + if (!(card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_52)) + return 0; + + bus_width = host->ios.bus_width; + if (bus_width == MMC_BUS_WIDTH_1) + return 0; + + ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ? + EXT_CSD_DDR_BUS_WIDTH_8 : EXT_CSD_DDR_BUS_WIDTH_4; + + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_BUS_WIDTH, + ext_csd_bits, + card->ext_csd.generic_cmd6_time); + if (err) { + pr_warn("%s: switch to bus width %d ddr failed\n", + mmc_hostname(host), 1 << bus_width); + return err; + } + + /* + * eMMC cards can support 3.3V to 1.2V i/o (vccq) + * signaling. + * + * EXT_CSD_CARD_TYPE_DDR_1_8V means 3.3V or 1.8V vccq. + * + * 1.8V vccq at 3.3V core voltage (vcc) is not required + * in the JEDEC spec for DDR. + * + * Do not force change in vccq since we are obviously + * working and no change to vccq is needed. + * + * WARNING: eMMC rules are NOT the same as SD DDR + */ + if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_1_2V) { + err = __mmc_set_signal_voltage(host, + MMC_SIGNAL_VOLTAGE_120); + if (err) + return err; + } + + mmc_set_timing(host, MMC_TIMING_MMC_DDR52); + + return err; +} + +/* + * For device supporting HS200 mode, the following sequence + * should be done before executing the tuning process. + * 1. set the desired bus width(4-bit or 8-bit, 1-bit is not supported) + * 2. switch to HS200 mode + * 3. set the clock to > 52Mhz and <=200MHz + */ +static int mmc_select_hs200(struct mmc_card *card) +{ + struct mmc_host *host = card->host; + int err = -EINVAL; + + if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_2V) + err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120); + + if (err && card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_8V) + err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180); + + /* If fails try again during next card power cycle */ + if (err) + goto err; + + /* + * Set the bus width(4 or 8) with host's support and + * switch to HS200 mode if bus width is set successfully. + */ + err = mmc_select_bus_width(card); + if (!IS_ERR_VALUE(err)) { err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_HS_TIMING, 2, - card->ext_csd.generic_cmd6_time, - true, true, true); + EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS200, + card->ext_csd.generic_cmd6_time, + true, true, true); + if (!err) + mmc_set_timing(host, MMC_TIMING_MMC_HS200); + } err: return err; } +/* + * Activate High Speed or HS200 mode if supported. + */ +static int mmc_select_timing(struct mmc_card *card) +{ + int err = 0; + + if ((card->csd.mmca_vsn < CSD_SPEC_VER_4 && + card->ext_csd.hs_max_dtr == 0)) + goto bus_speed; + + if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200) + err = mmc_select_hs200(card); + else if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS) + err = mmc_select_hs(card); + + if (err && err != -EBADMSG) + return err; + + if (err) { + pr_warn("%s: switch to %s failed\n", + mmc_card_hs(card) ? "high-speed" : + (mmc_card_hs200(card) ? "hs200" : ""), + mmc_hostname(card->host)); + err = 0; + } + +bus_speed: + /* + * Set the bus speed to the selected bus timing. + * If timing is not selected, backward compatible is the default. + */ + mmc_set_bus_speed(card); + return err; +} + +/* + * Execute tuning sequence to seek the proper bus operating + * conditions for HS200, which sends CMD21 to the device. + */ +static int mmc_hs200_tuning(struct mmc_card *card) +{ + struct mmc_host *host = card->host; + int err = 0; + + if (host->ops->execute_tuning) { + mmc_host_clk_hold(host); + err = host->ops->execute_tuning(host, + MMC_SEND_TUNING_BLOCK_HS200); + mmc_host_clk_release(host); + + if (err) + pr_warn("%s: tuning execution failed\n", + mmc_hostname(host)); + } + + return err; +} + /* * Handle the detection and initialisation of a card. * @@ -909,9 +1093,8 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, struct mmc_card *oldcard) { struct mmc_card *card; - int err, ddr = 0; + int err; u32 cid[4]; - unsigned int max_dtr; u32 rocr; u8 *ext_csd = NULL; @@ -1103,173 +1286,23 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, } /* - * Activate high speed (if supported) - */ - if (card->ext_csd.hs_max_dtr != 0) { - err = 0; - if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200) - err = mmc_select_hs200(card); - else if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS) - err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_HS_TIMING, 1, - card->ext_csd.generic_cmd6_time, - true, true, true); - - if (err && err != -EBADMSG) - goto free_card; - - if (err) { - pr_warning("%s: switch to highspeed failed\n", - mmc_hostname(card->host)); - err = 0; - } else { - if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200) - mmc_set_timing(card->host, - MMC_TIMING_MMC_HS200); - else - mmc_set_timing(card->host, MMC_TIMING_MMC_HS); - } - } - - /* - * Compute bus speed. - */ - max_dtr = (unsigned int)-1; - - if (mmc_card_hs(card) || mmc_card_hs200(card)) { - if (max_dtr > card->ext_csd.hs_max_dtr) - max_dtr = card->ext_csd.hs_max_dtr; - if (mmc_card_hs(card) && (max_dtr > 52000000)) - max_dtr = 52000000; - } else if (max_dtr > card->csd.max_dtr) { - max_dtr = card->csd.max_dtr; - } - - mmc_set_clock(host, max_dtr); - - /* - * Indicate DDR mode (if supported). + * Select timing interface */ - if (mmc_card_hs(card)) - ddr = card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_52; + err = mmc_select_timing(card); + if (err) + goto free_card; - /* - * Indicate HS200 SDR mode (if supported). - */ if (mmc_card_hs200(card)) { - u32 ext_csd_bits; - u32 bus_width = card->host->ios.bus_width; - - /* - * For devices supporting HS200 mode, the bus width has - * to be set before executing the tuning function. If - * set before tuning, then device will respond with CRC - * errors for responses on CMD line. So for HS200 the - * sequence will be - * 1. set bus width 4bit / 8 bit (1 bit not supported) - * 2. switch to HS200 mode - * 3. set the clock to > 52Mhz <=200MHz and - * 4. execute tuning for HS200 - */ - if (card->host->ops->execute_tuning) { - mmc_host_clk_hold(card->host); - err = card->host->ops->execute_tuning(card->host, - MMC_SEND_TUNING_BLOCK_HS200); - mmc_host_clk_release(card->host); - } - if (err) { - pr_warning("%s: tuning execution failed\n", - mmc_hostname(card->host)); + err = mmc_hs200_tuning(card); + if (err) goto err; - } - - ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ? - EXT_CSD_BUS_WIDTH_8 : EXT_CSD_BUS_WIDTH_4; - } - - /* - * Activate wide bus and DDR (if supported). - */ - if (!mmc_card_hs200(card) && - (card->csd.mmca_vsn >= CSD_SPEC_VER_4) && - (host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA))) { - static unsigned ext_csd_bits[][2] = { - { EXT_CSD_BUS_WIDTH_8, EXT_CSD_DDR_BUS_WIDTH_8 }, - { EXT_CSD_BUS_WIDTH_4, EXT_CSD_DDR_BUS_WIDTH_4 }, - { EXT_CSD_BUS_WIDTH_1, EXT_CSD_BUS_WIDTH_1 }, - }; - static unsigned bus_widths[] = { - MMC_BUS_WIDTH_8, - MMC_BUS_WIDTH_4, - MMC_BUS_WIDTH_1 - }; - unsigned idx, bus_width = 0; - - if (host->caps & MMC_CAP_8_BIT_DATA) - idx = 0; - else - idx = 1; - for (; idx < ARRAY_SIZE(bus_widths); idx++) { - bus_width = bus_widths[idx]; - if (bus_width == MMC_BUS_WIDTH_1) - ddr = 0; /* no DDR for 1-bit width */ - - err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_BUS_WIDTH, - ext_csd_bits[idx][0], - card->ext_csd.generic_cmd6_time); - if (!err) { - mmc_set_bus_width(card->host, bus_width); - - /* - * If controller can't handle bus width test, - * compare ext_csd previously read in 1 bit mode - * against ext_csd at new bus width - */ - if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST)) - err = mmc_compare_ext_csds(card, - bus_width); - else - err = mmc_bus_test(card, bus_width); - if (!err) - break; - } - } - - if (!err && ddr) { - err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_BUS_WIDTH, - ext_csd_bits[idx][1], - card->ext_csd.generic_cmd6_time); - } - if (err) { - pr_warning("%s: switch to bus width %d ddr %d " - "failed\n", mmc_hostname(card->host), - 1 << bus_width, ddr); - goto free_card; - } else if (ddr) { - /* - * eMMC cards can support 3.3V to 1.2V i/o (vccq) - * signaling. - * - * EXT_CSD_CARD_TYPE_DDR_1_8V means 3.3V or 1.8V vccq. - * - * 1.8V vccq at 3.3V core voltage (vcc) is not required - * in the JEDEC spec for DDR. - * - * Do not force change in vccq since we are obviously - * working and no change to vccq is needed. - * - * WARNING: eMMC rules are NOT the same as SD DDR - */ - if (ddr & EXT_CSD_CARD_TYPE_DDR_1_2V) { - err = __mmc_set_signal_voltage(host, - MMC_SIGNAL_VOLTAGE_120); - if (err) - goto err; - } - mmc_set_timing(card->host, MMC_TIMING_MMC_DDR52); - mmc_set_bus_width(card->host, bus_width); + } else if (mmc_card_hs(card)) { + /* Select the desired bus width optionally */ + err = mmc_select_bus_width(card); + if (!IS_ERR_VALUE(err)) { + err = mmc_select_hs_ddr(card); + if (err) + goto err; } } diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index fe31f8d89a03..176073692872 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -63,6 +63,7 @@ struct mmc_ext_csd { unsigned int power_off_longtime; /* Units: ms */ u8 power_off_notification; /* state */ unsigned int hs_max_dtr; + unsigned int hs200_max_dtr; #define MMC_HIGH_26_MAX_DTR 26000000 #define MMC_HIGH_52_MAX_DTR 52000000 #define MMC_HIGH_DDR_MAX_DTR 52000000 diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index f734c0c64575..f429f13be433 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -377,6 +377,10 @@ struct _mmc_csd { #define EXT_CSD_DDR_BUS_WIDTH_4 5 /* Card is in 4 bit DDR mode */ #define EXT_CSD_DDR_BUS_WIDTH_8 6 /* Card is in 8 bit DDR mode */ +#define EXT_CSD_TIMING_BC 0 /* Backwards compatility */ +#define EXT_CSD_TIMING_HS 1 /* High speed */ +#define EXT_CSD_TIMING_HS200 2 /* HS200 */ + #define EXT_CSD_SEC_ER_EN BIT(0) #define EXT_CSD_SEC_BD_BLK_EN BIT(2) #define EXT_CSD_SEC_GB_CL_EN BIT(4) -- cgit v1.2.3-71-gd317 From 0a5b6438ee482696360bb013e67b8488f63d3e9e Mon Sep 17 00:00:00 2001 From: Seungwon Jeon Date: Wed, 23 Apr 2014 17:14:58 +0900 Subject: mmc: add support for HS400 mode of eMMC5.0 This patch adds HS400 mode support for eMMC5.0 device. HS400 mode is high speed DDR interface timing from HS200. Clock frequency is up to 200MHz and only 8-bit bus width is supported. In addition, tuning process of HS200 is required to synchronize the command response on the CMD line because CMD input timing for HS400 mode is the same as HS200 mode. Signed-off-by: Seungwon Jeon Reviewed-by: Jackey Shen Tested-by: Jaehoon Chung Acked-by: Jaehoon Chung Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/core/bus.c | 1 + drivers/mmc/core/debugfs.c | 3 ++ drivers/mmc/core/mmc.c | 98 +++++++++++++++++++++++++++++++++++++++++++--- include/linux/mmc/card.h | 1 + include/linux/mmc/host.h | 14 +++++++ include/linux/mmc/mmc.h | 7 +++- 6 files changed, 118 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index f37e9d6af84a..d2dbf02022bd 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -349,6 +349,7 @@ int mmc_add_card(struct mmc_card *card) mmc_hostname(card->host), mmc_card_uhs(card) ? "ultra high speed " : (mmc_card_hs(card) ? "high speed " : ""), + mmc_card_hs400(card) ? "HS400 " : (mmc_card_hs200(card) ? "HS200 " : ""), mmc_card_ddr52(card) ? "DDR " : "", uhs_bus_speed_mode, type, card->rca); diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c index 1f730dbfaeea..91eb16223246 100644 --- a/drivers/mmc/core/debugfs.c +++ b/drivers/mmc/core/debugfs.c @@ -141,6 +141,9 @@ static int mmc_ios_show(struct seq_file *s, void *data) case MMC_TIMING_MMC_HS200: str = "mmc HS200"; break; + case MMC_TIMING_MMC_HS400: + str = "mmc HS400"; + break; default: str = "invalid"; break; diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index bec6786efd19..793c6f7ddb04 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -240,7 +240,7 @@ static int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd) static void mmc_select_card_type(struct mmc_card *card) { struct mmc_host *host = card->host; - u8 card_type = card->ext_csd.raw_card_type & EXT_CSD_CARD_TYPE_MASK; + u8 card_type = card->ext_csd.raw_card_type; u32 caps = host->caps, caps2 = host->caps2; unsigned int hs_max_dtr = 0, hs200_max_dtr = 0; unsigned int avail_type = 0; @@ -281,6 +281,18 @@ static void mmc_select_card_type(struct mmc_card *card) avail_type |= EXT_CSD_CARD_TYPE_HS200_1_2V; } + if (caps2 & MMC_CAP2_HS400_1_8V && + card_type & EXT_CSD_CARD_TYPE_HS400_1_8V) { + hs200_max_dtr = MMC_HS200_MAX_DTR; + avail_type |= EXT_CSD_CARD_TYPE_HS400_1_8V; + } + + if (caps2 & MMC_CAP2_HS400_1_2V && + card_type & EXT_CSD_CARD_TYPE_HS400_1_2V) { + hs200_max_dtr = MMC_HS200_MAX_DTR; + avail_type |= EXT_CSD_CARD_TYPE_HS400_1_2V; + } + card->ext_csd.hs_max_dtr = hs_max_dtr; card->ext_csd.hs200_max_dtr = hs200_max_dtr; card->mmc_avail_type = avail_type; @@ -499,6 +511,8 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd) ext_csd[EXT_CSD_PWR_CL_DDR_52_195]; card->ext_csd.raw_pwr_cl_ddr_52_360 = ext_csd[EXT_CSD_PWR_CL_DDR_52_360]; + card->ext_csd.raw_pwr_cl_ddr_200_360 = + ext_csd[EXT_CSD_PWR_CL_DDR_200_360]; } if (card->ext_csd.rev >= 5) { @@ -665,7 +679,10 @@ static int mmc_compare_ext_csds(struct mmc_card *card, unsigned bus_width) (card->ext_csd.raw_pwr_cl_ddr_52_195 == bw_ext_csd[EXT_CSD_PWR_CL_DDR_52_195]) && (card->ext_csd.raw_pwr_cl_ddr_52_360 == - bw_ext_csd[EXT_CSD_PWR_CL_DDR_52_360])); + bw_ext_csd[EXT_CSD_PWR_CL_DDR_52_360]) && + (card->ext_csd.raw_pwr_cl_ddr_200_360 == + bw_ext_csd[EXT_CSD_PWR_CL_DDR_200_360])); + if (err) err = -EINVAL; @@ -768,7 +785,9 @@ static int __mmc_select_powerclass(struct mmc_card *card, ext_csd->raw_pwr_cl_52_360 : ext_csd->raw_pwr_cl_ddr_52_360; else if (host->ios.clock <= MMC_HS200_MAX_DTR) - pwrclass_val = ext_csd->raw_pwr_cl_200_360; + pwrclass_val = (bus_width == EXT_CSD_DDR_BUS_WIDTH_8) ? + ext_csd->raw_pwr_cl_ddr_200_360 : + ext_csd->raw_pwr_cl_200_360; break; default: pr_warning("%s: Voltage range not supported " @@ -832,7 +851,8 @@ static void mmc_set_bus_speed(struct mmc_card *card) { unsigned int max_dtr = (unsigned int)-1; - if (mmc_card_hs200(card) && max_dtr > card->ext_csd.hs200_max_dtr) + if ((mmc_card_hs200(card) || mmc_card_hs400(card)) && + max_dtr > card->ext_csd.hs200_max_dtr) max_dtr = card->ext_csd.hs200_max_dtr; else if (mmc_card_hs(card) && max_dtr > card->ext_csd.hs_max_dtr) max_dtr = card->ext_csd.hs_max_dtr; @@ -985,6 +1005,61 @@ static int mmc_select_hs_ddr(struct mmc_card *card) return err; } +static int mmc_select_hs400(struct mmc_card *card) +{ + struct mmc_host *host = card->host; + int err = 0; + + /* + * HS400 mode requires 8-bit bus width + */ + if (!(card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 && + host->ios.bus_width == MMC_BUS_WIDTH_8)) + return 0; + + /* + * Before switching to dual data rate operation for HS400, + * it is required to convert from HS200 mode to HS mode. + */ + mmc_set_timing(card->host, MMC_TIMING_MMC_HS); + mmc_set_bus_speed(card); + + err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS, + card->ext_csd.generic_cmd6_time, + true, true, true); + if (err) { + pr_warn("%s: switch to high-speed from hs200 failed, err:%d\n", + mmc_hostname(host), err); + return err; + } + + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_BUS_WIDTH, + EXT_CSD_DDR_BUS_WIDTH_8, + card->ext_csd.generic_cmd6_time); + if (err) { + pr_warn("%s: switch to bus width for hs400 failed, err:%d\n", + mmc_hostname(host), err); + return err; + } + + err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS400, + card->ext_csd.generic_cmd6_time, + true, true, true); + if (err) { + pr_warn("%s: switch to hs400 failed, err:%d\n", + mmc_hostname(host), err); + return err; + } + + mmc_set_timing(host, MMC_TIMING_MMC_HS400); + mmc_set_bus_speed(card); + + return 0; +} + /* * For device supporting HS200 mode, the following sequence * should be done before executing the tuning process. @@ -1062,13 +1137,22 @@ bus_speed: /* * Execute tuning sequence to seek the proper bus operating - * conditions for HS200, which sends CMD21 to the device. + * conditions for HS200 and HS400, which sends CMD21 to the device. */ static int mmc_hs200_tuning(struct mmc_card *card) { struct mmc_host *host = card->host; int err = 0; + /* + * Timing should be adjusted to the HS400 target + * operation frequency for tuning process + */ + if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 && + host->ios.bus_width == MMC_BUS_WIDTH_8) + if (host->ops->prepare_hs400_tuning) + host->ops->prepare_hs400_tuning(host, &host->ios); + if (host->ops->execute_tuning) { mmc_host_clk_hold(host); err = host->ops->execute_tuning(host, @@ -1296,6 +1380,10 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, err = mmc_hs200_tuning(card); if (err) goto err; + + err = mmc_select_hs400(card); + if (err) + goto err; } else if (mmc_card_hs(card)) { /* Select the desired bus width optionally */ err = mmc_select_bus_width(card); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 176073692872..d424b9de3aff 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -110,6 +110,7 @@ struct mmc_ext_csd { u8 raw_pwr_cl_200_360; /* 237 */ u8 raw_pwr_cl_ddr_52_195; /* 238 */ u8 raw_pwr_cl_ddr_52_360; /* 239 */ + u8 raw_pwr_cl_ddr_200_360; /* 253 */ u8 raw_bkops_status; /* 246 */ u8 raw_sectors[4]; /* 212 - 4 bytes */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 6b1e9ee6ca10..183087374215 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -61,6 +61,7 @@ struct mmc_ios { #define MMC_TIMING_UHS_DDR50 7 #define MMC_TIMING_MMC_DDR52 8 #define MMC_TIMING_MMC_HS200 9 +#define MMC_TIMING_MMC_HS400 10 unsigned char signal_voltage; /* signalling voltage (1.8V or 3.3V) */ @@ -132,6 +133,9 @@ struct mmc_host_ops { /* The tuning command opcode value is different for SD and eMMC cards */ int (*execute_tuning)(struct mmc_host *host, u32 opcode); + + /* Prepare HS400 target operating frequency depending host driver */ + int (*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios); int (*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv); void (*hw_reset)(struct mmc_host *host); void (*card_event)(struct mmc_host *host); @@ -274,6 +278,10 @@ struct mmc_host { #define MMC_CAP2_PACKED_CMD (MMC_CAP2_PACKED_RD | \ MMC_CAP2_PACKED_WR) #define MMC_CAP2_NO_PRESCAN_POWERUP (1 << 14) /* Don't power up before scan */ +#define MMC_CAP2_HS400_1_8V (1 << 15) /* Can support HS400 1.8V */ +#define MMC_CAP2_HS400_1_2V (1 << 16) /* Can support HS400 1.2V */ +#define MMC_CAP2_HS400 (MMC_CAP2_HS400_1_8V | \ + MMC_CAP2_HS400_1_2V) mmc_pm_flag_t pm_caps; /* supported pm features */ @@ -495,4 +503,10 @@ static inline bool mmc_card_ddr52(struct mmc_card *card) { return card->host->ios.timing == MMC_TIMING_MMC_DDR52; } + +static inline bool mmc_card_hs400(struct mmc_card *card) +{ + return card->host->ios.timing == MMC_TIMING_MMC_HS400; +} + #endif /* LINUX_MMC_HOST_H */ diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index f429f13be433..64ec963ed347 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -325,6 +325,7 @@ struct _mmc_csd { #define EXT_CSD_POWER_OFF_LONG_TIME 247 /* RO */ #define EXT_CSD_GENERIC_CMD6_TIME 248 /* RO */ #define EXT_CSD_CACHE_SIZE 249 /* RO, 4 bytes */ +#define EXT_CSD_PWR_CL_DDR_200_360 253 /* RO */ #define EXT_CSD_TAG_UNIT_SIZE 498 /* RO */ #define EXT_CSD_DATA_TAG_SUPPORT 499 /* RO */ #define EXT_CSD_MAX_PACKED_WRITES 500 /* RO */ @@ -354,7 +355,6 @@ struct _mmc_csd { #define EXT_CSD_CMD_SET_SECURE (1<<1) #define EXT_CSD_CMD_SET_CPSECURE (1<<2) -#define EXT_CSD_CARD_TYPE_MASK 0x3F /* Mask out reserved bits */ #define EXT_CSD_CARD_TYPE_HS_26 (1<<0) /* Card can run at 26MHz */ #define EXT_CSD_CARD_TYPE_HS_52 (1<<1) /* Card can run at 52MHz */ #define EXT_CSD_CARD_TYPE_HS (EXT_CSD_CARD_TYPE_HS_26 | \ @@ -370,6 +370,10 @@ struct _mmc_csd { /* SDR mode @1.2V I/O */ #define EXT_CSD_CARD_TYPE_HS200 (EXT_CSD_CARD_TYPE_HS200_1_8V | \ EXT_CSD_CARD_TYPE_HS200_1_2V) +#define EXT_CSD_CARD_TYPE_HS400_1_8V (1<<6) /* Card can run at 200MHz DDR, 1.8V */ +#define EXT_CSD_CARD_TYPE_HS400_1_2V (1<<7) /* Card can run at 200MHz DDR, 1.2V */ +#define EXT_CSD_CARD_TYPE_HS400 (EXT_CSD_CARD_TYPE_HS400_1_8V | \ + EXT_CSD_CARD_TYPE_HS400_1_2V) #define EXT_CSD_BUS_WIDTH_1 0 /* Card is in 1 bit mode */ #define EXT_CSD_BUS_WIDTH_4 1 /* Card is in 4 bit mode */ @@ -380,6 +384,7 @@ struct _mmc_csd { #define EXT_CSD_TIMING_BC 0 /* Backwards compatility */ #define EXT_CSD_TIMING_HS 1 /* High speed */ #define EXT_CSD_TIMING_HS200 2 /* HS200 */ +#define EXT_CSD_TIMING_HS400 3 /* HS400 */ #define EXT_CSD_SEC_ER_EN BIT(0) #define EXT_CSD_SEC_BD_BLK_EN BIT(2) -- cgit v1.2.3-71-gd317 From 4d1f52f9a9f9a63371dba589093b3ae90fc80c3d Mon Sep 17 00:00:00 2001 From: Tim Kryger Date: Tue, 6 May 2014 15:57:01 -0700 Subject: mmc: core: Improve support for deferred regulators Callers of mmc_regulator_get_supply could benefit from knowing if either of the regulators are present but not yet available. Since callers do not currently examine the return value, modify this function to return zero or -EPROBE_DEFER if either regulator get returns the same. Furthermore, since callers check vmmc/vqmmc using IS_ERR and can deal with absent regulators, switch to devm_regulator_get_optional. This has the added benefit of allowing this function to behave correctly even in the !CONFIG_REGULATOR case such that the stub can be removed. Signed-off-by: Tim Kryger Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 31 +++++++++++++++++++------------ include/linux/mmc/host.h | 8 ++------ 2 files changed, 21 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 02baa30653fa..7dc0c85fdb60 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1314,31 +1314,38 @@ int mmc_regulator_set_ocr(struct mmc_host *mmc, } EXPORT_SYMBOL_GPL(mmc_regulator_set_ocr); +#endif /* CONFIG_REGULATOR */ + int mmc_regulator_get_supply(struct mmc_host *mmc) { struct device *dev = mmc_dev(mmc); - struct regulator *supply; int ret; - supply = devm_regulator_get(dev, "vmmc"); - mmc->supply.vmmc = supply; + mmc->supply.vmmc = devm_regulator_get_optional(dev, "vmmc"); mmc->supply.vqmmc = devm_regulator_get_optional(dev, "vqmmc"); - if (IS_ERR(supply)) - return PTR_ERR(supply); + if (IS_ERR(mmc->supply.vmmc)) { + if (PTR_ERR(mmc->supply.vmmc) == -EPROBE_DEFER) + return -EPROBE_DEFER; + dev_info(dev, "No vmmc regulator found\n"); + } else { + ret = mmc_regulator_get_ocrmask(mmc->supply.vmmc); + if (ret > 0) + mmc->ocr_avail = ret; + else + dev_warn(dev, "Failed getting OCR mask: %d\n", ret); + } - ret = mmc_regulator_get_ocrmask(supply); - if (ret > 0) - mmc->ocr_avail = ret; - else - dev_warn(mmc_dev(mmc), "Failed getting OCR mask: %d\n", ret); + if (IS_ERR(mmc->supply.vqmmc)) { + if (PTR_ERR(mmc->supply.vqmmc) == -EPROBE_DEFER) + return -EPROBE_DEFER; + dev_info(dev, "No vqmmc regulator found\n"); + } return 0; } EXPORT_SYMBOL_GPL(mmc_regulator_get_supply); -#endif /* CONFIG_REGULATOR */ - /* * Mask off any voltages we don't support and select * the lowest voltage diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 183087374215..cd595275e118 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -402,7 +402,6 @@ int mmc_regulator_get_ocrmask(struct regulator *supply); int mmc_regulator_set_ocr(struct mmc_host *mmc, struct regulator *supply, unsigned short vdd_bit); -int mmc_regulator_get_supply(struct mmc_host *mmc); #else static inline int mmc_regulator_get_ocrmask(struct regulator *supply) { @@ -415,13 +414,10 @@ static inline int mmc_regulator_set_ocr(struct mmc_host *mmc, { return 0; } - -static inline int mmc_regulator_get_supply(struct mmc_host *mmc) -{ - return 0; -} #endif +int mmc_regulator_get_supply(struct mmc_host *mmc); + int mmc_pm_notify(struct notifier_block *notify_block, unsigned long, void *); static inline int mmc_card_is_removable(struct mmc_host *host) -- cgit v1.2.3-71-gd317 From 8c48b50a1a888ac5511fe856d63f72fb688c6bb4 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 5 May 2014 11:48:40 +0200 Subject: cfg80211: allow restricting supported dfs regions At the moment, the ath9k/ath10k DFS module only supports detecting ETSI radar patterns. Add a bitmap in the interface combinations, indicating which DFS regions are supported by the detector. If unset, support for all regions is assumed. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 3 +++ net/wireless/nl80211.c | 6 ++++-- net/wireless/util.c | 14 ++++++++++++++ 4 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5c7169b0ac57..e3a48b0a2b3b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2638,6 +2638,7 @@ struct ieee80211_iface_limit { * between infrastructure and AP types must match. This is required * only in special cases. * @radar_detect_widths: bitmap of channel widths supported for radar detection + * @radar_detect_regions: bitmap of regions supported for radar detection * * With this structure the driver can describe which interface * combinations it supports concurrently. @@ -2695,6 +2696,7 @@ struct ieee80211_iface_combination { u8 n_limits; bool beacon_int_infra_match; u8 radar_detect_widths; + u8 radar_detect_regions; }; struct ieee80211_txrx_stypes { diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 406010d4def0..b65095a85dee 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3688,6 +3688,8 @@ enum nl80211_iface_limit_attrs { * different channels may be used within this group. * @NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS: u32 attribute containing the bitmap * of supported channel widths for radar detection. + * @NL80211_IFACE_COMB_RADAR_DETECT_REGIONS: u32 attribute containing the bitmap + * of supported regulatory regions for radar detection. * @NUM_NL80211_IFACE_COMB: number of attributes * @MAX_NL80211_IFACE_COMB: highest attribute number * @@ -3721,6 +3723,7 @@ enum nl80211_if_combination_attrs { NL80211_IFACE_COMB_STA_AP_BI_MATCH, NL80211_IFACE_COMB_NUM_CHANNELS, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, + NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, /* keep last */ NUM_NL80211_IFACE_COMB, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0f1b18f209d6..c0833830cfe7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -970,8 +970,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, c->max_interfaces)) goto nla_put_failure; if (large && - nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, - c->radar_detect_widths)) + (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, + c->radar_detect_widths) || + nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, + c->radar_detect_regions))) goto nla_put_failure; nla_nest_end(msg, nl_combi); diff --git a/net/wireless/util.c b/net/wireless/util.c index a756429b3a0a..8c61d5c6fad3 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1274,10 +1274,20 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, void *data), void *data) { + const struct ieee80211_regdomain *regdom; + enum nl80211_dfs_regions region = 0; int i, j, iftype; int num_interfaces = 0; u32 used_iftypes = 0; + if (radar_detect) { + rcu_read_lock(); + regdom = rcu_dereference(cfg80211_regdomain); + if (regdom) + region = regdom->dfs_region; + rcu_read_unlock(); + } + for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { num_interfaces += iftype_num[iftype]; if (iftype_num[iftype] > 0 && @@ -1318,6 +1328,10 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, if (radar_detect != (c->radar_detect_widths & radar_detect)) goto cont; + if (radar_detect && c->radar_detect_regions && + !(c->radar_detect_regions & BIT(region))) + goto cont; + /* Finally check that all iftypes that we're currently * using are actually part of this combination. If they * aren't then we can't use this combination and have -- cgit v1.2.3-71-gd317 From ec903c0c858e4963a9e0724bdcadfa837253341c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 May 2014 12:11:01 -0400 Subject: cgroup: rename css_tryget*() to css_tryget_online*() Unlike the more usual refcnting, what css_tryget() provides is the distinction between online and offline csses instead of protection against upping a refcnt which already reached zero. cgroup is planning to provide actual tryget which fails if the refcnt already reached zero. Let's rename the existing trygets so that they clearly indicate that they're onliness. I thought about keeping the existing names as-are and introducing new names for the planned actual tryget; however, given that each controller participates in the synchronization of the online state, it seems worthwhile to make it explicit that these functions are about on/offline state. Rename css_tryget() to css_tryget_online() and css_tryget_from_dir() to css_tryget_online_from_dir(). This is pure rename. v2: cgroup_freezer grew new usages of css_tryget(). Update accordingly. Signed-off-by: Tejun Heo Acked-by: Johannes Weiner Acked-by: Michal Hocko Acked-by: Li Zefan Cc: Vivek Goyal Cc: Jens Axboe Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- block/blk-cgroup.c | 2 +- fs/bio.c | 2 +- include/linux/cgroup.h | 14 +++++++------- kernel/cgroup.c | 40 ++++++++++++++++++++-------------------- kernel/cgroup_freezer.c | 4 ++-- kernel/cpuset.c | 6 +++--- kernel/events/core.c | 3 ++- mm/hugetlb_cgroup.c | 2 +- mm/memcontrol.c | 46 ++++++++++++++++++++++++---------------------- 9 files changed, 61 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 1039fb9ff5f5..9f5bce33e6fe 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -185,7 +185,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, lockdep_assert_held(q->queue_lock); /* blkg holds a reference to blkcg */ - if (!css_tryget(&blkcg->css)) { + if (!css_tryget_online(&blkcg->css)) { ret = -EINVAL; goto err_free_blkg; } diff --git a/fs/bio.c b/fs/bio.c index 6f0362b77806..0608ef9422bd 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1970,7 +1970,7 @@ int bio_associate_current(struct bio *bio) /* associate blkcg if exists */ rcu_read_lock(); css = task_css(current, blkio_cgrp_id); - if (css && css_tryget(css)) + if (css && css_tryget_online(css)) bio->bi_css = css; rcu_read_unlock(); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index bde44618d8c2..c5f3684ef557 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -95,16 +95,16 @@ static inline void css_get(struct cgroup_subsys_state *css) } /** - * css_tryget - try to obtain a reference on the specified css + * css_tryget_online - try to obtain a reference on the specified css if online * @css: target css * - * Obtain a reference on @css if it's alive. The caller naturally needs to - * ensure that @css is accessible but doesn't have to be holding a + * Obtain a reference on @css if it's online. The caller naturally needs + * to ensure that @css is accessible but doesn't have to be holding a * reference on it - IOW, RCU protected access is good enough for this * function. Returns %true if a reference count was successfully obtained; * %false otherwise. */ -static inline bool css_tryget(struct cgroup_subsys_state *css) +static inline bool css_tryget_online(struct cgroup_subsys_state *css) { if (css->flags & CSS_ROOT) return true; @@ -115,7 +115,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css) * css_put - put a css reference * @css: target css * - * Put a reference obtained via css_get() and css_tryget(). + * Put a reference obtained via css_get() and css_tryget_online(). */ static inline void css_put(struct cgroup_subsys_state *css) { @@ -905,8 +905,8 @@ void css_task_iter_end(struct css_task_iter *it); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); -struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry, - struct cgroup_subsys *ss); +struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, + struct cgroup_subsys *ss); #else /* !CONFIG_CGROUPS */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7633703e9baf..671d8a6dae37 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3771,7 +3771,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) /* * We aren't being called from kernfs and there's no guarantee on - * @kn->priv's validity. For this and css_tryget_from_dir(), + * @kn->priv's validity. For this and css_tryget_online_from_dir(), * @kn->priv is RCU safe. Let's do the RCU dancing. */ rcu_read_lock(); @@ -4060,9 +4060,9 @@ err: * Implemented in kill_css(). * * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs - * and thus css_tryget() is guaranteed to fail, the css can be offlined - * by invoking offline_css(). After offlining, the base ref is put. - * Implemented in css_killed_work_fn(). + * and thus css_tryget_online() is guaranteed to fail, the css can be + * offlined by invoking offline_css(). After offlining, the base ref is + * put. Implemented in css_killed_work_fn(). * * 3. When the percpu_ref reaches zero, the only possible remaining * accessors are inside RCU read sections. css_release() schedules the @@ -4386,7 +4386,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, /* * This is called when the refcnt of a css is confirmed to be killed. - * css_tryget() is now guaranteed to fail. + * css_tryget_online() is now guaranteed to fail. */ static void css_killed_work_fn(struct work_struct *work) { @@ -4398,8 +4398,8 @@ static void css_killed_work_fn(struct work_struct *work) mutex_lock(&cgroup_mutex); /* - * css_tryget() is guaranteed to fail now. Tell subsystems to - * initate destruction. + * css_tryget_online() is guaranteed to fail now. Tell subsystems + * to initate destruction. */ offline_css(css); @@ -4440,8 +4440,8 @@ static void css_killed_ref_fn(struct percpu_ref *ref) * * This function initiates destruction of @css by removing cgroup interface * files and putting its base reference. ->css_offline() will be invoked - * asynchronously once css_tryget() is guaranteed to fail and when the - * reference count reaches zero, @css will be released. + * asynchronously once css_tryget_online() is guaranteed to fail and when + * the reference count reaches zero, @css will be released. */ static void kill_css(struct cgroup_subsys_state *css) { @@ -4462,7 +4462,7 @@ static void kill_css(struct cgroup_subsys_state *css) /* * cgroup core guarantees that, by the time ->css_offline() is * invoked, no new css reference will be given out via - * css_tryget(). We can't simply call percpu_ref_kill() and + * css_tryget_online(). We can't simply call percpu_ref_kill() and * proceed to offlining css's because percpu_ref_kill() doesn't * guarantee that the ref is seen as killed on all CPUs on return. * @@ -4478,9 +4478,9 @@ static void kill_css(struct cgroup_subsys_state *css) * * css's make use of percpu refcnts whose killing latency shouldn't be * exposed to userland and are RCU protected. Also, cgroup core needs to - * guarantee that css_tryget() won't succeed by the time ->css_offline() is - * invoked. To satisfy all the requirements, destruction is implemented in - * the following two steps. + * guarantee that css_tryget_online() won't succeed by the time + * ->css_offline() is invoked. To satisfy all the requirements, + * destruction is implemented in the following two steps. * * s1. Verify @cgrp can be destroyed and mark it dying. Remove all * userland visible parts and start killing the percpu refcnts of @@ -4574,9 +4574,9 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) /* * There are two control paths which try to determine cgroup from * dentry without going through kernfs - cgroupstats_build() and - * css_tryget_from_dir(). Those are supported by RCU protecting - * clearing of cgrp->kn->priv backpointer, which should happen - * after all files under it have been removed. + * css_tryget_online_from_dir(). Those are supported by RCU + * protecting clearing of cgrp->kn->priv backpointer, which should + * happen after all files under it have been removed. */ kernfs_remove(cgrp->kn); /* @cgrp has an extra ref on its kn */ RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, NULL); @@ -5173,7 +5173,7 @@ static int __init cgroup_disable(char *str) __setup("cgroup_disable=", cgroup_disable); /** - * css_tryget_from_dir - get corresponding css from the dentry of a cgroup dir + * css_tryget_online_from_dir - get corresponding css from a cgroup dentry * @dentry: directory dentry of interest * @ss: subsystem of interest * @@ -5181,8 +5181,8 @@ __setup("cgroup_disable=", cgroup_disable); * to get the corresponding css and return it. If such css doesn't exist * or can't be pinned, an ERR_PTR value is returned. */ -struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry, - struct cgroup_subsys *ss) +struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, + struct cgroup_subsys *ss) { struct kernfs_node *kn = kernfs_node_from_dentry(dentry); struct cgroup_subsys_state *css = NULL; @@ -5204,7 +5204,7 @@ struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry, if (cgrp) css = cgroup_css(cgrp, ss); - if (!css || !css_tryget(css)) + if (!css || !css_tryget_online(css)) css = ERR_PTR(-ENOENT); rcu_read_unlock(); diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 345628c78b5b..0398f7e9ac81 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -304,7 +304,7 @@ static int freezer_read(struct seq_file *m, void *v) /* update states bottom-up */ css_for_each_descendant_post(pos, css) { - if (!css_tryget(pos)) + if (!css_tryget_online(pos)) continue; rcu_read_unlock(); @@ -404,7 +404,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze) struct freezer *pos_f = css_freezer(pos); struct freezer *parent = parent_freezer(pos_f); - if (!css_tryget(pos)) + if (!css_tryget_online(pos)) continue; rcu_read_unlock(); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 7c0e8da59e26..37ca0a5c226d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -872,7 +872,7 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs, bool update_root) continue; } } - if (!css_tryget(&cp->css)) + if (!css_tryget_online(&cp->css)) continue; rcu_read_unlock(); @@ -1108,7 +1108,7 @@ static void update_tasks_nodemask_hier(struct cpuset *root_cs, bool update_root) continue; } } - if (!css_tryget(&cp->css)) + if (!css_tryget_online(&cp->css)) continue; rcu_read_unlock(); @@ -2153,7 +2153,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) rcu_read_lock(); cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { - if (cs == &top_cpuset || !css_tryget(&cs->css)) + if (cs == &top_cpuset || !css_tryget_online(&cs->css)) continue; rcu_read_unlock(); diff --git a/kernel/events/core.c b/kernel/events/core.c index f83a71a3e46d..077968d19b8a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -607,7 +607,8 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, if (!f.file) return -EBADF; - css = css_tryget_from_dir(f.file->f_dentry, &perf_event_cgrp_subsys); + css = css_tryget_online_from_dir(f.file->f_dentry, + &perf_event_cgrp_subsys); if (IS_ERR(css)) { ret = PTR_ERR(css); goto out; diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 595d7fd795e1..372f1adca491 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -181,7 +181,7 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, again: rcu_read_lock(); h_cg = hugetlb_cgroup_from_task(current); - if (!css_tryget(&h_cg->css)) { + if (!css_tryget_online(&h_cg->css)) { rcu_read_unlock(); goto again; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c3f82f69ef58..5cf3246314a2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -567,7 +567,8 @@ void sock_update_memcg(struct sock *sk) memcg = mem_cgroup_from_task(current); cg_proto = sk->sk_prot->proto_cgroup(memcg); if (!mem_cgroup_is_root(memcg) && - memcg_proto_active(cg_proto) && css_tryget(&memcg->css)) { + memcg_proto_active(cg_proto) && + css_tryget_online(&memcg->css)) { sk->sk_cgrp = cg_proto; } rcu_read_unlock(); @@ -834,7 +835,7 @@ retry: */ __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz); if (!res_counter_soft_limit_excess(&mz->memcg->res) || - !css_tryget(&mz->memcg->css)) + !css_tryget_online(&mz->memcg->css)) goto retry; done: return mz; @@ -1076,7 +1077,7 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); if (unlikely(!memcg)) memcg = root_mem_cgroup; - } while (!css_tryget(&memcg->css)); + } while (!css_tryget_online(&memcg->css)); rcu_read_unlock(); return memcg; } @@ -1113,7 +1114,8 @@ skip_node: */ if (next_css) { if ((next_css == &root->css) || - ((next_css->flags & CSS_ONLINE) && css_tryget(next_css))) + ((next_css->flags & CSS_ONLINE) && + css_tryget_online(next_css))) return mem_cgroup_from_css(next_css); prev_css = next_css; @@ -1159,7 +1161,7 @@ mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter, * would be returned all the time. */ if (position && position != root && - !css_tryget(&position->css)) + !css_tryget_online(&position->css)) position = NULL; } return position; @@ -2785,9 +2787,9 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg, /* * A helper function to get mem_cgroup from ID. must be called under - * rcu_read_lock(). The caller is responsible for calling css_tryget if - * the mem_cgroup is used for charging. (dropping refcnt from swap can be - * called against removed memcg.) + * rcu_read_lock(). The caller is responsible for calling + * css_tryget_online() if the mem_cgroup is used for charging. (dropping + * refcnt from swap can be called against removed memcg.) */ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id) { @@ -2810,14 +2812,14 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) lock_page_cgroup(pc); if (PageCgroupUsed(pc)) { memcg = pc->mem_cgroup; - if (memcg && !css_tryget(&memcg->css)) + if (memcg && !css_tryget_online(&memcg->css)) memcg = NULL; } else if (PageSwapCache(page)) { ent.val = page_private(page); id = lookup_swap_cgroup_id(ent); rcu_read_lock(); memcg = mem_cgroup_lookup(id); - if (memcg && !css_tryget(&memcg->css)) + if (memcg && !css_tryget_online(&memcg->css)) memcg = NULL; rcu_read_unlock(); } @@ -3473,7 +3475,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, } /* The corresponding put will be done in the workqueue. */ - if (!css_tryget(&memcg->css)) + if (!css_tryget_online(&memcg->css)) goto out; rcu_read_unlock(); @@ -4246,8 +4248,8 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) memcg = mem_cgroup_lookup(id); if (memcg) { /* - * We uncharge this because swap is freed. - * This memcg can be obsolete one. We avoid calling css_tryget + * We uncharge this because swap is freed. This memcg can + * be obsolete one. We avoid calling css_tryget_online(). */ if (!mem_cgroup_is_root(memcg)) res_counter_uncharge(&memcg->memsw, PAGE_SIZE); @@ -5840,10 +5842,10 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg) * which is then paired with css_put during uncharge resp. here. * * Although this might sound strange as this path is called from - * css_offline() when the referencemight have dropped down to 0 - * and shouldn't be incremented anymore (css_tryget would fail) - * we do not have other options because of the kmem allocations - * lifetime. + * css_offline() when the referencemight have dropped down to 0 and + * shouldn't be incremented anymore (css_tryget_online() would + * fail) we do not have other options because of the kmem + * allocations lifetime. */ css_get(&memcg->css); @@ -6051,8 +6053,8 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css, * automatically removed on cgroup destruction but the removal is * asynchronous, so take an extra ref on @css. */ - cfile_css = css_tryget_from_dir(cfile.file->f_dentry->d_parent, - &memory_cgrp_subsys); + cfile_css = css_tryget_online_from_dir(cfile.file->f_dentry->d_parent, + &memory_cgrp_subsys); ret = -EINVAL; if (IS_ERR(cfile_css)) goto out_put_cfile; @@ -6496,7 +6498,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css) /* * XXX: css_offline() would be where we should reparent all * memory to prepare the cgroup for destruction. However, - * memcg does not do css_tryget() and res_counter charging + * memcg does not do css_tryget_online() and res_counter charging * under the same RCU lock region, which means that charging * could race with offlining. Offlining only happens to * cgroups with no tasks in them but charges can show up @@ -6510,9 +6512,9 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css) * lookup_swap_cgroup_id() * rcu_read_lock() * mem_cgroup_lookup() - * css_tryget() + * css_tryget_online() * rcu_read_unlock() - * disable css_tryget() + * disable css_tryget_online() * call_rcu() * offline_css() * reparent_charges() -- cgit v1.2.3-71-gd317 From b41686401e501430ffe93b575ef7959d2ecc6f2e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 May 2014 12:16:21 -0400 Subject: cgroup: implement cftype->write() During the recent conversion to kernfs, cftype's seq_file operations are updated so that they are directly mapped to kernfs operations and thus can fully access the associated kernfs and cgroup contexts; however, write path hasn't seen similar updates and none of the existing write operations has access to, for example, the associated kernfs_open_file. Let's introduce a new operation cftype->write() which maps directly to the kernfs write operation and has access to all the arguments and contexts. This will replace ->write_string() and ->trigger() and ease manipulation of kernfs active protection from cgroup file operations. Two accessors - of_cft() and of_css() - are introduced to enable accessing the associated cgroup context from cftype->write() which only takes kernfs_open_file for the context information. The accessors for seq_file operations - seq_cft() and seq_css() - are rewritten to wrap the of_ accessors. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 27 +++++++++++++++++++++++---- kernel/cgroup.c | 14 ++++++++------ 2 files changed, 31 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c5f3684ef557..c5a170ca4a48 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -515,6 +515,15 @@ struct cftype { */ int (*trigger)(struct cgroup_subsys_state *css, unsigned int event); + /* + * write() is the generic write callback which maps directly to + * kernfs write operation and overrides all other operations. + * Maximum write size is determined by ->max_write_len. Use + * of_css/cft() to access the associated css and cft. + */ + ssize_t (*write)(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off); + #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lock_class_key lockdep_key; #endif @@ -552,14 +561,24 @@ static inline ino_t cgroup_ino(struct cgroup *cgrp) return 0; } -static inline struct cftype *seq_cft(struct seq_file *seq) +/* cft/css accessors for cftype->write() operation */ +static inline struct cftype *of_cft(struct kernfs_open_file *of) { - struct kernfs_open_file *of = seq->private; - return of->kn->priv; } -struct cgroup_subsys_state *seq_css(struct seq_file *seq); +struct cgroup_subsys_state *of_css(struct kernfs_open_file *of); + +/* cft/css accessors for cftype->seq_*() operations */ +static inline struct cftype *seq_cft(struct seq_file *seq) +{ + return of_cft(seq->private); +} + +static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq) +{ + return of_css(seq->private); +} /* * Name / path handling functions. All are thin wrappers around the kernfs diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 671d8a6dae37..a16f91d12f4e 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -283,11 +283,10 @@ static inline bool cgroup_is_dead(const struct cgroup *cgrp) return test_bit(CGRP_DEAD, &cgrp->flags); } -struct cgroup_subsys_state *seq_css(struct seq_file *seq) +struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) { - struct kernfs_open_file *of = seq->private; struct cgroup *cgrp = of->kn->parent->priv; - struct cftype *cft = seq_cft(seq); + struct cftype *cft = of_cft(of); /* * This is open and unprotected implementation of cgroup_css(). @@ -302,7 +301,7 @@ struct cgroup_subsys_state *seq_css(struct seq_file *seq) else return &cgrp->dummy_css; } -EXPORT_SYMBOL_GPL(seq_css); +EXPORT_SYMBOL_GPL(of_css); /** * cgroup_is_descendant - test ancestry @@ -1035,8 +1034,8 @@ static umode_t cgroup_file_mode(const struct cftype *cft) if (cft->read_u64 || cft->read_s64 || cft->seq_show) mode |= S_IRUGO; - if (cft->write_u64 || cft->write_s64 || cft->write_string || - cft->trigger) + if (cft->write_u64 || cft->write_s64 || cft->write || + cft->write_string || cft->trigger) mode |= S_IWUSR; return mode; @@ -2726,6 +2725,9 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, struct cgroup_subsys_state *css; int ret; + if (cft->write) + return cft->write(of, buf, nbytes, off); + /* * kernfs guarantees that a file isn't deleted with operations in * flight, which means that the matching css is and stays alive and -- cgit v1.2.3-71-gd317 From 451af504df0c62f695a69b83c250486e77c66378 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 May 2014 12:16:21 -0400 Subject: cgroup: replace cftype->write_string() with cftype->write() Convert all cftype->write_string() users to the new cftype->write() which maps directly to kernfs write operation and has full access to kernfs and cgroup contexts. The conversions are mostly mechanical. * @css and @cft are accessed using of_css() and of_cft() accessors respectively instead of being specified as arguments. * Should return @nbytes on success instead of 0. * @buf is not trimmed automatically. Trim if necessary. Note that blkcg and netprio don't need this as the parsers already handle whitespaces. cftype->write_string() has no user left after the conversions and removed. While at it, remove unnecessary local variable @p in cgroup_subtree_control_write() and stale comment about CGROUP_LOCAL_BUFFER_SIZE in cgroup_freezer.c. This patch doesn't introduce any visible behavior changes. v2: netprio was missing from conversion. Converted. Signed-off-by: Tejun Heo Acked-by: Aristeu Rozanski Acked-by: Vivek Goyal Acked-by: Li Zefan Cc: Jens Axboe Cc: Johannes Weiner Cc: Michal Hocko Cc: Neil Horman Cc: "David S. Miller" --- block/blk-throttle.c | 32 ++++++++++++++++---------------- block/cfq-iosched.c | 28 ++++++++++++++-------------- include/linux/cgroup.h | 10 +--------- kernel/cgroup.c | 38 +++++++++++++++++++------------------- kernel/cgroup_freezer.c | 20 +++++++++----------- kernel/cpuset.c | 16 +++++++++------- mm/hugetlb_cgroup.c | 17 +++++++++-------- mm/memcontrol.c | 46 +++++++++++++++++++++++++--------------------- net/core/netprio_cgroup.c | 12 ++++++------ net/ipv4/tcp_memcontrol.c | 16 +++++++++------- security/device_cgroup.c | 14 +++++++------- 11 files changed, 124 insertions(+), 125 deletions(-) (limited to 'include') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 033745cd7fba..5e8fd1bace98 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1346,10 +1346,10 @@ static int tg_print_conf_uint(struct seq_file *sf, void *v) return 0; } -static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft, - const char *buf, bool is_u64) +static ssize_t tg_set_conf(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off, bool is_u64) { - struct blkcg *blkcg = css_to_blkcg(css); + struct blkcg *blkcg = css_to_blkcg(of_css(of)); struct blkg_conf_ctx ctx; struct throtl_grp *tg; struct throtl_service_queue *sq; @@ -1368,9 +1368,9 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft, ctx.v = -1; if (is_u64) - *(u64 *)((void *)tg + cft->private) = ctx.v; + *(u64 *)((void *)tg + of_cft(of)->private) = ctx.v; else - *(unsigned int *)((void *)tg + cft->private) = ctx.v; + *(unsigned int *)((void *)tg + of_cft(of)->private) = ctx.v; throtl_log(&tg->service_queue, "limit change rbps=%llu wbps=%llu riops=%u wiops=%u", @@ -1404,19 +1404,19 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft, } blkg_conf_finish(&ctx); - return 0; + return nbytes; } -static int tg_set_conf_u64(struct cgroup_subsys_state *css, struct cftype *cft, - char *buf) +static ssize_t tg_set_conf_u64(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - return tg_set_conf(css, cft, buf, true); + return tg_set_conf(of, buf, nbytes, off, true); } -static int tg_set_conf_uint(struct cgroup_subsys_state *css, struct cftype *cft, - char *buf) +static ssize_t tg_set_conf_uint(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - return tg_set_conf(css, cft, buf, false); + return tg_set_conf(of, buf, nbytes, off, false); } static struct cftype throtl_files[] = { @@ -1424,25 +1424,25 @@ static struct cftype throtl_files[] = { .name = "throttle.read_bps_device", .private = offsetof(struct throtl_grp, bps[READ]), .seq_show = tg_print_conf_u64, - .write_string = tg_set_conf_u64, + .write = tg_set_conf_u64, }, { .name = "throttle.write_bps_device", .private = offsetof(struct throtl_grp, bps[WRITE]), .seq_show = tg_print_conf_u64, - .write_string = tg_set_conf_u64, + .write = tg_set_conf_u64, }, { .name = "throttle.read_iops_device", .private = offsetof(struct throtl_grp, iops[READ]), .seq_show = tg_print_conf_uint, - .write_string = tg_set_conf_uint, + .write = tg_set_conf_uint, }, { .name = "throttle.write_iops_device", .private = offsetof(struct throtl_grp, iops[WRITE]), .seq_show = tg_print_conf_uint, - .write_string = tg_set_conf_uint, + .write = tg_set_conf_uint, }, { .name = "throttle.io_service_bytes", diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index e0985f1955e7..a73020b8c9af 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1670,11 +1670,11 @@ static int cfq_print_leaf_weight(struct seq_file *sf, void *v) return 0; } -static int __cfqg_set_weight_device(struct cgroup_subsys_state *css, - struct cftype *cft, const char *buf, - bool is_leaf_weight) +static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off, + bool is_leaf_weight) { - struct blkcg *blkcg = css_to_blkcg(css); + struct blkcg *blkcg = css_to_blkcg(of_css(of)); struct blkg_conf_ctx ctx; struct cfq_group *cfqg; int ret; @@ -1697,19 +1697,19 @@ static int __cfqg_set_weight_device(struct cgroup_subsys_state *css, } blkg_conf_finish(&ctx); - return ret; + return ret ?: nbytes; } -static int cfqg_set_weight_device(struct cgroup_subsys_state *css, - struct cftype *cft, char *buf) +static ssize_t cfqg_set_weight_device(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - return __cfqg_set_weight_device(css, cft, buf, false); + return __cfqg_set_weight_device(of, buf, nbytes, off, false); } -static int cfqg_set_leaf_weight_device(struct cgroup_subsys_state *css, - struct cftype *cft, char *buf) +static ssize_t cfqg_set_leaf_weight_device(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - return __cfqg_set_weight_device(css, cft, buf, true); + return __cfqg_set_weight_device(of, buf, nbytes, off, true); } static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft, @@ -1837,7 +1837,7 @@ static struct cftype cfq_blkcg_files[] = { .name = "weight_device", .flags = CFTYPE_ONLY_ON_ROOT, .seq_show = cfqg_print_leaf_weight_device, - .write_string = cfqg_set_leaf_weight_device, + .write = cfqg_set_leaf_weight_device, }, { .name = "weight", @@ -1851,7 +1851,7 @@ static struct cftype cfq_blkcg_files[] = { .name = "weight_device", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = cfqg_print_weight_device, - .write_string = cfqg_set_weight_device, + .write = cfqg_set_weight_device, }, { .name = "weight", @@ -1863,7 +1863,7 @@ static struct cftype cfq_blkcg_files[] = { { .name = "leaf_weight_device", .seq_show = cfqg_print_leaf_weight_device, - .write_string = cfqg_set_leaf_weight_device, + .write = cfqg_set_leaf_weight_device, }, { .name = "leaf_weight", diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c5a170ca4a48..aecdc84fe128 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -453,8 +453,7 @@ struct cftype { /* * The maximum length of string, excluding trailing nul, that can - * be passed to write_string. If < PAGE_SIZE-1, PAGE_SIZE-1 is - * assumed. + * be passed to write. If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed. */ size_t max_write_len; @@ -500,13 +499,6 @@ struct cftype { int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft, s64 val); - /* - * write_string() is passed a nul-terminated kernelspace - * buffer of maximum length determined by max_write_len. - * Returns 0 or -ve error code. - */ - int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft, - char *buffer); /* * trigger() callback can be used to get some kick from the * userspace, when the actual string written is not important diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a16f91d12f4e..2a88ce7b24b6 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1035,7 +1035,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft) mode |= S_IRUGO; if (cft->write_u64 || cft->write_s64 || cft->write || - cft->write_string || cft->trigger) + cft->trigger) mode |= S_IWUSR; return mode; @@ -2352,20 +2352,21 @@ static int cgroup_procs_write(struct cgroup_subsys_state *css, return attach_task_by_pid(css->cgroup, tgid, true); } -static int cgroup_release_agent_write(struct cgroup_subsys_state *css, - struct cftype *cft, char *buffer) +static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - struct cgroup_root *root = css->cgroup->root; + struct cgroup *cgrp = of_css(of)->cgroup; + struct cgroup_root *root = cgrp->root; BUILD_BUG_ON(sizeof(root->release_agent_path) < PATH_MAX); - if (!cgroup_lock_live_group(css->cgroup)) + if (!cgroup_lock_live_group(cgrp)) return -ENODEV; spin_lock(&release_agent_path_lock); - strlcpy(root->release_agent_path, buffer, + strlcpy(root->release_agent_path, strstrip(buf), sizeof(root->release_agent_path)); spin_unlock(&release_agent_path_lock); mutex_unlock(&cgroup_mutex); - return 0; + return nbytes; } static int cgroup_release_agent_show(struct seq_file *seq, void *v) @@ -2530,21 +2531,22 @@ out_finish: } /* change the enabled child controllers for a cgroup in the default hierarchy */ -static int cgroup_subtree_control_write(struct cgroup_subsys_state *dummy_css, - struct cftype *cft, char *buffer) +static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) { unsigned int enable = 0, disable = 0; - struct cgroup *cgrp = dummy_css->cgroup, *child; + struct cgroup *cgrp = of_css(of)->cgroup, *child; struct cgroup_subsys *ss; - char *tok, *p; + char *tok; int ssid, ret; /* * Parse input - space separated list of subsystem names prefixed * with either + or -. */ - p = buffer; - while ((tok = strsep(&p, " "))) { + buf = strstrip(buf); + while ((tok = strsep(&buf, " "))) { if (tok[0] == '\0') continue; for_each_subsys(ss, ssid) { @@ -2692,7 +2694,7 @@ out_unlock_tree: out_unbreak: kernfs_unbreak_active_protection(cgrp->control_kn); cgroup_put(cgrp); - return ret; + return ret ?: nbytes; err_undo_css: cgrp->child_subsys_mask &= ~enable; @@ -2738,9 +2740,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, css = cgroup_css(cgrp, cft->ss); rcu_read_unlock(); - if (cft->write_string) { - ret = cft->write_string(css, cft, strstrip(buf)); - } else if (cft->write_u64) { + if (cft->write_u64) { unsigned long long v; ret = kstrtoull(buf, 0, &v); if (!ret) @@ -3984,7 +3984,7 @@ static struct cftype cgroup_base_files[] = { .name = "cgroup.subtree_control", .flags = CFTYPE_ONLY_ON_DFL, .seq_show = cgroup_subtree_control_show, - .write_string = cgroup_subtree_control_write, + .write = cgroup_subtree_control_write, }, { .name = "cgroup.populated", @@ -4018,7 +4018,7 @@ static struct cftype cgroup_base_files[] = { .name = "release_agent", .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT, .seq_show = cgroup_release_agent_show, - .write_string = cgroup_release_agent_write, + .write = cgroup_release_agent_write, .max_write_len = PATH_MAX - 1, }, { } /* terminate */ diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 0398f7e9ac81..6b4e60e33a9a 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -73,10 +73,6 @@ bool cgroup_freezing(struct task_struct *task) return ret; } -/* - * cgroups_write_string() limits the size of freezer state strings to - * CGROUP_LOCAL_BUFFER_SIZE - */ static const char *freezer_state_strs(unsigned int state) { if (state & CGROUP_FROZEN) @@ -423,20 +419,22 @@ static void freezer_change_state(struct freezer *freezer, bool freeze) mutex_unlock(&freezer_mutex); } -static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, - char *buffer) +static ssize_t freezer_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { bool freeze; - if (strcmp(buffer, freezer_state_strs(0)) == 0) + buf = strstrip(buf); + + if (strcmp(buf, freezer_state_strs(0)) == 0) freeze = false; - else if (strcmp(buffer, freezer_state_strs(CGROUP_FROZEN)) == 0) + else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0) freeze = true; else return -EINVAL; - freezer_change_state(css_freezer(css), freeze); - return 0; + freezer_change_state(css_freezer(of_css(of)), freeze); + return nbytes; } static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css, @@ -460,7 +458,7 @@ static struct cftype files[] = { .name = "state", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = freezer_read, - .write_string = freezer_write, + .write = freezer_write, }, { .name = "self_freezing", diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 37ca0a5c226d..2f4b08b8db24 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1603,13 +1603,15 @@ out_unlock: /* * Common handling for a write to a "cpus" or "mems" file. */ -static int cpuset_write_resmask(struct cgroup_subsys_state *css, - struct cftype *cft, char *buf) +static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - struct cpuset *cs = css_cs(css); + struct cpuset *cs = css_cs(of_css(of)); struct cpuset *trialcs; int retval = -ENODEV; + buf = strstrip(buf); + /* * CPU or memory hotunplug may leave @cs w/o any execution * resources, in which case the hotplug code asynchronously updates @@ -1633,7 +1635,7 @@ static int cpuset_write_resmask(struct cgroup_subsys_state *css, goto out_unlock; } - switch (cft->private) { + switch (of_cft(of)->private) { case FILE_CPULIST: retval = update_cpumask(cs, trialcs, buf); break; @@ -1648,7 +1650,7 @@ static int cpuset_write_resmask(struct cgroup_subsys_state *css, free_trial_cpuset(trialcs); out_unlock: mutex_unlock(&cpuset_mutex); - return retval; + return retval ?: nbytes; } /* @@ -1750,7 +1752,7 @@ static struct cftype files[] = { { .name = "cpus", .seq_show = cpuset_common_seq_show, - .write_string = cpuset_write_resmask, + .write = cpuset_write_resmask, .max_write_len = (100U + 6 * NR_CPUS), .private = FILE_CPULIST, }, @@ -1758,7 +1760,7 @@ static struct cftype files[] = { { .name = "mems", .seq_show = cpuset_common_seq_show, - .write_string = cpuset_write_resmask, + .write = cpuset_write_resmask, .max_write_len = (100U + 6 * MAX_NUMNODES), .private = FILE_MEMLIST, }, diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 372f1adca491..191de26b0148 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -253,15 +253,16 @@ static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, return res_counter_read_u64(&h_cg->hugepage[idx], name); } -static int hugetlb_cgroup_write(struct cgroup_subsys_state *css, - struct cftype *cft, char *buffer) +static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { int idx, name, ret; unsigned long long val; - struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); + struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); - idx = MEMFILE_IDX(cft->private); - name = MEMFILE_ATTR(cft->private); + buf = strstrip(buf); + idx = MEMFILE_IDX(of_cft(of)->private); + name = MEMFILE_ATTR(of_cft(of)->private); switch (name) { case RES_LIMIT: @@ -271,7 +272,7 @@ static int hugetlb_cgroup_write(struct cgroup_subsys_state *css, break; } /* This function does all necessary parse...reuse it */ - ret = res_counter_memparse_write_strategy(buffer, &val); + ret = res_counter_memparse_write_strategy(buf, &val); if (ret) break; ret = res_counter_set_limit(&h_cg->hugepage[idx], val); @@ -280,7 +281,7 @@ static int hugetlb_cgroup_write(struct cgroup_subsys_state *css, ret = -EINVAL; break; } - return ret; + return ret ?: nbytes; } static int hugetlb_cgroup_reset(struct cgroup_subsys_state *css, @@ -331,7 +332,7 @@ static void __init __hugetlb_cgroup_file_init(int idx) snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf); cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT); cft->read_u64 = hugetlb_cgroup_read_u64; - cft->write_string = hugetlb_cgroup_write; + cft->write = hugetlb_cgroup_write; /* Add the usage file */ cft = &h->cgroup_files[1]; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5cf3246314a2..7098a43f7447 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5143,17 +5143,18 @@ static int memcg_update_kmem_limit(struct mem_cgroup *memcg, * The user of this function is... * RES_LIMIT. */ -static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, - char *buffer) +static ssize_t mem_cgroup_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); enum res_type type; int name; unsigned long long val; int ret; - type = MEMFILE_TYPE(cft->private); - name = MEMFILE_ATTR(cft->private); + buf = strstrip(buf); + type = MEMFILE_TYPE(of_cft(of)->private); + name = MEMFILE_ATTR(of_cft(of)->private); switch (name) { case RES_LIMIT: @@ -5162,7 +5163,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, break; } /* This function does all necessary parse...reuse it */ - ret = res_counter_memparse_write_strategy(buffer, &val); + ret = res_counter_memparse_write_strategy(buf, &val); if (ret) break; if (type == _MEM) @@ -5175,7 +5176,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, return -EINVAL; break; case RES_SOFT_LIMIT: - ret = res_counter_memparse_write_strategy(buffer, &val); + ret = res_counter_memparse_write_strategy(buf, &val); if (ret) break; /* @@ -5192,7 +5193,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, ret = -EINVAL; /* should be BUG() ? */ break; } - return ret; + return ret ?: nbytes; } static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg, @@ -5964,9 +5965,10 @@ static void memcg_event_ptable_queue_proc(struct file *file, * Input must be in format ' '. * Interpretation of args is defined by control file implementation. */ -static int memcg_write_event_control(struct cgroup_subsys_state *css, - struct cftype *cft, char *buffer) +static ssize_t memcg_write_event_control(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { + struct cgroup_subsys_state *css = of_css(of); struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_event *event; struct cgroup_subsys_state *cfile_css; @@ -5977,15 +5979,17 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css, char *endp; int ret; - efd = simple_strtoul(buffer, &endp, 10); + buf = strstrip(buf); + + efd = simple_strtoul(buf, &endp, 10); if (*endp != ' ') return -EINVAL; - buffer = endp + 1; + buf = endp + 1; - cfd = simple_strtoul(buffer, &endp, 10); + cfd = simple_strtoul(buf, &endp, 10); if ((*endp != ' ') && (*endp != '\0')) return -EINVAL; - buffer = endp + 1; + buf = endp + 1; event = kzalloc(sizeof(*event), GFP_KERNEL); if (!event) @@ -6063,7 +6067,7 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css, goto out_put_cfile; } - ret = event->register_event(memcg, event->eventfd, buffer); + ret = event->register_event(memcg, event->eventfd, buf); if (ret) goto out_put_css; @@ -6076,7 +6080,7 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css, fdput(cfile); fdput(efile); - return 0; + return nbytes; out_put_css: css_put(css); @@ -6107,13 +6111,13 @@ static struct cftype mem_cgroup_files[] = { { .name = "limit_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), - .write_string = mem_cgroup_write, + .write = mem_cgroup_write, .read_u64 = mem_cgroup_read_u64, }, { .name = "soft_limit_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), - .write_string = mem_cgroup_write, + .write = mem_cgroup_write, .read_u64 = mem_cgroup_read_u64, }, { @@ -6138,7 +6142,7 @@ static struct cftype mem_cgroup_files[] = { }, { .name = "cgroup.event_control", /* XXX: for compat */ - .write_string = memcg_write_event_control, + .write = memcg_write_event_control, .flags = CFTYPE_NO_PREFIX, .mode = S_IWUGO, }, @@ -6171,7 +6175,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "kmem.limit_in_bytes", .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT), - .write_string = mem_cgroup_write, + .write = mem_cgroup_write, .read_u64 = mem_cgroup_read_u64, }, { @@ -6217,7 +6221,7 @@ static struct cftype memsw_cgroup_files[] = { { .name = "memsw.limit_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), - .write_string = mem_cgroup_write, + .write = mem_cgroup_write, .read_u64 = mem_cgroup_read_u64, }, { diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 3825f669147b..b990cefd906b 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -185,15 +185,15 @@ static int read_priomap(struct seq_file *sf, void *v) return 0; } -static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft, - char *buffer) +static ssize_t write_priomap(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { char devname[IFNAMSIZ + 1]; struct net_device *dev; u32 prio; int ret; - if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2) + if (sscanf(buf, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2) return -EINVAL; dev = dev_get_by_name(&init_net, devname); @@ -202,11 +202,11 @@ static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft, rtnl_lock(); - ret = netprio_set_prio(css, dev, prio); + ret = netprio_set_prio(of_css(of), dev, prio); rtnl_unlock(); dev_put(dev); - return ret; + return ret ?: nbytes; } static int update_netprio(const void *v, struct file *file, unsigned n) @@ -239,7 +239,7 @@ static struct cftype ss_files[] = { { .name = "ifpriomap", .seq_show = read_priomap, - .write_string = write_priomap, + .write = write_priomap, }, { } /* terminate */ }; diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index d4f015ad6c84..841fd3fa937a 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -102,17 +102,19 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) return 0; } -static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, - char *buffer) +static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); unsigned long long val; int ret = 0; - switch (cft->private) { + buf = strstrip(buf); + + switch (of_cft(of)->private) { case RES_LIMIT: /* see memcontrol.c */ - ret = res_counter_memparse_write_strategy(buffer, &val); + ret = res_counter_memparse_write_strategy(buf, &val); if (ret) break; ret = tcp_update_limit(memcg, val); @@ -121,7 +123,7 @@ static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, ret = -EINVAL; break; } - return ret; + return ret ?: nbytes; } static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) @@ -193,7 +195,7 @@ static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) static struct cftype tcp_files[] = { { .name = "kmem.tcp.limit_in_bytes", - .write_string = tcp_cgroup_write, + .write = tcp_cgroup_write, .read_u64 = tcp_cgroup_read, .private = RES_LIMIT, }, diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 9134dbf70d3e..7dbac4061b1c 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -767,27 +767,27 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, return rc; } -static int devcgroup_access_write(struct cgroup_subsys_state *css, - struct cftype *cft, char *buffer) +static ssize_t devcgroup_access_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { int retval; mutex_lock(&devcgroup_mutex); - retval = devcgroup_update_access(css_to_devcgroup(css), - cft->private, buffer); + retval = devcgroup_update_access(css_to_devcgroup(of_css(of)), + of_cft(of)->private, strstrip(buf)); mutex_unlock(&devcgroup_mutex); - return retval; + return retval ?: nbytes; } static struct cftype dev_cgroup_files[] = { { .name = "allow", - .write_string = devcgroup_access_write, + .write = devcgroup_access_write, .private = DEVCG_ALLOW, }, { .name = "deny", - .write_string = devcgroup_access_write, + .write = devcgroup_access_write, .private = DEVCG_DENY, }, { -- cgit v1.2.3-71-gd317 From 6770c64e5c8da4705d1f0973bdeb5c2bf4f3a404 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 May 2014 12:16:21 -0400 Subject: cgroup: replace cftype->trigger() with cftype->write() cftype->trigger() is pointless. It's trivial to ignore the input buffer from a regular ->write() operation. Convert all ->trigger() users to ->write() and remove ->trigger(). This patch doesn't introduce any visible behavior changes. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko --- include/linux/cgroup.h | 8 -------- kernel/cgroup.c | 5 +---- mm/hugetlb_cgroup.c | 16 ++++++++-------- mm/memcontrol.c | 34 ++++++++++++++++++---------------- net/ipv4/tcp_memcontrol.c | 15 ++++++++------- 5 files changed, 35 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index aecdc84fe128..08eb71ee600b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -499,14 +499,6 @@ struct cftype { int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft, s64 val); - /* - * trigger() callback can be used to get some kick from the - * userspace, when the actual string written is not important - * at all. The private field can be used to determine the - * kick type for multiplexing. - */ - int (*trigger)(struct cgroup_subsys_state *css, unsigned int event); - /* * write() is the generic write callback which maps directly to * kernfs write operation and overrides all other operations. diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2a88ce7b24b6..2f16aab03493 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1034,8 +1034,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft) if (cft->read_u64 || cft->read_s64 || cft->seq_show) mode |= S_IRUGO; - if (cft->write_u64 || cft->write_s64 || cft->write || - cft->trigger) + if (cft->write_u64 || cft->write_s64 || cft->write) mode |= S_IWUSR; return mode; @@ -2750,8 +2749,6 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, ret = kstrtoll(buf, 0, &v); if (!ret) ret = cft->write_s64(css, cft, v); - } else if (cft->trigger) { - ret = cft->trigger(css, (unsigned int)cft->private); } else { ret = -EINVAL; } diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 191de26b0148..a380681ab3cf 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -284,14 +284,14 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, return ret ?: nbytes; } -static int hugetlb_cgroup_reset(struct cgroup_subsys_state *css, - unsigned int event) +static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { int idx, name, ret = 0; - struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); + struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); - idx = MEMFILE_IDX(event); - name = MEMFILE_ATTR(event); + idx = MEMFILE_IDX(of_cft(of)->private); + name = MEMFILE_ATTR(of_cft(of)->private); switch (name) { case RES_MAX_USAGE: @@ -304,7 +304,7 @@ static int hugetlb_cgroup_reset(struct cgroup_subsys_state *css, ret = -EINVAL; break; } - return ret; + return ret ?: nbytes; } static char *mem_fmt(char *buf, int size, unsigned long hsize) @@ -344,14 +344,14 @@ static void __init __hugetlb_cgroup_file_init(int idx) cft = &h->cgroup_files[2]; snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf); cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE); - cft->trigger = hugetlb_cgroup_reset; + cft->write = hugetlb_cgroup_reset; cft->read_u64 = hugetlb_cgroup_read_u64; /* Add the failcntfile */ cft = &h->cgroup_files[3]; snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf); cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT); - cft->trigger = hugetlb_cgroup_reset; + cft->write = hugetlb_cgroup_reset; cft->read_u64 = hugetlb_cgroup_read_u64; /* NULL terminate the last cft */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7098a43f7447..b638a79209ee 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4887,14 +4887,15 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg) return 0; } -static int mem_cgroup_force_empty_write(struct cgroup_subsys_state *css, - unsigned int event) +static ssize_t mem_cgroup_force_empty_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); if (mem_cgroup_is_root(memcg)) return -EINVAL; - return mem_cgroup_force_empty(memcg); + return mem_cgroup_force_empty(memcg) ?: nbytes; } static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css, @@ -5220,14 +5221,15 @@ out: *memsw_limit = min_memsw_limit; } -static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) +static ssize_t mem_cgroup_reset(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); int name; enum res_type type; - type = MEMFILE_TYPE(event); - name = MEMFILE_ATTR(event); + type = MEMFILE_TYPE(of_cft(of)->private); + name = MEMFILE_ATTR(of_cft(of)->private); switch (name) { case RES_MAX_USAGE: @@ -5252,7 +5254,7 @@ static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) break; } - return 0; + return nbytes; } static u64 mem_cgroup_move_charge_read(struct cgroup_subsys_state *css, @@ -6105,7 +6107,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "max_usage_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, { @@ -6123,7 +6125,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "failcnt", .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, { @@ -6132,7 +6134,7 @@ static struct cftype mem_cgroup_files[] = { }, { .name = "force_empty", - .trigger = mem_cgroup_force_empty_write, + .write = mem_cgroup_force_empty_write, }, { .name = "use_hierarchy", @@ -6186,13 +6188,13 @@ static struct cftype mem_cgroup_files[] = { { .name = "kmem.failcnt", .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, { .name = "kmem.max_usage_in_bytes", .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, #ifdef CONFIG_SLABINFO @@ -6215,7 +6217,7 @@ static struct cftype memsw_cgroup_files[] = { { .name = "memsw.max_usage_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, { @@ -6227,7 +6229,7 @@ static struct cftype memsw_cgroup_files[] = { { .name = "memsw.failcnt", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), - .trigger = mem_cgroup_reset, + .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, { }, /* terminate */ diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 841fd3fa937a..f7a2ec3ac584 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -170,17 +170,18 @@ static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) return val; } -static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) +static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { struct mem_cgroup *memcg; struct cg_proto *cg_proto; - memcg = mem_cgroup_from_css(css); + memcg = mem_cgroup_from_css(of_css(of)); cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) - return 0; + return nbytes; - switch (event) { + switch (of_cft(of)->private) { case RES_MAX_USAGE: res_counter_reset_max(&cg_proto->memory_allocated); break; @@ -189,7 +190,7 @@ static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) break; } - return 0; + return nbytes; } static struct cftype tcp_files[] = { @@ -207,13 +208,13 @@ static struct cftype tcp_files[] = { { .name = "kmem.tcp.failcnt", .private = RES_FAILCNT, - .trigger = tcp_cgroup_reset, + .write = tcp_cgroup_reset, .read_u64 = tcp_cgroup_read, }, { .name = "kmem.tcp.max_usage_in_bytes", .private = RES_MAX_USAGE, - .trigger = tcp_cgroup_reset, + .write = tcp_cgroup_reset, .read_u64 = tcp_cgroup_read, }, { } /* terminate */ -- cgit v1.2.3-71-gd317 From b7fc5ad235936379fae67a9f7b50bb53487a1a3a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 May 2014 12:16:22 -0400 Subject: cgroup: remove cgroup->control_kn Now that cgroup_subtree_control_write() has access to the associated kernfs_open_file and thus the kernfs_node, there's no need to cache it in cgroup->control_kn on creation. Remove cgroup->control_kn and use @of->kn directly. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 1 - kernel/cgroup.c | 8 +++----- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 08eb71ee600b..aa7353deaaf3 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -177,7 +177,6 @@ struct cgroup { struct cgroup *parent; /* my parent */ struct kernfs_node *kn; /* cgroup kernfs entry */ - struct kernfs_node *control_kn; /* kn for "cgroup.subtree_control" */ struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9a48c117ebf1..94d259bcd2b9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2580,7 +2580,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, * active_ref protection. */ cgroup_get(cgrp); - kernfs_break_active_protection(cgrp->control_kn); + kernfs_break_active_protection(of->kn); mutex_lock(&cgroup_tree_mutex); @@ -2697,7 +2697,7 @@ out_unlock: out_unlock_tree: mutex_unlock(&cgroup_tree_mutex); out_unbreak: - kernfs_unbreak_active_protection(cgrp->control_kn); + kernfs_unbreak_active_protection(of->kn); cgroup_put(cgrp); return ret ?: nbytes; @@ -2887,9 +2887,7 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft) return ret; } - if (cft->seq_show == cgroup_subtree_control_show) - cgrp->control_kn = kn; - else if (cft->seq_show == cgroup_populated_show) + if (cft->seq_show == cgroup_populated_show) cgrp->populated_kn = kn; return 0; } -- cgit v1.2.3-71-gd317 From 7ad24ea4bf620a32631d7b3069c3e30c078b0c3e Mon Sep 17 00:00:00 2001 From: Wilfried Klaebe Date: Sun, 11 May 2014 00:12:32 +0000 Subject: net: get rid of SET_ETHTOOL_OPS net: get rid of SET_ETHTOOL_OPS Dave Miller mentioned he'd like to see SET_ETHTOOL_OPS gone. This does that. Mostly done via coccinelle script: @@ struct ethtool_ops *ops; struct net_device *dev; @@ - SET_ETHTOOL_OPS(dev, ops); + dev->ethtool_ops = ops; Compile tested only, but I'd seriously wonder if this broke anything. Suggested-by: Dave Miller Signed-off-by: Wilfried Klaebe Acked-by: Felipe Balbi Signed-off-by: David S. Miller --- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 2 +- drivers/net/ethernet/3com/3c509.c | 2 +- drivers/net/ethernet/3com/3c589_cs.c | 2 +- drivers/net/ethernet/3com/typhoon.c | 2 +- drivers/net/ethernet/adaptec/starfire.c | 2 +- drivers/net/ethernet/alteon/acenic.c | 2 +- drivers/net/ethernet/altera/altera_tse_ethtool.c | 2 +- drivers/net/ethernet/amd/amd8111e.c | 2 +- drivers/net/ethernet/amd/au1000_eth.c | 2 +- drivers/net/ethernet/amd/nmclan_cs.c | 2 +- drivers/net/ethernet/atheros/alx/main.c | 2 +- drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c | 2 +- drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c | 2 +- drivers/net/ethernet/atheros/atlx/atl2.c | 2 +- drivers/net/ethernet/broadcom/b44.c | 2 +- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 4 ++-- drivers/net/ethernet/broadcom/bcmsysport.c | 2 +- drivers/net/ethernet/broadcom/bgmac.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 6 ++---- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 +- drivers/net/ethernet/brocade/bna/bnad_ethtool.c | 2 +- drivers/net/ethernet/calxeda/xgmac.c | 2 +- drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 2 +- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 2 +- drivers/net/ethernet/cisco/enic/enic_ethtool.c | 2 +- drivers/net/ethernet/dec/tulip/tulip_core.c | 2 +- drivers/net/ethernet/dlink/dl2k.c | 2 +- drivers/net/ethernet/dlink/sundance.c | 2 +- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- drivers/net/ethernet/faraday/ftgmac100.c | 2 +- drivers/net/ethernet/faraday/ftmac100.c | 2 +- drivers/net/ethernet/freescale/ucc_geth_ethtool.c | 2 +- drivers/net/ethernet/fujitsu/fmvj18x_cs.c | 2 +- drivers/net/ethernet/ibm/ehea/ehea_ethtool.c | 2 +- drivers/net/ethernet/ibm/emac/core.c | 2 +- drivers/net/ethernet/icplus/ipg.c | 2 +- drivers/net/ethernet/intel/e100.c | 2 +- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 2 +- drivers/net/ethernet/intel/e1000e/ethtool.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c | 2 +- drivers/net/ethernet/intel/igb/igb_ethtool.c | 2 +- drivers/net/ethernet/intel/igbvf/ethtool.c | 2 +- drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 2 +- drivers/net/ethernet/intel/ixgbevf/ethtool.c | 2 +- drivers/net/ethernet/marvell/mv643xx_eth.c | 2 +- drivers/net/ethernet/marvell/mvneta.c | 2 +- drivers/net/ethernet/marvell/pxa168_eth.c | 2 +- drivers/net/ethernet/marvell/sky2.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- drivers/net/ethernet/micrel/ks8695net.c | 6 +++--- drivers/net/ethernet/micrel/ks8851.c | 2 +- drivers/net/ethernet/micrel/ksz884x.c | 2 +- drivers/net/ethernet/microchip/enc28j60.c | 2 +- drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 2 +- drivers/net/ethernet/natsemi/natsemi.c | 2 +- drivers/net/ethernet/natsemi/ns83820.c | 2 +- drivers/net/ethernet/neterion/s2io.c | 2 +- drivers/net/ethernet/neterion/vxge/vxge-ethtool.c | 2 +- drivers/net/ethernet/nvidia/forcedeth.c | 2 +- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c | 2 +- drivers/net/ethernet/packetengines/hamachi.c | 6 ++---- drivers/net/ethernet/packetengines/yellowfin.c | 2 +- drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 2 +- drivers/net/ethernet/qlogic/qla3xxx.c | 2 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 8 +++----- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 2 +- drivers/net/ethernet/realtek/r8169.c | 2 +- drivers/net/ethernet/renesas/sh_eth.c | 2 +- drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c | 2 +- drivers/net/ethernet/sfc/efx.c | 2 +- drivers/net/ethernet/sis/sis190.c | 2 +- drivers/net/ethernet/smsc/smc91c92_cs.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 2 +- drivers/net/ethernet/tehuti/tehuti.c | 2 +- drivers/net/ethernet/ti/cpsw.c | 4 ++-- drivers/net/ethernet/ti/davinci_emac.c | 2 +- drivers/net/hyperv/netvsc_drv.c | 2 +- drivers/net/ntb_netdev.c | 2 +- drivers/net/rionet.c | 2 +- drivers/net/usb/catc.c | 2 +- drivers/net/usb/hso.c | 2 +- drivers/net/usb/ipheth.c | 2 +- drivers/net/usb/kaweth.c | 2 +- drivers/net/usb/pegasus.c | 2 +- drivers/net/usb/r8152.c | 2 +- drivers/net/usb/rtl8150.c | 2 +- drivers/net/virtio_net.c | 2 +- drivers/net/vmxnet3/vmxnet3_ethtool.c | 2 +- drivers/net/vxlan.c | 2 +- drivers/net/wireless/hostap/hostap_main.c | 2 +- drivers/net/xen-netback/interface.c | 2 +- drivers/net/xen-netfront.c | 2 +- drivers/s390/net/qeth_l2_main.c | 7 +++---- drivers/s390/net/qeth_l3_main.c | 2 +- drivers/staging/et131x/et131x.c | 2 +- drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c | 2 +- drivers/staging/netlogic/xlr_net.c | 2 +- drivers/staging/octeon/ethernet.c | 2 +- drivers/usb/gadget/u_ether.c | 4 ++-- include/linux/netdevice.h | 3 --- net/batman-adv/soft-interface.c | 2 +- net/bridge/br_device.c | 2 +- net/dsa/slave.c | 2 +- net/openvswitch/vport-internal_dev.c | 2 +- 108 files changed, 118 insertions(+), 128 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index c4b3940845e6..078cadd6c797 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -105,5 +105,5 @@ static const struct ethtool_ops ipoib_ethtool_ops = { void ipoib_set_ethtool_ops(struct net_device *dev) { - SET_ETHTOOL_OPS(dev, &ipoib_ethtool_ops); + dev->ethtool_ops = &ipoib_ethtool_ops; } diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index 35df0b9e6848..a968654b631d 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -534,7 +534,7 @@ static int el3_common_init(struct net_device *dev) /* The EL3-specific entries in the device structure. */ dev->netdev_ops = &netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(dev, ðtool_ops); + dev->ethtool_ops = ðtool_ops; err = register_netdev(dev); if (err) { diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c index 063557e037f2..f18647c23559 100644 --- a/drivers/net/ethernet/3com/3c589_cs.c +++ b/drivers/net/ethernet/3com/3c589_cs.c @@ -218,7 +218,7 @@ static int tc589_probe(struct pcmcia_device *link) dev->netdev_ops = &el3_netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); + dev->ethtool_ops = &netdev_ethtool_ops; return tc589_config(link); } diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index 465cc7108d8a..e13b04624ded 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -2435,7 +2435,7 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netif_napi_add(dev, &tp->napi, typhoon_poll, 16); dev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(dev, &typhoon_ethtool_ops); + dev->ethtool_ops = &typhoon_ethtool_ops; /* We can handle scatter gather, up to 16 entries, and * we can do IP checksumming (only version 4, doh...) diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 171d73c1d3c2..40dbbf740331 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -784,7 +784,7 @@ static int starfire_init_one(struct pci_dev *pdev, dev->netdev_ops = &netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(dev, ðtool_ops); + dev->ethtool_ops = ðtool_ops; netif_napi_add(dev, &np->napi, netdev_poll, max_interrupt_work); diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index 1517e9df5ba1..9a6991be9749 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -476,7 +476,7 @@ static int acenic_probe_one(struct pci_dev *pdev, dev->watchdog_timeo = 5*HZ; dev->netdev_ops = &ace_netdev_ops; - SET_ETHTOOL_OPS(dev, &ace_ethtool_ops); + dev->ethtool_ops = &ace_ethtool_ops; /* we only display this string ONCE */ if (!boards_found) diff --git a/drivers/net/ethernet/altera/altera_tse_ethtool.c b/drivers/net/ethernet/altera/altera_tse_ethtool.c index 76133caffa78..d817e285b266 100644 --- a/drivers/net/ethernet/altera/altera_tse_ethtool.c +++ b/drivers/net/ethernet/altera/altera_tse_ethtool.c @@ -237,5 +237,5 @@ static const struct ethtool_ops tse_ethtool_ops = { void altera_tse_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &tse_ethtool_ops); + netdev->ethtool_ops = &tse_ethtool_ops; } diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 26efaaa5e73f..068dc7cad5fa 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1900,7 +1900,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, /* Initialize driver entry points */ dev->netdev_ops = &amd8111e_netdev_ops; - SET_ETHTOOL_OPS(dev, &ops); + dev->ethtool_ops = &ops; dev->irq =pdev->irq; dev->watchdog_timeo = AMD8111E_TX_TIMEOUT; netif_napi_add(dev, &lp->napi, amd8111e_rx_poll, 32); diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index a2bd91e3d302..a78e4c136959 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1229,7 +1229,7 @@ static int au1000_probe(struct platform_device *pdev) dev->base_addr = base->start; dev->irq = irq; dev->netdev_ops = &au1000_netdev_ops; - SET_ETHTOOL_OPS(dev, &au1000_ethtool_ops); + dev->ethtool_ops = &au1000_ethtool_ops; dev->watchdog_timeo = ETH_TX_TIMEOUT; /* diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c index 08569fe2b182..abf3b1581c82 100644 --- a/drivers/net/ethernet/amd/nmclan_cs.c +++ b/drivers/net/ethernet/amd/nmclan_cs.c @@ -457,7 +457,7 @@ static int nmclan_probe(struct pcmcia_device *link) lp->tx_free_frames=AM2150_MAX_TX_FRAMES; dev->netdev_ops = &mace_netdev_ops; - SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); + dev->ethtool_ops = &netdev_ethtool_ops; dev->watchdog_timeo = TX_TIMEOUT; return nmclan_config(link); diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 17bb9ce96260..49faa97a30c3 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1302,7 +1302,7 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } netdev->netdev_ops = &alx_netdev_ops; - SET_ETHTOOL_OPS(netdev, &alx_ethtool_ops); + netdev->ethtool_ops = &alx_ethtool_ops; netdev->irq = pdev->irq; netdev->watchdog_timeo = ALX_WATCHDOG_TIME; diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c b/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c index 859ea844ba0f..ecacaaeb2b92 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c @@ -305,5 +305,5 @@ static const struct ethtool_ops atl1c_ethtool_ops = { void atl1c_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &atl1c_ethtool_ops); + netdev->ethtool_ops = &atl1c_ethtool_ops; } diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c index 82b23861bf55..206e9b7be431 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c @@ -388,5 +388,5 @@ static const struct ethtool_ops atl1e_ethtool_ops = { void atl1e_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &atl1e_ethtool_ops); + netdev->ethtool_ops = &atl1e_ethtool_ops; } diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 78befb522a52..2587fed7b02c 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1396,7 +1396,7 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) atl2_setup_pcicmd(pdev); netdev->netdev_ops = &atl2_netdev_ops; - SET_ETHTOOL_OPS(netdev, &atl2_ethtool_ops); + netdev->ethtool_ops = &atl2_ethtool_ops; netdev->watchdog_timeo = 5 * HZ; strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 05ba62589017..ca5a20a48b14 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2380,7 +2380,7 @@ static int b44_init_one(struct ssb_device *sdev, netif_napi_add(dev, &bp->napi, b44_poll, 64); dev->watchdog_timeo = B44_TX_TIMEOUT; dev->irq = sdev->irq; - SET_ETHTOOL_OPS(dev, &b44_ethtool_ops); + dev->ethtool_ops = &b44_ethtool_ops; err = ssb_bus_powerup(sdev->bus, 0); if (err) { diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 8db34d389675..3e8d1a88ed3d 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1897,7 +1897,7 @@ static int bcm_enet_probe(struct platform_device *pdev) dev->netdev_ops = &bcm_enet_ops; netif_napi_add(dev, &priv->napi, bcm_enet_poll, 16); - SET_ETHTOOL_OPS(dev, &bcm_enet_ethtool_ops); + dev->ethtool_ops = &bcm_enet_ethtool_ops; SET_NETDEV_DEV(dev, &pdev->dev); ret = register_netdev(dev); @@ -2783,7 +2783,7 @@ static int bcm_enetsw_probe(struct platform_device *pdev) /* register netdevice */ dev->netdev_ops = &bcm_enetsw_ops; netif_napi_add(dev, &priv->napi, bcm_enet_poll, 16); - SET_ETHTOOL_OPS(dev, &bcm_enetsw_ethtool_ops); + dev->ethtool_ops = &bcm_enetsw_ethtool_ops; SET_NETDEV_DEV(dev, &pdev->dev); spin_lock_init(&priv->enetsw_mdio_lock); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 4dc8d1e9829b..56b74a495181 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1540,7 +1540,7 @@ static int bcm_sysport_probe(struct platform_device *pdev) SET_NETDEV_DEV(dev, &pdev->dev); dev_set_drvdata(&pdev->dev, dev); - SET_ETHTOOL_OPS(dev, &bcm_sysport_ethtool_ops); + dev->ethtool_ops = &bcm_sysport_ethtool_ops; dev->netdev_ops = &bcm_sysport_netdev_ops; netif_napi_add(dev, &priv->napi, bcm_sysport_poll, 64); diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 0297a79a38e1..05c6af6c418f 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1436,7 +1436,7 @@ static int bgmac_probe(struct bcma_device *core) return -ENOMEM; net_dev->netdev_ops = &bgmac_netdev_ops; net_dev->irq = core->irq; - SET_ETHTOOL_OPS(net_dev, &bgmac_ethtool_ops); + net_dev->ethtool_ops = &bgmac_ethtool_ops; bgmac = netdev_priv(net_dev); bgmac->net_dev = net_dev; bgmac->core = core; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index b6de05e3149b..03224090ecf9 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -3506,8 +3506,6 @@ static const struct ethtool_ops bnx2x_vf_ethtool_ops = { void bnx2x_set_ethtool_ops(struct bnx2x *bp, struct net_device *netdev) { - if (IS_PF(bp)) - SET_ETHTOOL_OPS(netdev, &bnx2x_ethtool_ops); - else /* vf */ - SET_ETHTOOL_OPS(netdev, &bnx2x_vf_ethtool_ops); + netdev->ethtool_ops = (IS_PF(bp)) ? + &bnx2x_ethtool_ops : &bnx2x_vf_ethtool_ops; } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 0966bd04375f..5ba1cfbd60da 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2481,7 +2481,7 @@ static int bcmgenet_probe(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, dev); ether_addr_copy(dev->dev_addr, macaddr); dev->watchdog_timeo = 2 * HZ; - SET_ETHTOOL_OPS(dev, &bcmgenet_ethtool_ops); + dev->ethtool_ops = &bcmgenet_ethtool_ops; dev->netdev_ops = &bcmgenet_netdev_ops; netif_napi_add(dev, &priv->napi, bcmgenet_poll, 64); diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c index f9e150825bb5..adca62b72837 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c +++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c @@ -1137,5 +1137,5 @@ static const struct ethtool_ops bnad_ethtool_ops = { void bnad_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &bnad_ethtool_ops); + netdev->ethtool_ops = &bnad_ethtool_ops; } diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 521dfea44b83..25d6b2a10e4e 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1737,7 +1737,7 @@ static int xgmac_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ndev); ether_setup(ndev); ndev->netdev_ops = &xgmac_netdev_ops; - SET_ETHTOOL_OPS(ndev, &xgmac_ethtool_ops); + ndev->ethtool_ops = &xgmac_ethtool_ops; spin_lock_init(&priv->stats_lock); INIT_WORK(&priv->tx_timeout_work, xgmac_tx_timeout_work); diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c index 0fe7ff750d77..c1b2c1dbf015 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c @@ -1100,7 +1100,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netif_napi_add(netdev, &adapter->napi, t1_poll, 64); - SET_ETHTOOL_OPS(netdev, &t1_ethtool_ops); + netdev->ethtool_ops = &t1_ethtool_ops; } if (t1_init_sw_modules(adapter, bi) < 0) { diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 07bbb711b7e5..3ed507947248 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -3291,7 +3291,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->features |= NETIF_F_HIGHDMA; netdev->netdev_ops = &cxgb_netdev_ops; - SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops); + netdev->ethtool_ops = &cxgb_ethtool_ops; } pci_set_drvdata(pdev, adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 0f1e886d89e3..266a5bc6aedf 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -6083,7 +6083,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->priv_flags |= IFF_UNICAST_FLT; netdev->netdev_ops = &cxgb4_netdev_ops; - SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops); + netdev->ethtool_ops = &cxgb_ethtool_ops; } pci_set_drvdata(pdev, adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 52859288de7b..ff1cdd1788b5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2664,7 +2664,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, netdev->priv_flags |= IFF_UNICAST_FLT; netdev->netdev_ops = &cxgb4vf_netdev_ops; - SET_ETHTOOL_OPS(netdev, &cxgb4vf_ethtool_ops); + netdev->ethtool_ops = &cxgb4vf_ethtool_ops; /* * Initialize the hardware/software state for the port. diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 47e3562f4866..58a8c67638e3 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -253,5 +253,5 @@ static const struct ethtool_ops enic_ethtool_ops = { void enic_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &enic_ethtool_ops); + netdev->ethtool_ops = &enic_ethtool_ops; } diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 1642de78aac8..861660841ce2 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1703,7 +1703,7 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) #ifdef CONFIG_TULIP_NAPI netif_napi_add(dev, &tp->napi, tulip_poll, 16); #endif - SET_ETHTOOL_OPS(dev, &ops); + dev->ethtool_ops = &ops; if (register_netdev(dev)) goto err_out_free_ring; diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 4fb756d219f7..2324f2ddfd48 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -227,7 +227,7 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent) } dev->netdev_ops = &netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(dev, ðtool_ops); + dev->ethtool_ops = ðtool_ops; #if 0 dev->features = NETIF_F_IP_CSUM; #endif diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index d9e5ca0d48c1..433c1e185442 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -577,7 +577,7 @@ static int sundance_probe1(struct pci_dev *pdev, /* The chip-specific entries in the device structure. */ dev->netdev_ops = &netdev_ops; - SET_ETHTOOL_OPS(dev, ðtool_ops); + dev->ethtool_ops = ðtool_ops; dev->watchdog_timeo = TX_TIMEOUT; pci_set_drvdata(pdev, dev); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 3f04356afa82..dcc5e5c69743 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4304,7 +4304,7 @@ static void be_netdev_init(struct net_device *netdev) netdev->netdev_ops = &be_netdev_ops; - SET_ETHTOOL_OPS(netdev, &be_ethtool_ops); + netdev->ethtool_ops = &be_ethtool_ops; } static void be_unmap_pci_bars(struct be_adapter *adapter) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 68069eabc4f8..c77fa4a69844 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1210,7 +1210,7 @@ static int ftgmac100_probe(struct platform_device *pdev) SET_NETDEV_DEV(netdev, &pdev->dev); - SET_ETHTOOL_OPS(netdev, &ftgmac100_ethtool_ops); + netdev->ethtool_ops = &ftgmac100_ethtool_ops; netdev->netdev_ops = &ftgmac100_netdev_ops; netdev->features = NETIF_F_IP_CSUM | NETIF_F_GRO; diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index 8be5b40c0a12..4ff1adc6bfca 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -1085,7 +1085,7 @@ static int ftmac100_probe(struct platform_device *pdev) } SET_NETDEV_DEV(netdev, &pdev->dev); - SET_ETHTOOL_OPS(netdev, &ftmac100_ethtool_ops); + netdev->ethtool_ops = &ftmac100_ethtool_ops; netdev->netdev_ops = &ftmac100_netdev_ops; platform_set_drvdata(pdev, netdev); diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c index 413329eff2ff..cc83350d56ba 100644 --- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c +++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c @@ -417,5 +417,5 @@ static const struct ethtool_ops uec_ethtool_ops = { void uec_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &uec_ethtool_ops); + netdev->ethtool_ops = &uec_ethtool_ops; } diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c index 7becab1aa3e4..cfe7a7431730 100644 --- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c +++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c @@ -256,7 +256,7 @@ static int fmvj18x_probe(struct pcmcia_device *link) dev->netdev_ops = &fjn_netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); + dev->ethtool_ops = &netdev_ethtool_ops; return fmvj18x_config(link); } /* fmvj18x_attach */ diff --git a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c index 95837b99a464..6055e3eaf49c 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c @@ -278,5 +278,5 @@ static const struct ethtool_ops ehea_ethtool_ops = { void ehea_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &ehea_ethtool_ops); + netdev->ethtool_ops = &ehea_ethtool_ops; } diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index ae342fdb42c8..87bd953cc2ee 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -2879,7 +2879,7 @@ static int emac_probe(struct platform_device *ofdev) dev->commac.ops = &emac_commac_sg_ops; } else ndev->netdev_ops = &emac_netdev_ops; - SET_ETHTOOL_OPS(ndev, &emac_ethtool_ops); + ndev->ethtool_ops = &emac_ethtool_ops; netif_carrier_off(ndev); diff --git a/drivers/net/ethernet/icplus/ipg.c b/drivers/net/ethernet/icplus/ipg.c index 25045ae07171..5727779a7df2 100644 --- a/drivers/net/ethernet/icplus/ipg.c +++ b/drivers/net/ethernet/icplus/ipg.c @@ -2245,7 +2245,7 @@ static int ipg_probe(struct pci_dev *pdev, const struct pci_device_id *id) */ dev->netdev_ops = &ipg_netdev_ops; SET_NETDEV_DEV(dev, &pdev->dev); - SET_ETHTOOL_OPS(dev, &ipg_ethtool_ops); + dev->ethtool_ops = &ipg_ethtool_ops; rc = pci_request_regions(pdev, DRV_NAME); if (rc) diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index b56461ce674c..9d979d7debef 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -2854,7 +2854,7 @@ static int e100_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_RXALL; netdev->netdev_ops = &e100_netdev_ops; - SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops); + netdev->ethtool_ops = &e100_ethtool_ops; netdev->watchdog_timeo = E100_WATCHDOG_PERIOD; strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index 73a8aeefb92a..341889a4ef7f 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -1905,5 +1905,5 @@ static const struct ethtool_ops e1000_ethtool_ops = { void e1000_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &e1000_ethtool_ops); + netdev->ethtool_ops = &e1000_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 4e5ad7ebe1f2..e9a48bb5caac 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -2318,5 +2318,5 @@ static const struct ethtool_ops e1000_ethtool_ops = { void e1000e_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &e1000_ethtool_ops); + netdev->ethtool_ops = &e1000_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 0cf47c958081..f62929419a09 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1700,5 +1700,5 @@ static const struct ethtool_ops i40e_ethtool_ops = { void i40e_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &i40e_ethtool_ops); + netdev->ethtool_ops = &i40e_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index a46be016039e..77e786d2d0e0 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -705,5 +705,5 @@ static struct ethtool_ops i40evf_ethtool_ops = { **/ void i40evf_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &i40evf_ethtool_ops); + netdev->ethtool_ops = &i40evf_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 333a2b0bbada..a84297c85fb1 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -3035,5 +3035,5 @@ static const struct ethtool_ops igb_ethtool_ops = { void igb_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &igb_ethtool_ops); + netdev->ethtool_ops = &igb_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c index 90eef07943f4..f58170bae18b 100644 --- a/drivers/net/ethernet/intel/igbvf/ethtool.c +++ b/drivers/net/ethernet/intel/igbvf/ethtool.c @@ -476,5 +476,5 @@ static const struct ethtool_ops igbvf_ethtool_ops = { void igbvf_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &igbvf_ethtool_ops); + netdev->ethtool_ops = &igbvf_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c index dbb7dd2f8e36..1da2d987d370 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c @@ -656,5 +656,5 @@ static const struct ethtool_ops ixgb_ethtool_ops = { void ixgb_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &ixgb_ethtool_ops); + netdev->ethtool_ops = &ixgb_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 6c55c14d082a..31d7268401e7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3099,5 +3099,5 @@ static const struct ethtool_ops ixgbe_ethtool_ops = { void ixgbe_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &ixgbe_ethtool_ops); + netdev->ethtool_ops = &ixgbe_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index 1baecb60f065..a757f0734719 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -813,5 +813,5 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = { void ixgbevf_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &ixgbevf_ethtool_ops); + netdev->ethtool_ops = &ixgbevf_ethtool_ops; } diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index b7b8d74c22d9..df1d1b97187f 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2889,7 +2889,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev) if (err) goto out; - SET_ETHTOOL_OPS(dev, &mv643xx_eth_ethtool_ops); + dev->ethtool_ops = &mv643xx_eth_ethtool_ops; init_pscr(mp, pd->speed, pd->duplex); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 14786c8bf99e..72bc47f2d2e9 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2813,7 +2813,7 @@ static int mvneta_probe(struct platform_device *pdev) dev->watchdog_timeo = 5 * HZ; dev->netdev_ops = &mvneta_netdev_ops; - SET_ETHTOOL_OPS(dev, &mvneta_eth_tool_ops); + dev->ethtool_ops = &mvneta_eth_tool_ops; pp = netdev_priv(dev); diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index b358c2f6f4bd..8f5aa7c62b18 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1488,7 +1488,7 @@ static int pxa168_eth_probe(struct platform_device *pdev) dev->netdev_ops = &pxa168_eth_netdev_ops; dev->watchdog_timeo = 2 * HZ; dev->base_addr = 0; - SET_ETHTOOL_OPS(dev, &pxa168_ethtool_ops); + dev->ethtool_ops = &pxa168_ethtool_ops; INIT_WORK(&pep->tx_timeout_task, pxa168_eth_tx_timeout_task); diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index b81106451a0a..69693384b58c 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4760,7 +4760,7 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, SET_NETDEV_DEV(dev, &hw->pdev->dev); dev->irq = hw->pdev->irq; - SET_ETHTOOL_OPS(dev, &sky2_ethtool_ops); + dev->ethtool_ops = &sky2_ethtool_ops; dev->watchdog_timeo = TX_WATCHDOG; dev->netdev_ops = &sky2_netdev_ops[port]; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index fba3c8e77626..79c6f467d17e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2539,7 +2539,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, netif_set_real_num_tx_queues(dev, priv->tx_ring_num); netif_set_real_num_rx_queues(dev, priv->rx_ring_num); - SET_ETHTOOL_OPS(dev, &mlx4_en_ethtool_ops); + dev->ethtool_ops = &mlx4_en_ethtool_ops; /* * Set driver features diff --git a/drivers/net/ethernet/micrel/ks8695net.c b/drivers/net/ethernet/micrel/ks8695net.c index 16435b3cfa9f..6c7c78baedca 100644 --- a/drivers/net/ethernet/micrel/ks8695net.c +++ b/drivers/net/ethernet/micrel/ks8695net.c @@ -1504,15 +1504,15 @@ ks8695_probe(struct platform_device *pdev) if (ksp->phyiface_regs && ksp->link_irq == -1) { ks8695_init_switch(ksp); ksp->dtype = KS8695_DTYPE_LAN; - SET_ETHTOOL_OPS(ndev, &ks8695_ethtool_ops); + ndev->ethtool_ops = &ks8695_ethtool_ops; } else if (ksp->phyiface_regs && ksp->link_irq != -1) { ks8695_init_wan_phy(ksp); ksp->dtype = KS8695_DTYPE_WAN; - SET_ETHTOOL_OPS(ndev, &ks8695_wan_ethtool_ops); + ndev->ethtool_ops = &ks8695_wan_ethtool_ops; } else { /* No initialisation since HPNA does not have a PHY */ ksp->dtype = KS8695_DTYPE_HPNA; - SET_ETHTOOL_OPS(ndev, &ks8695_ethtool_ops); + ndev->ethtool_ops = &ks8695_ethtool_ops; } /* And bring up the net_device with the net core */ diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index e0c92e0e5e1d..13767eb36a48 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -1471,7 +1471,7 @@ static int ks8851_probe(struct spi_device *spi) skb_queue_head_init(&ks->txq); - SET_ETHTOOL_OPS(ndev, &ks8851_ethtool_ops); + ndev->ethtool_ops = &ks8851_ethtool_ops; SET_NETDEV_DEV(ndev, &spi->dev); spi_set_drvdata(spi, ks); diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index 14ac0e2bc09f..4b9592c1fb40 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -7106,7 +7106,7 @@ static int pcidev_init(struct pci_dev *pdev, const struct pci_device_id *id) } dev->netdev_ops = &netdev_ops; - SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); + dev->ethtool_ops = &netdev_ethtool_ops; if (register_netdev(dev)) goto pcidev_init_reg_err; port_set_power_saving(port, true); diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index c7b40aa21f22..b1b5f66b8b69 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -1593,7 +1593,7 @@ static int enc28j60_probe(struct spi_device *spi) dev->irq = spi->irq; dev->netdev_ops = &enc28j60_netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(dev, &enc28j60_ethtool_ops); + dev->ethtool_ops = &enc28j60_ethtool_ops; enc28j60_lowpower(priv, true); diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 130f6b204efa..f3d5d79f1cd1 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -4112,7 +4112,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) setup_timer(&mgp->watchdog_timer, myri10ge_watchdog_timer, (unsigned long)mgp); - SET_ETHTOOL_OPS(netdev, &myri10ge_ethtool_ops); + netdev->ethtool_ops = &myri10ge_ethtool_ops; INIT_WORK(&mgp->watchdog_work, myri10ge_watchdog); status = register_netdev(netdev); if (status != 0) { diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 64ec2a437f46..291fba8b9f07 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -927,7 +927,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) dev->netdev_ops = &natsemi_netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(dev, ðtool_ops); + dev->ethtool_ops = ðtool_ops; if (mtu) dev->mtu = mtu; diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index dbccf1de49ec..19bb8244b9e3 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -2030,7 +2030,7 @@ static int ns83820_init_one(struct pci_dev *pci_dev, pci_dev->subsystem_vendor, pci_dev->subsystem_device); ndev->netdev_ops = &netdev_ops; - SET_ETHTOOL_OPS(ndev, &ops); + ndev->ethtool_ops = &ops; ndev->watchdog_timeo = 5 * HZ; pci_set_drvdata(pci_dev, ndev); diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index e900c1abdef7..e3cf38e6ce3c 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -7910,7 +7910,7 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) /* Driver entry points */ dev->netdev_ops = &s2io_netdev_ops; - SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); + dev->ethtool_ops = &netdev_ethtool_ops; dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXCSUM | NETIF_F_LRO; diff --git a/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c b/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c index f8f073880f84..ddcc81ad1ae1 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c @@ -1128,5 +1128,5 @@ static const struct ethtool_ops vxge_ethtool_ops = { void vxge_initialize_ethtool_ops(struct net_device *ndev) { - SET_ETHTOOL_OPS(ndev, &vxge_ethtool_ops); + ndev->ethtool_ops = &vxge_ethtool_ops; } diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index fddb464aeab3..e8235c5c5e69 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -5766,7 +5766,7 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) dev->netdev_ops = &nv_netdev_ops_optimized; netif_napi_add(dev, &np->napi, nv_napi_poll, RX_WORK_PER_LOOP); - SET_ETHTOOL_OPS(dev, &ops); + dev->ethtool_ops = &ops; dev->watchdog_timeo = NV_WATCHDOG_TIMEO; pci_set_drvdata(pci_dev, dev); diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c index 826f0ccdc23c..114d2fe52cc2 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c @@ -508,5 +508,5 @@ static const struct ethtool_ops pch_gbe_ethtool_ops = { void pch_gbe_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &pch_gbe_ethtool_ops); + netdev->ethtool_ops = &pch_gbe_ethtool_ops; } diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index b6bdeb3c1971..9a997e4c3e08 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -724,10 +724,8 @@ static int hamachi_init_one(struct pci_dev *pdev, /* The Hamachi-specific entries in the device structure. */ dev->netdev_ops = &hamachi_netdev_ops; - if (chip_tbl[hmp->chip_id].flags & CanHaveMII) - SET_ETHTOOL_OPS(dev, ðtool_ops); - else - SET_ETHTOOL_OPS(dev, ðtool_ops_no_mii); + dev->ethtool_ops = (chip_tbl[hmp->chip_id].flags & CanHaveMII) ? + ðtool_ops : ðtool_ops_no_mii; dev->watchdog_timeo = TX_TIMEOUT; if (mtu) dev->mtu = mtu; diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index 9a6cb482dcd0..69a8dc095072 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -472,7 +472,7 @@ static int yellowfin_init_one(struct pci_dev *pdev, /* The Yellowfin-specific entries in the device structure. */ dev->netdev_ops = &netdev_ops; - SET_ETHTOOL_OPS(dev, ðtool_ops); + dev->ethtool_ops = ðtool_ops; dev->watchdog_timeo = TX_TIMEOUT; if (mtu) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index f09c35d669b3..5bf05818a12c 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -1373,7 +1373,7 @@ netxen_setup_netdev(struct netxen_adapter *adapter, netxen_nic_change_mtu(netdev, netdev->mtu); - SET_ETHTOOL_OPS(netdev, &netxen_nic_ethtool_ops); + netdev->ethtool_ops = &netxen_nic_ethtool_ops; netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | NETIF_F_RXCSUM; diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 2eabd44f8914..b5d6bc1a8b00 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3838,7 +3838,7 @@ static int ql3xxx_probe(struct pci_dev *pdev, /* Set driver entry points */ ndev->netdev_ops = &ql3xxx_netdev_ops; - SET_ETHTOOL_OPS(ndev, &ql3xxx_ethtool_ops); + ndev->ethtool_ops = &ql3xxx_ethtool_ops; ndev->watchdog_timeo = 5 * HZ; netif_napi_add(ndev, &qdev->napi, ql_poll, 64); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 8a2aeb85e320..f0a285359e66 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2265,10 +2265,8 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev, qlcnic_change_mtu(netdev, netdev->mtu); - if (qlcnic_sriov_vf_check(adapter)) - SET_ETHTOOL_OPS(netdev, &qlcnic_sriov_vf_ethtool_ops); - else - SET_ETHTOOL_OPS(netdev, &qlcnic_ethtool_ops); + netdev->ethtool_ops = (qlcnic_sriov_vf_check(adapter)) ? + &qlcnic_sriov_vf_ethtool_ops : &qlcnic_ethtool_ops; netdev->features |= (NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_IPV6_CSUM | NETIF_F_GRO | @@ -2682,7 +2680,7 @@ err_out_disable_pdev: err_out_maintenance_mode: set_bit(__QLCNIC_MAINTENANCE_MODE, &adapter->state); netdev->netdev_ops = &qlcnic_netdev_failed_ops; - SET_ETHTOOL_OPS(netdev, &qlcnic_ethtool_failed_ops); + netdev->ethtool_ops = &qlcnic_ethtool_failed_ops; ahw->port_type = QLCNIC_XGBE; if (qlcnic_83xx_check(adapter)) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 6e36fe14b848..b40050e03a56 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -4770,7 +4770,7 @@ static int qlge_probe(struct pci_dev *pdev, ndev->irq = pdev->irq; ndev->netdev_ops = &qlge_netdev_ops; - SET_ETHTOOL_OPS(ndev, &qlge_ethtool_ops); + ndev->ethtool_ops = &qlge_ethtool_ops; ndev->watchdog_timeo = 10 * HZ; err = register_netdev(ndev); diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index aa1c079f231d..be425ad5e824 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -7125,7 +7125,7 @@ rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) for (i = 0; i < ETH_ALEN; i++) dev->dev_addr[i] = RTL_R8(MAC0 + i); - SET_ETHTOOL_OPS(dev, &rtl8169_ethtool_ops); + dev->ethtool_ops = &rtl8169_ethtool_ops; dev->watchdog_timeo = RTL8169_TX_TIMEOUT; netif_napi_add(dev, &tp->napi, rtl8169_poll, R8169_NAPI_WEIGHT); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 6a9509ccd33b..967314cade95 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2843,7 +2843,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev) ndev->netdev_ops = &sh_eth_netdev_ops_tsu; else ndev->netdev_ops = &sh_eth_netdev_ops; - SET_ETHTOOL_OPS(ndev, &sh_eth_ethtool_ops); + ndev->ethtool_ops = &sh_eth_ethtool_ops; ndev->watchdog_timeo = TX_TIMEOUT; /* debug message level */ diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c index 0415fa50eeb7..c0981ae45874 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c @@ -520,5 +520,5 @@ static const struct ethtool_ops sxgbe_ethtool_ops = { void sxgbe_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &sxgbe_ethtool_ops); + netdev->ethtool_ops = &sxgbe_ethtool_ops; } diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 63d595fd3cc5..1e274045970f 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2248,7 +2248,7 @@ static int efx_register_netdev(struct efx_nic *efx) } else { net_dev->netdev_ops = &efx_farch_netdev_ops; } - SET_ETHTOOL_OPS(net_dev, &efx_ethtool_ops); + net_dev->ethtool_ops = &efx_ethtool_ops; net_dev->gso_max_segs = EFX_TSO_MAX_SEGS; rtnl_lock(); diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index acbbe48a519c..a86339903b9b 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -1877,7 +1877,7 @@ static int sis190_init_one(struct pci_dev *pdev, dev->netdev_ops = &sis190_netdev_ops; - SET_ETHTOOL_OPS(dev, &sis190_ethtool_ops); + dev->ethtool_ops = &sis190_ethtool_ops; dev->watchdog_timeo = SIS190_TX_TIMEOUT; spin_lock_init(&tp->lock); diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index c7a4868571f9..6b33127ab352 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -318,7 +318,7 @@ static int smc91c92_probe(struct pcmcia_device *link) /* The SMC91c92-specific entries in the device structure. */ dev->netdev_ops = &smc_netdev_ops; - SET_ETHTOOL_OPS(dev, ðtool_ops); + dev->ethtool_ops = ðtool_ops; dev->watchdog_timeo = TX_TIMEOUT; smc->mii_if.dev = dev; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index c5f9cb85c8ef..c963394ded6c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -784,5 +784,5 @@ static const struct ethtool_ops stmmac_ethtool_ops = { void stmmac_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &stmmac_ethtool_ops); + netdev->ethtool_ops = &stmmac_ethtool_ops; } diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index 2ead87759ab4..38da73a2a886 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -2413,7 +2413,7 @@ static void bdx_set_ethtool_ops(struct net_device *netdev) .get_ethtool_stats = bdx_get_ethtool_stats, }; - SET_ETHTOOL_OPS(netdev, &bdx_ethtool_ops); + netdev->ethtool_ops = &bdx_ethtool_ops; } /** diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 91499be03c6f..e3d871055d63 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1998,7 +1998,7 @@ static int cpsw_probe_dual_emac(struct platform_device *pdev, ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->netdev_ops = &cpsw_netdev_ops; - SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops); + ndev->ethtool_ops = &cpsw_ethtool_ops; netif_napi_add(ndev, &priv_sl2->napi, cpsw_poll, CPSW_POLL_WEIGHT); /* register the network device */ @@ -2227,7 +2227,7 @@ static int cpsw_probe(struct platform_device *pdev) ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->netdev_ops = &cpsw_netdev_ops; - SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops); + ndev->ethtool_ops = &cpsw_ethtool_ops; netif_napi_add(ndev, &priv->napi, cpsw_poll, CPSW_POLL_WEIGHT); /* register the network device */ diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 8f0e69ce07ca..e76eae541151 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1980,7 +1980,7 @@ static int davinci_emac_probe(struct platform_device *pdev) } ndev->netdev_ops = &emac_netdev_ops; - SET_ETHTOOL_OPS(ndev, ðtool_ops); + ndev->ethtool_ops = ðtool_ops; netif_napi_add(ndev, &priv->napi, emac_poll, EMAC_POLL_WEIGHT); /* register the network device */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 1de3ef5dd5d2..2e967a7bdb33 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -810,7 +810,7 @@ static int netvsc_probe(struct hv_device *dev, net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO; - SET_ETHTOOL_OPS(net, ðtool_ops); + net->ethtool_ops = ðtool_ops; SET_NETDEV_DEV(net, &dev->device); /* Notify the netvsc driver of the new device */ diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 63aa9d9e34c5..27536aa89199 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -348,7 +348,7 @@ static int ntb_netdev_probe(struct pci_dev *pdev) memcpy(ndev->dev_addr, ndev->perm_addr, ndev->addr_len); ndev->netdev_ops = &ntb_netdev_ops; - SET_ETHTOOL_OPS(ndev, &ntb_ethtool_ops); + ndev->ethtool_ops = &ntb_ethtool_ops; dev->qp = ntb_transport_create_queue(ndev, pdev, &ntb_netdev_handlers); if (!dev->qp) { diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index a8497183ff8b..dac7a0d9bb46 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -494,7 +494,7 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev) ndev->mtu = RIO_MAX_MSG_SIZE - 14; ndev->features = NETIF_F_LLTX; SET_NETDEV_DEV(ndev, &mport->dev); - SET_ETHTOOL_OPS(ndev, &rionet_ethtool_ops); + ndev->ethtool_ops = &rionet_ethtool_ops; spin_lock_init(&rnet->lock); spin_lock_init(&rnet->tx_lock); diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index 630caf48f63a..8cfc3bb0c6a6 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -793,7 +793,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id netdev->netdev_ops = &catc_netdev_ops; netdev->watchdog_timeo = TX_TIMEOUT; - SET_ETHTOOL_OPS(netdev, &ops); + netdev->ethtool_ops = &ops; catc->usbdev = usbdev; catc->netdev = netdev; diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 660bd5ea9fc0..a3a05869309d 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2425,7 +2425,7 @@ static void hso_net_init(struct net_device *net) net->type = ARPHRD_NONE; net->mtu = DEFAULT_MTU - 14; net->tx_queue_len = 10; - SET_ETHTOOL_OPS(net, &ops); + net->ethtool_ops = &ops; /* and initialize the semaphore */ spin_lock_init(&hso_net->net_lock); diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 421934c83f1c..f72570708edb 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -524,7 +524,7 @@ static int ipheth_probe(struct usb_interface *intf, usb_set_intfdata(intf, dev); SET_NETDEV_DEV(netdev, &intf->dev); - SET_ETHTOOL_OPS(netdev, &ops); + netdev->ethtool_ops = &ops; retval = register_netdev(netdev); if (retval) { diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index a359d3bb7c5b..dcb6d33141e0 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -1171,7 +1171,7 @@ err_fw: netdev->netdev_ops = &kaweth_netdev_ops; netdev->watchdog_timeo = KAWETH_TX_TIMEOUT; netdev->mtu = le16_to_cpu(kaweth->configuration.segment_size); - SET_ETHTOOL_OPS(netdev, &ops); + netdev->ethtool_ops = &ops; /* kaweth is zeroed as part of alloc_netdev */ INIT_DELAYED_WORK(&kaweth->lowmem_work, kaweth_resubmit_tl); diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 03e8a15d7deb..f84080215915 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -1159,7 +1159,7 @@ static int pegasus_probe(struct usb_interface *intf, net->watchdog_timeo = PEGASUS_TX_TIMEOUT; net->netdev_ops = &pegasus_netdev_ops; - SET_ETHTOOL_OPS(net, &ops); + net->ethtool_ops = &ops; pegasus->mii.dev = net; pegasus->mii.mdio_read = mdio_read; pegasus->mii.mdio_write = mdio_write; diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 3fbfb0869030..9f91c7aba4b0 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3452,7 +3452,7 @@ static int rtl8152_probe(struct usb_interface *intf, NETIF_F_TSO | NETIF_F_FRAGLIST | NETIF_F_IPV6_CSUM | NETIF_F_TSO6; - SET_ETHTOOL_OPS(netdev, &ops); + netdev->ethtool_ops = &ops; netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE); tp->mii.dev = netdev; diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index da2c4583bd2d..6e87e5710048 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -878,7 +878,7 @@ static int rtl8150_probe(struct usb_interface *intf, dev->netdev = netdev; netdev->netdev_ops = &rtl8150_netdev_ops; netdev->watchdog_timeo = RTL8150_TX_TIMEOUT; - SET_ETHTOOL_OPS(netdev, &ops); + netdev->ethtool_ops = &ops; dev->intr_interval = 100; /* 100ms */ if (!alloc_all_urbs(dev)) { diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b852bb368acc..7d9f84a91f37 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1646,7 +1646,7 @@ static int virtnet_probe(struct virtio_device *vdev) dev->netdev_ops = &virtnet_netdev; dev->features = NETIF_F_HIGHDMA; - SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops); + dev->ethtool_ops = &virtnet_ethtool_ops; SET_NETDEV_DEV(dev, &vdev->dev); /* Do we support "hardware" checksums? */ diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 600ab56c0008..00e120296e92 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -635,5 +635,5 @@ static const struct ethtool_ops vmxnet3_ethtool_ops = { void vmxnet3_set_ethtool_ops(struct net_device *netdev) { - SET_ETHTOOL_OPS(netdev, &vmxnet3_ethtool_ops); + netdev->ethtool_ops = &vmxnet3_ethtool_ops; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 1dfee9a7fbf7..e68c8eb4ea8e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2716,7 +2716,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, return -EEXIST; } - SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops); + dev->ethtool_ops = &vxlan_ethtool_ops; /* create an fdb entry for a valid default destination */ if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) { diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c index 67db34e56d7e..52919ad42726 100644 --- a/drivers/net/wireless/hostap/hostap_main.c +++ b/drivers/net/wireless/hostap/hostap_main.c @@ -882,7 +882,7 @@ void hostap_setup_dev(struct net_device *dev, local_info_t *local, dev->mtu = local->mtu; - SET_ETHTOOL_OPS(dev, &prism2_ethtool_ops); + dev->ethtool_ops = &prism2_ethtool_ops; } diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index ef05c5c49d41..a7557331699f 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -386,7 +386,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6; dev->features = dev->hw_features | NETIF_F_RXCSUM; - SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops); + dev->ethtool_ops = &xenvif_ethtool_ops; dev->tx_queue_len = XENVIF_QUEUE_LENGTH; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 158b5e639fc7..895355de8ac4 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1332,7 +1332,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) */ netdev->features |= netdev->hw_features; - SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops); + netdev->ethtool_ops = &xennet_ethtool_ops; SET_NETDEV_DEV(netdev, &dev->dev); netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index e232ceca38fe..5ef5b4f45758 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -969,10 +969,9 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) card->dev->watchdog_timeo = QETH_TX_TIMEOUT; card->dev->mtu = card->info.initial_mtu; card->dev->netdev_ops = &qeth_l2_netdev_ops; - if (card->info.type != QETH_CARD_TYPE_OSN) - SET_ETHTOOL_OPS(card->dev, &qeth_l2_ethtool_ops); - else - SET_ETHTOOL_OPS(card->dev, &qeth_l2_osn_ops); + card->dev->ethtool_ops = + (card->info.type != QETH_CARD_TYPE_OSN) ? + &qeth_l2_ethtool_ops : &qeth_l2_osn_ops; card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; card->info.broadcast_capable = 1; qeth_l2_request_initial_mac(card); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index bc2499a24884..c58f82af3658 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3301,7 +3301,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) card->dev->ml_priv = card; card->dev->watchdog_timeo = QETH_TX_TIMEOUT; card->dev->mtu = card->info.initial_mtu; - SET_ETHTOOL_OPS(card->dev, &qeth_l3_ethtool_ops); + card->dev->ethtool_ops = &qeth_l3_ethtool_ops; card->dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; diff --git a/drivers/staging/et131x/et131x.c b/drivers/staging/et131x/et131x.c index d329cf314360..15e0f4da3ce0 100644 --- a/drivers/staging/et131x/et131x.c +++ b/drivers/staging/et131x/et131x.c @@ -4604,7 +4604,7 @@ static int et131x_pci_setup(struct pci_dev *pdev, netdev->netdev_ops = &et131x_netdev_ops; SET_NETDEV_DEV(netdev, &pdev->dev); - SET_ETHTOOL_OPS(netdev, &et131x_ethtool_ops); + netdev->ethtool_ops = &et131x_ethtool_ops; adapter = et131x_adapter_init(netdev, pdev); diff --git a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c index d6421b9b5981..a6158bef58e5 100644 --- a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c +++ b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c @@ -2249,7 +2249,7 @@ struct net_device *init_ft1000_card(struct pcmcia_device *link, ft1000InitProc(dev); ft1000_card_present = 1; - SET_ETHTOOL_OPS(dev, &ops); + dev->ethtool_ops = &ops; printk(KERN_INFO "ft1000: %s: addr 0x%04lx irq %d, MAC addr %pM\n", dev->name, dev->base_addr, dev->irq, dev->dev_addr); return dev; diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c index c83e3375104b..9d957615e32a 100644 --- a/drivers/staging/netlogic/xlr_net.c +++ b/drivers/staging/netlogic/xlr_net.c @@ -1066,7 +1066,7 @@ static int xlr_net_probe(struct platform_device *pdev) xlr_set_rx_mode(ndev); priv->num_rx_desc += MAX_NUM_DESC_SPILL; - SET_ETHTOOL_OPS(ndev, &xlr_ethtool_ops); + ndev->ethtool_ops = &xlr_ethtool_ops; SET_NETDEV_DEV(ndev, &pdev->dev); /* Common registers, do one time initialization */ diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c index ff7214aac9dd..da9dd6bc5660 100644 --- a/drivers/staging/octeon/ethernet.c +++ b/drivers/staging/octeon/ethernet.c @@ -469,7 +469,7 @@ int cvm_oct_common_init(struct net_device *dev) /* We do our own locking, Linux doesn't need to */ dev->features |= NETIF_F_LLTX; - SET_ETHTOOL_OPS(dev, &cvm_oct_ethtool_ops); + dev->ethtool_ops = &cvm_oct_ethtool_ops; cvm_oct_phy_setup_device(dev); cvm_oct_set_mac_filter(dev); diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index b7d4f82872b7..ce8e28146162 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -793,7 +793,7 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, net->netdev_ops = ð_netdev_ops; - SET_ETHTOOL_OPS(net, &ops); + net->ethtool_ops = &ops; dev->gadget = g; SET_NETDEV_DEV(net, &g->dev); @@ -850,7 +850,7 @@ struct net_device *gether_setup_name_default(const char *netname) net->netdev_ops = ð_netdev_ops; - SET_ETHTOOL_OPS(net, &ops); + net->ethtool_ops = &ops; SET_NETDEV_DEVTYPE(net, &gadget_type); return net; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index adc4658e9873..2dea98cbbdba 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -56,9 +56,6 @@ struct device; struct phy_device; /* 802.11 specific */ struct wireless_dev; - /* source back-compat hooks */ -#define SET_ETHTOOL_OPS(netdev,ops) \ - ( (netdev)->ethtool_ops = (ops) ) void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 744a59b85e15..e7ee65dc20bf 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -884,7 +884,7 @@ static void batadv_softif_init_early(struct net_device *dev) /* generate random address */ eth_hw_addr_random(dev); - SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops); + dev->ethtool_ops = &batadv_ethtool_ops; memset(priv, 0, sizeof(*priv)); } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 3e2da2cb72db..9212015abc8f 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -348,7 +348,7 @@ void br_dev_setup(struct net_device *dev) dev->netdev_ops = &br_netdev_ops; dev->destructor = br_dev_free; - SET_ETHTOOL_OPS(dev, &br_ethtool_ops); + dev->ethtool_ops = &br_ethtool_ops; SET_NETDEV_DEVTYPE(dev, &br_type); dev->tx_queue_len = 0; dev->priv_flags = IFF_EBRIDGE; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 02c0e1716f64..64c5af0a10dd 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -346,7 +346,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, return slave_dev; slave_dev->features = master->vlan_features; - SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops); + slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; eth_hw_addr_inherit(slave_dev, master); slave_dev->tx_queue_len = 0; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 729c68763fe7..789af9280e77 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -130,7 +130,7 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags &= ~IFF_TX_SKB_SHARING; netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netdev->destructor = internal_dev_destructor; - SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops); + netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->tx_queue_len = 0; netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | -- cgit v1.2.3-71-gd317 From 50a0ffaf75e9d2d97200b523f2c600f40e9756b1 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Sun, 11 May 2014 10:47:15 +0200 Subject: net: cdc_ncm/cdc_mbim: rework probing of NCM/MBIM functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The NCM class match in the cdc_mbim driver is confusing and cause unexpected behaviour. The USB core guarantees that a USB interface is in altsetting 0 when probing starts. This means that devices implementing a NCM 1.0 backwards compatible MBIM function (a "NCM/MBIM function") always hit the NCM entry in the cdc_mbim driver match table. Such functions will never match any of the MBIM entries. This causes unexpeced behaviour for cases where the NCM and MBIM entries are differet, which is currently the case for all except Ericsson devices. Improve the probing of NCM/MBIM functions by looking up the device again in the cdc_mbim match table after switching to the MBIM identity. The shared altsetting selection is updated to better accommodate the new probing logic, returning the preferred altsetting for the control interface instead of the data interface. The control interface altsetting update is moved to the cdc_mbim driver. It is never necessary to change the control interface altsetting for NCM. Cc: Greg Suarez Reported by: Yu-an Shih Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 43 +++++++++++++++++++++++++++++++++++++++++-- drivers/net/usb/cdc_ncm.c | 27 +++++++++++++-------------- include/linux/usb/cdc_ncm.h | 2 +- 3 files changed, 55 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 80d27719ba38..bc23273d0455 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -107,15 +107,54 @@ static const struct net_device_ops cdc_mbim_netdev_ops = { .ndo_vlan_rx_kill_vid = cdc_mbim_rx_kill_vid, }; +/* Change the control interface altsetting and update the .driver_info + * pointer if the matching entry after changing class codes points to + * a different struct + */ +static int cdc_mbim_set_ctrlalt(struct usbnet *dev, struct usb_interface *intf, u8 alt) +{ + struct usb_driver *driver = to_usb_driver(intf->dev.driver); + const struct usb_device_id *id; + struct driver_info *info; + int ret; + + ret = usb_set_interface(dev->udev, + intf->cur_altsetting->desc.bInterfaceNumber, + alt); + if (ret) + return ret; + + id = usb_match_id(intf, driver->id_table); + if (!id) + return -ENODEV; + + info = (struct driver_info *)id->driver_info; + if (info != dev->driver_info) { + dev_dbg(&intf->dev, "driver_info updated to '%s'\n", + info->description); + dev->driver_info = info; + } + return 0; +} + static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf) { struct cdc_ncm_ctx *ctx; struct usb_driver *subdriver = ERR_PTR(-ENODEV); int ret = -ENODEV; - u8 data_altsetting = cdc_ncm_select_altsetting(dev, intf); + u8 data_altsetting = 1; struct cdc_mbim_state *info = (void *)&dev->data; - /* Probably NCM, defer for cdc_ncm_bind */ + /* should we change control altsetting on a NCM/MBIM function? */ + if (cdc_ncm_select_altsetting(intf) == CDC_NCM_COMM_ALTSETTING_MBIM) { + data_altsetting = CDC_NCM_DATA_ALTSETTING_MBIM; + ret = cdc_mbim_set_ctrlalt(dev, intf, CDC_NCM_COMM_ALTSETTING_MBIM); + if (ret) + goto err; + ret = -ENODEV; + } + + /* we will hit this for NCM/MBIM functions if prefer_mbim is false */ if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) goto err; diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 9a2bd11943eb..d23bca57a23f 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -541,10 +541,10 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf) } EXPORT_SYMBOL_GPL(cdc_ncm_unbind); -/* Select the MBIM altsetting iff it is preferred and available, - * returning the number of the corresponding data interface altsetting +/* Return the number of the MBIM control interface altsetting iff it + * is preferred and available, */ -u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf) +u8 cdc_ncm_select_altsetting(struct usb_interface *intf) { struct usb_host_interface *alt; @@ -563,15 +563,15 @@ u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf) * the rules given in section 6 (USB Device Model) of this * specification." */ - if (prefer_mbim && intf->num_altsetting == 2) { + if (intf->num_altsetting < 2) + return intf->cur_altsetting->desc.bAlternateSetting; + + if (prefer_mbim) { alt = usb_altnum_to_altsetting(intf, CDC_NCM_COMM_ALTSETTING_MBIM); - if (alt && cdc_ncm_comm_intf_is_mbim(alt) && - !usb_set_interface(dev->udev, - intf->cur_altsetting->desc.bInterfaceNumber, - CDC_NCM_COMM_ALTSETTING_MBIM)) - return CDC_NCM_DATA_ALTSETTING_MBIM; + if (alt && cdc_ncm_comm_intf_is_mbim(alt)) + return CDC_NCM_COMM_ALTSETTING_MBIM; } - return CDC_NCM_DATA_ALTSETTING_NCM; + return CDC_NCM_COMM_ALTSETTING_NCM; } EXPORT_SYMBOL_GPL(cdc_ncm_select_altsetting); @@ -580,12 +580,11 @@ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) int ret; /* MBIM backwards compatible function? */ - cdc_ncm_select_altsetting(dev, intf); - if (cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) + if (cdc_ncm_select_altsetting(intf) != CDC_NCM_COMM_ALTSETTING_NCM) return -ENODEV; - /* NCM data altsetting is always 1 */ - ret = cdc_ncm_bind_common(dev, intf, 1); + /* The NCM data altsetting is fixed */ + ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM); /* * We should get an event when network connection is "connected" or diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 44b38b92236a..55b6feead93b 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -121,7 +121,7 @@ struct cdc_ncm_ctx { u16 connected; }; -u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf); +u8 cdc_ncm_select_altsetting(struct usb_interface *intf); int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); -- cgit v1.2.3-71-gd317 From 5b7ed0892f2af4e60b9a8d2c71c77774512a6cb9 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Sun, 11 May 2014 20:22:09 -0700 Subject: tcp: move fastopen functions to tcp_fastopen.c Move common TFO functions that will be used by both v4 and v6 to tcp_fastopen.c. Create a helper tcp_fastopen_queue_check(). Signed-off-by: Yuchung Cheng Signed-off-by: Daniel Lee Signed-off-by: Jerry Chu Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 10 ++- net/ipv4/tcp_fastopen.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 185 +--------------------------------------------- 3 files changed, 200 insertions(+), 185 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 3c9418456640..012236838583 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1329,8 +1329,14 @@ void tcp_free_fastopen_req(struct tcp_sock *tp); extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; int tcp_fastopen_reset_cipher(void *key, unsigned int len); -void tcp_fastopen_cookie_gen(__be32 src, __be32 dst, - struct tcp_fastopen_cookie *foc); +int tcp_fastopen_create_child(struct sock *sk, + struct sk_buff *skb, + struct sk_buff *skb_synack, + struct request_sock *req); +bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + struct tcp_fastopen_cookie *valid_foc); void tcp_fastopen_init_key_once(bool publish); #define TCP_FASTOPEN_KEY_LENGTH 16 diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index f195d9316e55..0606c91d9d0b 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -94,3 +94,193 @@ void tcp_fastopen_cookie_gen(__be32 src, __be32 dst, } rcu_read_unlock(); } + +int tcp_fastopen_create_child(struct sock *sk, + struct sk_buff *skb, + struct sk_buff *skb_synack, + struct request_sock *req) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; + const struct inet_request_sock *ireq = inet_rsk(req); + struct sock *child; + int err; + + req->num_retrans = 0; + req->num_timeout = 0; + req->sk = NULL; + + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); + if (child == NULL) { + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENPASSIVEFAIL); + kfree_skb(skb_synack); + return -1; + } + err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, ireq->opt); + err = net_xmit_eval(err); + if (!err) + tcp_rsk(req)->snt_synack = tcp_time_stamp; + /* XXX (TFO) - is it ok to ignore error and continue? */ + + spin_lock(&queue->fastopenq->lock); + queue->fastopenq->qlen++; + spin_unlock(&queue->fastopenq->lock); + + /* Initialize the child socket. Have to fix some values to take + * into account the child is a Fast Open socket and is created + * only out of the bits carried in the SYN packet. + */ + tp = tcp_sk(child); + + tp->fastopen_rsk = req; + /* Do a hold on the listner sk so that if the listener is being + * closed, the child that has been accepted can live on and still + * access listen_lock. + */ + sock_hold(sk); + tcp_rsk(req)->listener = sk; + + /* RFC1323: The window in SYN & SYN/ACK segments is never + * scaled. So correct it appropriately. + */ + tp->snd_wnd = ntohs(tcp_hdr(skb)->window); + + /* Activate the retrans timer so that SYNACK can be retransmitted. + * The request socket is not added to the SYN table of the parent + * because it's been added to the accept queue directly. + */ + inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, + TCP_TIMEOUT_INIT, TCP_RTO_MAX); + + /* Add the child socket directly into the accept queue */ + inet_csk_reqsk_queue_add(sk, req, child); + + /* Now finish processing the fastopen child socket. */ + inet_csk(child)->icsk_af_ops->rebuild_header(child); + tcp_init_congestion_control(child); + tcp_mtup_init(child); + tcp_init_metrics(child); + tcp_init_buffer_space(child); + + /* Queue the data carried in the SYN packet. We need to first + * bump skb's refcnt because the caller will attempt to free it. + * + * XXX (TFO) - we honor a zero-payload TFO request for now. + * (Any reason not to?) + */ + if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) { + /* Don't queue the skb if there is no payload in SYN. + * XXX (TFO) - How about SYN+FIN? + */ + tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + } else { + skb = skb_get(skb); + skb_dst_drop(skb); + __skb_pull(skb, tcp_hdr(skb)->doff * 4); + skb_set_owner_r(skb, child); + __skb_queue_tail(&child->sk_receive_queue, skb); + tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + tp->syn_data_acked = 1; + } + sk->sk_data_ready(sk); + bh_unlock_sock(child); + sock_put(child); + WARN_ON(req->sk == NULL); + return 0; +} +EXPORT_SYMBOL(tcp_fastopen_create_child); + +static bool tcp_fastopen_queue_check(struct sock *sk) +{ + struct fastopen_queue *fastopenq; + + /* Make sure the listener has enabled fastopen, and we don't + * exceed the max # of pending TFO requests allowed before trying + * to validating the cookie in order to avoid burning CPU cycles + * unnecessarily. + * + * XXX (TFO) - The implication of checking the max_qlen before + * processing a cookie request is that clients can't differentiate + * between qlen overflow causing Fast Open to be disabled + * temporarily vs a server not supporting Fast Open at all. + */ + fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; + if (fastopenq == NULL || fastopenq->max_qlen == 0) + return false; + + if (fastopenq->qlen >= fastopenq->max_qlen) { + struct request_sock *req1; + spin_lock(&fastopenq->lock); + req1 = fastopenq->rskq_rst_head; + if ((req1 == NULL) || time_after(req1->expires, jiffies)) { + spin_unlock(&fastopenq->lock); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); + return false; + } + fastopenq->rskq_rst_head = req1->dl_next; + fastopenq->qlen--; + spin_unlock(&fastopenq->lock); + reqsk_free(req1); + } + return true; +} + +bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + struct tcp_fastopen_cookie *valid_foc) +{ + bool skip_cookie = false; + + if (likely(!fastopen_cookie_present(foc))) { + /* See include/net/tcp.h for the meaning of these knobs */ + if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) || + ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) && + (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1))) + skip_cookie = true; /* no cookie to validate */ + else + return false; + } + /* A FO option is present; bump the counter. */ + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); + + if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 || + !tcp_fastopen_queue_check(sk)) + return false; + + if (skip_cookie) { + tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + return true; + } + + if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) { + if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) { + tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, valid_foc); + if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) || + memcmp(&foc->val[0], &valid_foc->val[0], + TCP_FASTOPEN_COOKIE_SIZE) != 0) + return false; + valid_foc->len = -1; + } + /* Acknowledge the data received from the peer. */ + tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + return true; + } else if (foc->len == 0) { /* Client requesting a cookie */ + tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, valid_foc); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENCOOKIEREQD); + } else { + /* Client sent a cookie with wrong size. Treat it + * the same as invalid and return a valid one. + */ + tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, valid_foc); + } + return false; +} +EXPORT_SYMBOL(tcp_fastopen_check); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ad166dcc278f..032fcaee164a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1260,187 +1260,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { }; #endif -static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct tcp_fastopen_cookie *foc, - struct tcp_fastopen_cookie *valid_foc) -{ - bool skip_cookie = false; - struct fastopen_queue *fastopenq; - - if (likely(!fastopen_cookie_present(foc))) { - /* See include/net/tcp.h for the meaning of these knobs */ - if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) || - ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) && - (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1))) - skip_cookie = true; /* no cookie to validate */ - else - return false; - } - fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; - /* A FO option is present; bump the counter. */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); - - /* Make sure the listener has enabled fastopen, and we don't - * exceed the max # of pending TFO requests allowed before trying - * to validating the cookie in order to avoid burning CPU cycles - * unnecessarily. - * - * XXX (TFO) - The implication of checking the max_qlen before - * processing a cookie request is that clients can't differentiate - * between qlen overflow causing Fast Open to be disabled - * temporarily vs a server not supporting Fast Open at all. - */ - if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 || - fastopenq == NULL || fastopenq->max_qlen == 0) - return false; - - if (fastopenq->qlen >= fastopenq->max_qlen) { - struct request_sock *req1; - spin_lock(&fastopenq->lock); - req1 = fastopenq->rskq_rst_head; - if ((req1 == NULL) || time_after(req1->expires, jiffies)) { - spin_unlock(&fastopenq->lock); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); - /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/ - foc->len = -1; - return false; - } - fastopenq->rskq_rst_head = req1->dl_next; - fastopenq->qlen--; - spin_unlock(&fastopenq->lock); - reqsk_free(req1); - } - if (skip_cookie) { - tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - return true; - } - - if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) { - if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) { - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, valid_foc); - if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) || - memcmp(&foc->val[0], &valid_foc->val[0], - TCP_FASTOPEN_COOKIE_SIZE) != 0) - return false; - valid_foc->len = -1; - } - /* Acknowledge the data received from the peer. */ - tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - return true; - } else if (foc->len == 0) { /* Client requesting a cookie */ - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, valid_foc); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENCOOKIEREQD); - } else { - /* Client sent a cookie with wrong size. Treat it - * the same as invalid and return a valid one. - */ - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, valid_foc); - } - return false; -} - -static int tcp_v4_conn_req_fastopen(struct sock *sk, - struct sk_buff *skb, - struct sk_buff *skb_synack, - struct request_sock *req) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; - const struct inet_request_sock *ireq = inet_rsk(req); - struct sock *child; - int err; - - req->num_retrans = 0; - req->num_timeout = 0; - req->sk = NULL; - - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); - if (child == NULL) { - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - kfree_skb(skb_synack); - return -1; - } - err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr, - ireq->ir_rmt_addr, ireq->opt); - err = net_xmit_eval(err); - if (!err) - tcp_rsk(req)->snt_synack = tcp_time_stamp; - /* XXX (TFO) - is it ok to ignore error and continue? */ - - spin_lock(&queue->fastopenq->lock); - queue->fastopenq->qlen++; - spin_unlock(&queue->fastopenq->lock); - - /* Initialize the child socket. Have to fix some values to take - * into account the child is a Fast Open socket and is created - * only out of the bits carried in the SYN packet. - */ - tp = tcp_sk(child); - - tp->fastopen_rsk = req; - /* Do a hold on the listner sk so that if the listener is being - * closed, the child that has been accepted can live on and still - * access listen_lock. - */ - sock_hold(sk); - tcp_rsk(req)->listener = sk; - - /* RFC1323: The window in SYN & SYN/ACK segments is never - * scaled. So correct it appropriately. - */ - tp->snd_wnd = ntohs(tcp_hdr(skb)->window); - - /* Activate the retrans timer so that SYNACK can be retransmitted. - * The request socket is not added to the SYN table of the parent - * because it's been added to the accept queue directly. - */ - inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, - TCP_TIMEOUT_INIT, TCP_RTO_MAX); - - /* Add the child socket directly into the accept queue */ - inet_csk_reqsk_queue_add(sk, req, child); - - /* Now finish processing the fastopen child socket. */ - inet_csk(child)->icsk_af_ops->rebuild_header(child); - tcp_init_congestion_control(child); - tcp_mtup_init(child); - tcp_init_metrics(child); - tcp_init_buffer_space(child); - - /* Queue the data carried in the SYN packet. We need to first - * bump skb's refcnt because the caller will attempt to free it. - * - * XXX (TFO) - we honor a zero-payload TFO request for now. - * (Any reason not to?) - */ - if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) { - /* Don't queue the skb if there is no payload in SYN. - * XXX (TFO) - How about SYN+FIN? - */ - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - } else { - skb = skb_get(skb); - skb_dst_drop(skb); - __skb_pull(skb, tcp_hdr(skb)->doff * 4); - skb_set_owner_r(skb, child); - __skb_queue_tail(&child->sk_receive_queue, skb); - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - tp->syn_data_acked = 1; - } - sk->sk_data_ready(sk); - bh_unlock_sock(child); - sock_put(child); - WARN_ON(req->sk == NULL); - return 0; -} - int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tmp_opt; @@ -1599,8 +1418,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (fastopen_cookie_present(&foc) && foc.len != 0) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req)) - goto drop_and_free; + } else if (tcp_fastopen_create_child(sk, skb, skb_synack, req)) + goto drop_and_release; return 0; -- cgit v1.2.3-71-gd317 From 89278c9dc922272df921042aafa18311f3398c6c Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Sun, 11 May 2014 20:22:10 -0700 Subject: tcp: simplify fast open cookie processing Consolidate various cookie checking and generation code to simplify the fast open processing. The main goal is to reduce code duplication in tcp_v4_conn_request() for IPv6 support. Removes two experimental sysctl flags TFO_SERVER_ALWAYS and TFO_SERVER_COOKIE_NOT_CHKD used primarily for developmental debugging purposes. Signed-off-by: Yuchung Cheng Signed-off-by: Daniel Lee Signed-off-by: Jerry Chu Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 5 ---- include/net/tcp.h | 9 +------ net/ipv4/tcp_fastopen.c | 71 +++++++++++++++++++------------------------------ net/ipv4/tcp_ipv4.c | 10 +++---- net/ipv4/tcp_output.c | 2 +- 5 files changed, 33 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4e37c71ecd74..bc35e4709e8e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -366,11 +366,6 @@ static inline bool tcp_passive_fastopen(const struct sock *sk) tcp_sk(sk)->fastopen_rsk != NULL); } -static inline bool fastopen_cookie_present(struct tcp_fastopen_cookie *foc) -{ - return foc->len != -1; -} - extern void tcp_sock_destruct(struct sock *sk); static inline int fastopen_init_queue(struct sock *sk, int backlog) diff --git a/include/net/tcp.h b/include/net/tcp.h index 012236838583..17d7c6a3d037 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -220,8 +220,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TFO_SERVER_ENABLE 2 #define TFO_CLIENT_NO_COOKIE 4 /* Data in SYN w/o cookie option */ -/* Process SYN data but skip cookie validation */ -#define TFO_SERVER_COOKIE_NOT_CHKED 0x100 /* Accept SYN data w/o any cookie option */ #define TFO_SERVER_COOKIE_NOT_REQD 0x200 @@ -230,10 +228,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); */ #define TFO_SERVER_WO_SOCKOPT1 0x400 #define TFO_SERVER_WO_SOCKOPT2 0x800 -/* Always create TFO child sockets on a TFO listener even when - * cookie/data not present. (For testing purpose!) - */ -#define TFO_SERVER_ALWAYS 0x1000 extern struct inet_timewait_death_row tcp_death_row; @@ -1335,8 +1329,7 @@ int tcp_fastopen_create_child(struct sock *sk, struct request_sock *req); bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct tcp_fastopen_cookie *foc, - struct tcp_fastopen_cookie *valid_foc); + struct tcp_fastopen_cookie *foc); void tcp_fastopen_init_key_once(bool publish); #define TCP_FASTOPEN_KEY_LENGTH 16 diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 0606c91d9d0b..5a98277b9a82 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -228,59 +228,44 @@ static bool tcp_fastopen_queue_check(struct sock *sk) return true; } +/* Returns true if we should perform Fast Open on the SYN. The cookie (foc) + * may be updated and return the client in the SYN-ACK later. E.g., Fast Open + * cookie request (foc->len == 0). + */ bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct tcp_fastopen_cookie *foc, - struct tcp_fastopen_cookie *valid_foc) + struct tcp_fastopen_cookie *foc) { - bool skip_cookie = false; - - if (likely(!fastopen_cookie_present(foc))) { - /* See include/net/tcp.h for the meaning of these knobs */ - if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) || - ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) && - (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1))) - skip_cookie = true; /* no cookie to validate */ - else - return false; - } - /* A FO option is present; bump the counter. */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); + struct tcp_fastopen_cookie valid_foc = { .len = -1 }; + bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; - if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 || - !tcp_fastopen_queue_check(sk)) + if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && + (syn_data || foc->len >= 0) && + tcp_fastopen_queue_check(sk))) { + foc->len = -1; return false; - - if (skip_cookie) { - tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - return true; } - if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) { - if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) { - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, valid_foc); - if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) || - memcmp(&foc->val[0], &valid_foc->val[0], - TCP_FASTOPEN_COOKIE_SIZE) != 0) - return false; - valid_foc->len = -1; - } - /* Acknowledge the data received from the peer. */ + if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) + goto fastopen; + + tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, &valid_foc); + + if (foc->len == TCP_FASTOPEN_COOKIE_SIZE && + foc->len == valid_foc.len && + !memcmp(foc->val, valid_foc.val, foc->len)) { +fastopen: tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + foc->len = -1; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); return true; - } else if (foc->len == 0) { /* Client requesting a cookie */ - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, valid_foc); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENCOOKIEREQD); - } else { - /* Client sent a cookie with wrong size. Treat it - * the same as invalid and return a valid one. - */ - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, valid_foc); } + + NET_INC_STATS_BH(sock_net(sk), foc->len ? + LINUX_MIB_TCPFASTOPENPASSIVEFAIL : + LINUX_MIB_TCPFASTOPENCOOKIEREQD); + *foc = valid_foc; return false; } EXPORT_SYMBOL(tcp_fastopen_check); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 032fcaee164a..5ea0949dadfd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1273,7 +1273,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) bool want_cookie = false; struct flowi4 fl4; struct tcp_fastopen_cookie foc = { .len = -1 }; - struct tcp_fastopen_cookie valid_foc = { .len = -1 }; struct sk_buff *skb_synack; int do_fastopen; @@ -1381,7 +1380,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (dst == NULL) goto drop_and_free; } - do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc); + do_fastopen = !want_cookie && + tcp_fastopen_check(sk, skb, req, &foc); /* We don't call tcp_v4_send_synack() directly because we need * to make sure a child socket can be created successfully before @@ -1394,8 +1394,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * latter to remove its dependency on the current implementation * of tcp_v4_send_synack()->tcp_select_initial_window(). */ - skb_synack = tcp_make_synack(sk, dst, req, - fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); + skb_synack = tcp_make_synack(sk, dst, req, &foc); if (skb_synack) { __tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr); @@ -1415,9 +1414,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->listener = NULL; /* Add the request_sock to the SYN table */ inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - if (fastopen_cookie_present(&foc) && foc.len != 0) - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENPASSIVEFAIL); } else if (tcp_fastopen_create_child(sk, skb, skb_synack, req)) goto drop_and_release; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 694711a140d4..b20fc02920f9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -627,7 +627,7 @@ static unsigned int tcp_synack_options(struct sock *sk, if (unlikely(!ireq->tstamp_ok)) remaining -= TCPOLEN_SACKPERM_ALIGNED; } - if (foc != NULL) { + if (foc != NULL && foc->len >= 0) { u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; need = (need + 3) & ~3U; /* Align to 32 bits */ if (remaining >= need) { -- cgit v1.2.3-71-gd317 From 843f4a55e336e6d0c7bb92e7f9621535bc8d5fcd Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Sun, 11 May 2014 20:22:11 -0700 Subject: tcp: use tcp_v4_send_synack on first SYN-ACK To avoid large code duplication in IPv6, we need to first simplify the complicate SYN-ACK sending code in tcp_v4_conn_request(). To use tcp_v4(6)_send_synack() to send all SYN-ACKs, we need to initialize the mini socket's receive window before trying to create the child socket and/or building the SYN-ACK packet. So we move that initialization from tcp_make_synack() to tcp_v4_conn_request() as a new function tcp_openreq_init_req_rwin(). After this refactoring the SYN-ACK sending code is simpler and easier to implement Fast Open for IPv6. Signed-off-by: Yuchung Cheng Signed-off-by: Daniel Lee Signed-off-by: Jerry Chu Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 14 +++++----- net/ipv4/tcp_fastopen.c | 67 ++++++++++++++++++++++-------------------------- net/ipv4/tcp_ipv4.c | 57 ++++++++++++---------------------------- net/ipv4/tcp_minisocks.c | 31 ++++++++++++++++++++++ net/ipv4/tcp_output.c | 21 --------------- 5 files changed, 85 insertions(+), 105 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 17d7c6a3d037..f5d6ca4a9d28 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1114,6 +1114,9 @@ static inline void tcp_openreq_init(struct request_sock *req, ireq->ir_num = ntohs(tcp_hdr(skb)->dest); } +extern void tcp_openreq_init_rwin(struct request_sock *req, + struct sock *sk, struct dst_entry *dst); + void tcp_enter_memory_pressure(struct sock *sk); static inline int keepalive_intvl_when(const struct tcp_sock *tp) @@ -1323,13 +1326,10 @@ void tcp_free_fastopen_req(struct tcp_sock *tp); extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; int tcp_fastopen_reset_cipher(void *key, unsigned int len); -int tcp_fastopen_create_child(struct sock *sk, - struct sk_buff *skb, - struct sk_buff *skb_synack, - struct request_sock *req); -bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct tcp_fastopen_cookie *foc); +bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + struct dst_entry *dst); void tcp_fastopen_init_key_once(bool publish); #define TCP_FASTOPEN_KEY_LENGTH 16 diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 5a98277b9a82..9b947a9aaf6e 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -95,34 +95,22 @@ void tcp_fastopen_cookie_gen(__be32 src, __be32 dst, rcu_read_unlock(); } -int tcp_fastopen_create_child(struct sock *sk, - struct sk_buff *skb, - struct sk_buff *skb_synack, - struct request_sock *req) +static bool tcp_fastopen_create_child(struct sock *sk, + struct sk_buff *skb, + struct dst_entry *dst, + struct request_sock *req) { struct tcp_sock *tp = tcp_sk(sk); struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; - const struct inet_request_sock *ireq = inet_rsk(req); struct sock *child; - int err; req->num_retrans = 0; req->num_timeout = 0; req->sk = NULL; child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); - if (child == NULL) { - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - kfree_skb(skb_synack); - return -1; - } - err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr, - ireq->ir_rmt_addr, ireq->opt); - err = net_xmit_eval(err); - if (!err) - tcp_rsk(req)->snt_synack = tcp_time_stamp; - /* XXX (TFO) - is it ok to ignore error and continue? */ + if (child == NULL) + return false; spin_lock(&queue->fastopenq->lock); queue->fastopenq->qlen++; @@ -167,28 +155,24 @@ int tcp_fastopen_create_child(struct sock *sk, /* Queue the data carried in the SYN packet. We need to first * bump skb's refcnt because the caller will attempt to free it. * - * XXX (TFO) - we honor a zero-payload TFO request for now. - * (Any reason not to?) + * XXX (TFO) - we honor a zero-payload TFO request for now, + * (any reason not to?) but no need to queue the skb since + * there is no data. How about SYN+FIN? */ - if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) { - /* Don't queue the skb if there is no payload in SYN. - * XXX (TFO) - How about SYN+FIN? - */ - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - } else { + if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) { skb = skb_get(skb); skb_dst_drop(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); skb_set_owner_r(skb, child); __skb_queue_tail(&child->sk_receive_queue, skb); - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; tp->syn_data_acked = 1; } + tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; sk->sk_data_ready(sk); bh_unlock_sock(child); sock_put(child); WARN_ON(req->sk == NULL); - return 0; + return true; } EXPORT_SYMBOL(tcp_fastopen_create_child); @@ -232,9 +216,10 @@ static bool tcp_fastopen_queue_check(struct sock *sk) * may be updated and return the client in the SYN-ACK later. E.g., Fast Open * cookie request (foc->len == 0). */ -bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct tcp_fastopen_cookie *foc) +bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + struct dst_entry *dst) { struct tcp_fastopen_cookie valid_foc = { .len = -1 }; bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; @@ -255,11 +240,21 @@ bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, if (foc->len == TCP_FASTOPEN_COOKIE_SIZE && foc->len == valid_foc.len && !memcmp(foc->val, valid_foc.val, foc->len)) { + /* Cookie is valid. Create a (full) child socket to accept + * the data in SYN before returning a SYN-ACK to ack the + * data. If we fail to create the socket, fall back and + * ack the ISN only but includes the same cookie. + * + * Note: Data-less SYN with valid cookie is allowed to send + * data in SYN_RECV state. + */ fastopen: - tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - foc->len = -1; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); - return true; + if (tcp_fastopen_create_child(sk, skb, dst, req)) { + foc->len = -1; + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENPASSIVE); + return true; + } } NET_INC_STATS_BH(sock_net(sk), foc->len ? @@ -268,4 +263,4 @@ fastopen: *foc = valid_foc; return false; } -EXPORT_SYMBOL(tcp_fastopen_check); +EXPORT_SYMBOL(tcp_try_fastopen); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5ea0949dadfd..1665f0f84233 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -822,7 +822,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, */ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - u16 queue_mapping) + u16 queue_mapping, + struct tcp_fastopen_cookie *foc) { const struct inet_request_sock *ireq = inet_rsk(req); struct flowi4 fl4; @@ -833,7 +834,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, NULL); + skb = tcp_make_synack(sk, dst, req, foc); if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); @@ -852,7 +853,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) { - int res = tcp_v4_send_synack(sk, NULL, req, 0); + int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL); if (!res) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); @@ -1270,11 +1271,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; __u32 isn = TCP_SKB_CB(skb)->when; - bool want_cookie = false; + bool want_cookie = false, fastopen; struct flowi4 fl4; struct tcp_fastopen_cookie foc = { .len = -1 }; - struct sk_buff *skb_synack; - int do_fastopen; + int err; /* Never answer to SYNs send to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) @@ -1373,49 +1373,24 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) isn = tcp_v4_init_sequence(skb); } - tcp_rsk(req)->snt_isn = isn; - - if (dst == NULL) { - dst = inet_csk_route_req(sk, &fl4, req); - if (dst == NULL) - goto drop_and_free; - } - do_fastopen = !want_cookie && - tcp_fastopen_check(sk, skb, req, &foc); - - /* We don't call tcp_v4_send_synack() directly because we need - * to make sure a child socket can be created successfully before - * sending back synack! - * - * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack() - * (or better yet, call tcp_send_synack() in the child context - * directly, but will have to fix bunch of other code first) - * after syn_recv_sock() except one will need to first fix the - * latter to remove its dependency on the current implementation - * of tcp_v4_send_synack()->tcp_select_initial_window(). - */ - skb_synack = tcp_make_synack(sk, dst, req, &foc); - - if (skb_synack) { - __tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr); - skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb)); - } else + if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) goto drop_and_free; - if (likely(!do_fastopen)) { - int err; - err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr, - ireq->ir_rmt_addr, ireq->opt); - err = net_xmit_eval(err); + tcp_rsk(req)->snt_isn = isn; + tcp_rsk(req)->snt_synack = tcp_time_stamp; + tcp_openreq_init_rwin(req, sk, dst); + fastopen = !want_cookie && + tcp_try_fastopen(sk, skb, req, &foc, dst); + err = tcp_v4_send_synack(sk, dst, req, + skb_get_queue_mapping(skb), &foc); + if (!fastopen) { if (err || want_cookie) goto drop_and_free; tcp_rsk(req)->snt_synack = tcp_time_stamp; tcp_rsk(req)->listener = NULL; - /* Add the request_sock to the SYN table */ inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - } else if (tcp_fastopen_create_child(sk, skb, skb_synack, req)) - goto drop_and_release; + } return 0; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 05c1b155251d..e68e0d4af6c9 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -362,6 +362,37 @@ void tcp_twsk_destructor(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); +void tcp_openreq_init_rwin(struct request_sock *req, + struct sock *sk, struct dst_entry *dst) +{ + struct inet_request_sock *ireq = inet_rsk(req); + struct tcp_sock *tp = tcp_sk(sk); + __u8 rcv_wscale; + int mss = dst_metric_advmss(dst); + + if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) + mss = tp->rx_opt.user_mss; + + /* Set this up on the first call only */ + req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); + + /* limit the window selection if the user enforce a smaller rx buffer */ + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && + (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0)) + req->window_clamp = tcp_full_space(sk); + + /* tcp_full_space because it is guaranteed to be the first packet */ + tcp_select_initial_window(tcp_full_space(sk), + mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), + &req->rcv_wnd, + &req->window_clamp, + ireq->wscale_ok, + &rcv_wscale, + dst_metric(dst, RTAX_INITRWND)); + ireq->rcv_wscale = rcv_wscale; +} +EXPORT_SYMBOL(tcp_openreq_init_rwin); + static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, struct request_sock *req) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b20fc02920f9..3d61c52bdf79 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2803,27 +2803,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) mss = tp->rx_opt.user_mss; - if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ - __u8 rcv_wscale; - /* Set this up on the first call only */ - req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); - - /* limit the window selection if the user enforce a smaller rx buffer */ - if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && - (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0)) - req->window_clamp = tcp_full_space(sk); - - /* tcp_full_space because it is guaranteed to be the first packet */ - tcp_select_initial_window(tcp_full_space(sk), - mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), - &req->rcv_wnd, - &req->window_clamp, - ireq->wscale_ok, - &rcv_wscale, - dst_metric(dst, RTAX_INITRWND)); - ireq->rcv_wscale = rcv_wscale; - } - memset(&opts, 0, sizeof(opts)); #ifdef CONFIG_SYN_COOKIES if (unlikely(req->cookie_ts)) -- cgit v1.2.3-71-gd317 From e110861f86094cd78cc85593b873970092deb43a Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Tue, 13 May 2014 10:17:33 -0700 Subject: net: add a sysctl to reflect the fwmark on replies Kernel-originated IP packets that have no user socket associated with them (e.g., ICMP errors and echo replies, TCP RSTs, etc.) are emitted with a mark of zero. Add a sysctl to make them have the same mark as the packet they are replying to. This allows an administrator that wishes to do so to use mark-based routing, firewalling, etc. for these replies by marking the original packets inbound. Tested using user-mode linux: - ICMP/ICMPv6 echo replies and errors. - TCP RST packets (IPv4 and IPv6). Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- include/net/ip.h | 3 +++ include/net/ipv6.h | 3 +++ include/net/netns/ipv4.h | 2 ++ include/net/netns/ipv6.h | 1 + net/ipv4/icmp.c | 11 +++++++++-- net/ipv4/ip_output.c | 3 ++- net/ipv4/sysctl_net_ipv4.c | 7 +++++++ net/ipv6/icmp.c | 6 ++++++ net/ipv6/sysctl_net_ipv6.c | 7 +++++++ net/ipv6/tcp_ipv6.c | 1 + 10 files changed, 41 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 55752985c144..14c50a1650ef 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -231,6 +231,9 @@ void ipfrag_init(void); void ip_static_sysctl_init(void); +#define IP4_REPLY_MARK(net, mark) \ + ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) + static inline bool ip_is_fragment(const struct iphdr *iph) { return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5b40ad297b8c..ba810d0546bc 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -113,6 +113,9 @@ struct frag_hdr { #define IP6_MF 0x0001 #define IP6_OFFSET 0xFFF8 +#define IP6_REPLY_MARK(net, mark) \ + ((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0) + #include /* sysctls */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index b2704fd0ec80..a32fc4d705da 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -77,6 +77,8 @@ struct netns_ipv4 { int sysctl_ip_no_pmtu_disc; int sysctl_ip_fwd_use_pmtu; + int sysctl_fwmark_reflect; + struct ping_group_range ping_group_range; atomic_t dev_addr_genid; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 21edaf1f7916..19d3446e59d2 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -30,6 +30,7 @@ struct netns_sysctl_ipv6 { int flowlabel_consistency; int icmpv6_time; int anycast_src_echo_reply; + int fwmark_reflect; }; struct netns_ipv6 { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index fe52666dc43c..79c3d947a481 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct sock *sk; struct inet_sock *inet; __be32 daddr, saddr; + u32 mark = IP4_REPLY_MARK(net, skb->mark); if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) return; @@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) icmp_param->data.icmph.checksum = 0; inet->tos = ip_hdr(skb)->tos; + sk->sk_mark = mark; daddr = ipc.addr = ip_hdr(skb)->saddr; saddr = fib_compute_spec_dst(skb); ipc.opt = NULL; @@ -364,6 +366,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; fl4.saddr = saddr; + fl4.flowi4_mark = mark; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); @@ -382,7 +385,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, struct sk_buff *skb_in, const struct iphdr *iph, - __be32 saddr, u8 tos, + __be32 saddr, u8 tos, u32 mark, int type, int code, struct icmp_bxm *param) { @@ -394,6 +397,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->daddr = (param->replyopts.opt.opt.srr ? param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; + fl4->flowi4_mark = mark; fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; @@ -491,6 +495,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct flowi4 fl4; __be32 saddr; u8 tos; + u32 mark; struct net *net; struct sock *sk; @@ -592,6 +597,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : iph->tos; + mark = IP4_REPLY_MARK(net, skb_in->mark); if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) goto out_unlock; @@ -608,13 +614,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) icmp_param->skb = skb_in; icmp_param->offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; + sk->sk_mark = mark; ipc.addr = iph->saddr; ipc.opt = &icmp_param->replyopts.opt; ipc.tx_flags = 0; ipc.ttl = 0; ipc.tos = -1; - rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, + rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, type, code, icmp_param); if (IS_ERR(rt)) goto out_unlock; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6aa4380fde1a..6e231ab58d65 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1546,7 +1546,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, daddr = replyopts.opt.opt.faddr; } - flowi4_init_output(&fl4, arg->bound_dev_if, 0, + flowi4_init_output(&fl4, arg->bound_dev_if, + IP4_REPLY_MARK(net, skb->mark), RT_TOS(arg->tos), RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5cde8f263d40..f50d51850285 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -838,6 +838,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv4.sysctl_fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 8d3952796d39..f6c84a6eb238 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) int len; int hlimit; int err = 0; + u32 mark = IP6_REPLY_MARK(net, skb->mark); if ((u8 *)hdr < skb->head || (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) @@ -466,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) fl6.daddr = hdr->saddr; if (saddr) fl6.saddr = *saddr; + fl6.flowi6_mark = mark; fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; @@ -474,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6)) @@ -551,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) int err = 0; int hlimit; u8 tclass; + u32 mark = IP6_REPLY_MARK(net, skb->mark); saddr = &ipv6_hdr(skb)->daddr; @@ -569,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.saddr = *saddr; fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; + fl6.flowi6_mark = mark; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 7f405a168822..058f3eca2e53 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv6.sysctl.fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3a267bf14f2f..c54976a44425 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -812,6 +812,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, fl6.flowi6_oif = inet6_iif(skb); else fl6.flowi6_oif = oif; + fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); -- cgit v1.2.3-71-gd317 From 84f39b08d7868ce10eeaf640627cb89777f0ae93 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Tue, 13 May 2014 10:17:35 -0700 Subject: net: support marking accepting TCP sockets When using mark-based routing, sockets returned from accept() may need to be marked differently depending on the incoming connection request. This is the case, for example, if different socket marks identify different networks: a listening socket may want to accept connections from all networks, but each connection should be marked with the network that the request came in on, so that subsequent packets are sent on the correct network. This patch adds a sysctl to mark TCP sockets based on the fwmark of the incoming SYN packet. If enabled, and an unmarked socket receives a SYN, then the SYN packet's fwmark is written to the connection's inet_request_sock, and later written back to the accepted socket when the connection is established. If the socket already has a nonzero mark, then the behaviour is the same as it is today, i.e., the listening socket's fwmark is used. Black-box tested using user-mode linux: - IPv4/IPv6 SYN+ACK, FIN, etc. packets are routed based on the mark of the incoming SYN packet. - The socket returned by accept() is marked with the mark of the incoming SYN packet. - Tested with syncookies=1 and syncookies=2. Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- include/net/inet_sock.h | 10 ++++++++++ include/net/netns/ipv4.h | 1 + net/ipv4/inet_connection_sock.c | 6 ++++-- net/ipv4/syncookies.c | 3 ++- net/ipv4/sysctl_net_ipv4.c | 7 +++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/syncookies.c | 4 +++- net/ipv6/tcp_ipv6.c | 1 + 9 files changed, 30 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 1833c3f389ee..b1edf17bec01 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -90,6 +90,7 @@ struct inet_request_sock { kmemcheck_bitfield_end(flags); struct ip_options_rcu *opt; struct sk_buff *pktopts; + u32 ir_mark; }; static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) @@ -97,6 +98,15 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) return (struct inet_request_sock *)sk; } +static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb) +{ + if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) { + return skb->mark; + } else { + return sk->sk_mark; + } +} + struct inet_cork { unsigned int flags; __be32 addr; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a32fc4d705da..2f0cfad66666 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -78,6 +78,7 @@ struct netns_ipv4 { int sysctl_ip_fwd_use_pmtu; int sysctl_fwmark_reflect; + int sysctl_tcp_fwmark_accept; struct ping_group_range ping_group_range; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index a56b8e6e866a..12e502cbfdc7 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -408,7 +408,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct net *net = sock_net(sk); int flags = inet_sk_flowi_flags(sk); - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, flags, @@ -445,7 +445,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, rcu_read_lock(); opt = rcu_dereference(newinet->inet_opt); - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, @@ -680,6 +680,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); newsk->sk_write_space = sk_stream_write_space; + newsk->sk_mark = inet_rsk(req)->ir_mark; + newicsk->icsk_retransmits = 0; newicsk->icsk_backoff = 0; newicsk->icsk_probes_out = 0; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f2ed13c2125f..c86624b36a62 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -303,6 +303,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->ir_rmt_port = th->source; ireq->ir_loc_addr = ip_hdr(skb)->daddr; ireq->ir_rmt_addr = ip_hdr(skb)->saddr; + ireq->ir_mark = inet_request_mark(sk, skb); ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; @@ -339,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index f50d51850285..a33b9fbc1d80 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -845,6 +845,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_fwmark_accept", + .data = &init_net.ipv4.sysctl_tcp_fwmark_accept, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a2780e5334c9..77cccda1ad0c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1318,6 +1318,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->ir_rmt_addr = saddr; ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(skb); + ireq->ir_mark = inet_request_mark(sk, skb); if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index d4ade34ab375..a245e5ddffbd 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -81,7 +81,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, final_p = fl6_update_dst(fl6, np->opt, &final); fl6->saddr = ireq->ir_v6_loc_addr; fl6->flowi6_oif = ireq->ir_iif; - fl6->flowi6_mark = sk->sk_mark; + fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); security_req_classify_flow(req, flowi6_to_flowi(fl6)); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index bb53a5e73c1a..a822b880689b 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -216,6 +216,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = inet6_iif(skb); + ireq->ir_mark = inet_request_mark(sk, skb); + req->expires = 0UL; req->num_retrans = 0; ireq->ecn_ok = ecn_ok; @@ -242,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) final_p = fl6_update_dst(&fl6, np->opt, &final); fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c54976a44425..f07b2abba359 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1034,6 +1034,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) TCP_ECN_create_request(req, skb, sock_net(sk)); ireq->ir_iif = sk->sk_bound_dev_if; + ireq->ir_mark = inet_request_mark(sk, skb); /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && -- cgit v1.2.3-71-gd317 From cea092c9488cbb22c8b70336ab1413e0daf350f0 Mon Sep 17 00:00:00 2001 From: Brian W Hart Date: Wed, 14 May 2014 10:33:45 +0930 Subject: cpumask.h: silence warning with -Wsign-compare MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Silence the warning when building with -Wsign-compare when cpumask.h is included: include/linux/cpumask.h: In function ‘cpumask_parse’: include/linux/cpumask.h:603:26: warning: signed and unsigned type in conditional expression [-Wsign-compare] int len = nl ? nl - buf : strlen(buf); ^ V2: Rusty pointed out that unsigned should be used instead. Signed-off-by: Brian W Hart Signed-off-by: Rusty Russell --- include/linux/cpumask.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d08e4d2a9b92..3557ea7b2049 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -600,7 +600,7 @@ static inline int cpulist_scnprintf(char *buf, int len, static inline int cpumask_parse(const char *buf, struct cpumask *dstp) { char *nl = strchr(buf, '\n'); - int len = nl ? nl - buf : strlen(buf); + unsigned int len = nl ? (unsigned int)(nl - buf) : strlen(buf); return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } -- cgit v1.2.3-71-gd317 From 08e53fbdb85c0f6f45c0f7c1ea3defc1752a95ce Mon Sep 17 00:00:00 2001 From: Amos Kong Date: Wed, 14 May 2014 10:33:46 +0930 Subject: virtio-rng: support multiple virtio-rng devices Current hwrng core supports to register multiple hwrng devices, and there is only one device really works in the same time. QEMU alsu supports to have multiple virtio-rng backends. This patch changes virtio-rng driver to support multiple virtio-rng devices. ]# cat /sys/class/misc/hw_random/rng_available virtio_rng.0 virtio_rng.1 ]# cat /sys/class/misc/hw_random/rng_current virtio_rng.0 ]# echo -n virtio_rng.1 > /sys/class/misc/hw_random/rng_current ]# dd if=/dev/hwrng of=/dev/null Signed-off-by: Amos Kong Signed-off-by: Rusty Russell --- drivers/char/hw_random/virtio-rng.c | 102 ++++++++++++++++++++++-------------- include/linux/hw_random.h | 2 +- 2 files changed, 64 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 2ce0e225e58c..12e242bbb0f5 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -25,88 +25,108 @@ #include #include -static struct virtqueue *vq; -static unsigned int data_avail; -static DECLARE_COMPLETION(have_data); -static bool busy; + +struct virtrng_info { + struct virtio_device *vdev; + struct hwrng hwrng; + struct virtqueue *vq; + unsigned int data_avail; + struct completion have_data; + bool busy; +}; static void random_recv_done(struct virtqueue *vq) { + struct virtrng_info *vi = vq->vdev->priv; + /* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */ - if (!virtqueue_get_buf(vq, &data_avail)) + if (!virtqueue_get_buf(vi->vq, &vi->data_avail)) return; - complete(&have_data); + complete(&vi->have_data); } /* The host will fill any buffer we give it with sweet, sweet randomness. */ -static void register_buffer(u8 *buf, size_t size) +static void register_buffer(struct virtrng_info *vi, u8 *buf, size_t size) { struct scatterlist sg; sg_init_one(&sg, buf, size); /* There should always be room for one buffer. */ - virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_KERNEL); + virtqueue_add_inbuf(vi->vq, &sg, 1, buf, GFP_KERNEL); - virtqueue_kick(vq); + virtqueue_kick(vi->vq); } static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) { int ret; + struct virtrng_info *vi = (struct virtrng_info *)rng->priv; - if (!busy) { - busy = true; - init_completion(&have_data); - register_buffer(buf, size); + if (!vi->busy) { + vi->busy = true; + init_completion(&vi->have_data); + register_buffer(vi, buf, size); } if (!wait) return 0; - ret = wait_for_completion_killable(&have_data); + ret = wait_for_completion_killable(&vi->have_data); if (ret < 0) return ret; - busy = false; + vi->busy = false; - return data_avail; + return vi->data_avail; } static void virtio_cleanup(struct hwrng *rng) { - if (busy) - wait_for_completion(&have_data); -} - + struct virtrng_info *vi = (struct virtrng_info *)rng->priv; -static struct hwrng virtio_hwrng = { - .name = "virtio", - .cleanup = virtio_cleanup, - .read = virtio_read, -}; + if (vi->busy) + wait_for_completion(&vi->have_data); +} static int probe_common(struct virtio_device *vdev) { - int err; + int err, i; + struct virtrng_info *vi = NULL; + + vi = kmalloc(sizeof(struct virtrng_info), GFP_KERNEL); + vi->hwrng.name = kmalloc(40, GFP_KERNEL); + init_completion(&vi->have_data); + + vi->hwrng.read = virtio_read; + vi->hwrng.cleanup = virtio_cleanup; + vi->hwrng.priv = (unsigned long)vi; + vdev->priv = vi; - if (vq) { - /* We only support one device for now */ - return -EBUSY; - } /* We expect a single virtqueue. */ - vq = virtio_find_single_vq(vdev, random_recv_done, "input"); - if (IS_ERR(vq)) { - err = PTR_ERR(vq); - vq = NULL; + vi->vq = virtio_find_single_vq(vdev, random_recv_done, "input"); + if (IS_ERR(vi->vq)) { + err = PTR_ERR(vi->vq); + kfree(vi->hwrng.name); + vi->vq = NULL; + kfree(vi); + vi = NULL; return err; } - err = hwrng_register(&virtio_hwrng); + i = 0; + do { + sprintf(vi->hwrng.name, "virtio_rng.%d", i++); + err = hwrng_register(&vi->hwrng); + } while (err == -EEXIST); + if (err) { vdev->config->del_vqs(vdev); - vq = NULL; + kfree(vi->hwrng.name); + vi->vq = NULL; + kfree(vi); + vi = NULL; return err; } @@ -115,11 +135,15 @@ static int probe_common(struct virtio_device *vdev) static void remove_common(struct virtio_device *vdev) { + struct virtrng_info *vi = vdev->priv; vdev->config->reset(vdev); - busy = false; - hwrng_unregister(&virtio_hwrng); + vi->busy = false; + hwrng_unregister(&vi->hwrng); vdev->config->del_vqs(vdev); - vq = NULL; + kfree(vi->hwrng.name); + vi->vq = NULL; + kfree(vi); + vi = NULL; } static int virtrng_probe(struct virtio_device *vdev) diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index b4b0eef5fddf..02d9c87be54c 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -31,7 +31,7 @@ * @priv: Private data, for use by the RNG driver. */ struct hwrng { - const char *name; + char *name; int (*init)(struct hwrng *rng); void (*cleanup)(struct hwrng *rng); int (*data_present)(struct hwrng *rng, int wait); -- cgit v1.2.3-71-gd317 From b02ef20a9fba08948e643d3eec0efadf1da01a44 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 12 May 2014 18:24:45 +0200 Subject: uprobes/x86: Fix the wrong ->si_addr when xol triggers a trap If the probed insn triggers a trap, ->si_addr = regs->ip is technically correct, but this is not what the signal handler wants; we need to pass the address of the probed insn, not the address of xol slot. Add the new arch-agnostic helper, uprobe_get_trap_addr(), and change fill_trap_info() and math_error() to use it. !CONFIG_UPROBES case in uprobes.h uses a macro to avoid include hell and ensure that it can be compiled even if an architecture doesn't define instruction_pointer(). Test-case: #include #include #include extern void probe_div(void); void sigh(int sig, siginfo_t *info, void *c) { int passed = (info->si_addr == probe_div); printf(passed ? "PASS\n" : "FAIL\n"); _exit(!passed); } int main(void) { struct sigaction sa = { .sa_sigaction = sigh, .sa_flags = SA_SIGINFO, }; sigaction(SIGFPE, &sa, NULL); asm ( "xor %ecx,%ecx\n" ".globl probe_div; probe_div:\n" "idiv %ecx\n" ); return 0; } it fails if probe_div() is probed. Note: show_unhandled_signals users should probably use this helper too, but we need to cleanup them first. Signed-off-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu --- arch/x86/kernel/traps.c | 7 ++++--- include/linux/uprobes.h | 4 ++++ kernel/events/uprobes.c | 10 ++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 73b3ea32245a..3fdb20548c4b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -148,11 +149,11 @@ static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr, case X86_TRAP_DE: sicode = FPE_INTDIV; - siaddr = regs->ip; + siaddr = uprobe_get_trap_addr(regs); break; case X86_TRAP_UD: sicode = ILL_ILLOPN; - siaddr = regs->ip; + siaddr = uprobe_get_trap_addr(regs); break; case X86_TRAP_AC: sicode = BUS_ADRALN; @@ -531,7 +532,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) task->thread.error_code = error_code; info.si_signo = SIGFPE; info.si_errno = 0; - info.si_addr = (void __user *)regs->ip; + info.si_addr = (void __user *)uprobe_get_trap_addr(regs); if (trapnr == X86_TRAP_MF) { unsigned short cwd, swd; /* diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index edff2b97b864..88c3b7e8b384 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -102,6 +102,7 @@ extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, u extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern bool __weak is_trap_insn(uprobe_opcode_t *insn); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); +extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); @@ -130,6 +131,9 @@ extern bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *r #else /* !CONFIG_UPROBES */ struct uprobes_state { }; + +#define uprobe_get_trap_addr(regs) instruction_pointer(regs) + static inline int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index a13251e8bfa4..3b02c72938a8 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1351,6 +1351,16 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs) return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE; } +unsigned long uprobe_get_trap_addr(struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + if (unlikely(utask && utask->active_uprobe)) + return utask->vaddr; + + return instruction_pointer(regs); +} + /* * Called with no locks held. * Called in context of a exiting or a exec-ing thread. -- cgit v1.2.3-71-gd317 From 9d800df12d31734a6853915e9d2deb5d6747985f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 May 2014 09:15:00 -0400 Subject: cgroup: rename cgroup->dummy_css to ->self and move it to the top cgroup->dummy_css is used as the placeholder css when performing css oriended operations on the cgroup. We're gonna shift more cgroup management to this css. Let's rename it to ->self and move it to the top. This is pure rename and field relocation. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 6 +++--- kernel/cgroup.c | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index aa7353deaaf3..164851e388e7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -143,6 +143,9 @@ enum { }; struct cgroup { + /* self css with NULL ->ss, points back to this cgroup */ + struct cgroup_subsys_state self; + unsigned long flags; /* "unsigned long" so bitops work */ /* @@ -224,9 +227,6 @@ struct cgroup { struct list_head pidlists; struct mutex pidlist_mutex; - /* dummy css with NULL ->ss, points back to this cgroup */ - struct cgroup_subsys_state dummy_css; - /* For css percpu_ref killing and RCU-protected deletion */ struct rcu_head rcu_head; struct work_struct destroy_work; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f36fd9c15b3a..b57a949ae4bc 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -220,7 +220,7 @@ static void cgroup_idr_remove(struct idr *idr, int id) /** * cgroup_css - obtain a cgroup's css for the specified subsystem * @cgrp: the cgroup of interest - * @ss: the subsystem of interest (%NULL returns the dummy_css) + * @ss: the subsystem of interest (%NULL returns @cgrp->self) * * Return @cgrp's css (cgroup_subsys_state) associated with @ss. This * function must be called either under cgroup_mutex or rcu_read_lock() and @@ -235,13 +235,13 @@ static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, return rcu_dereference_check(cgrp->subsys[ss->id], lockdep_is_held(&cgroup_mutex)); else - return &cgrp->dummy_css; + return &cgrp->self; } /** * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem * @cgrp: the cgroup of interest - * @ss: the subsystem of interest (%NULL returns the dummy_css) + * @ss: the subsystem of interest (%NULL returns @cgrp->self) * * Similar to cgroup_css() but returns the effctive css, which is defined * as the matching css of the nearest ancestor including self which has @ss @@ -254,7 +254,7 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, lockdep_assert_held(&cgroup_mutex); if (!ss) - return &cgrp->dummy_css; + return &cgrp->self; if (!(cgrp->root->subsys_mask & (1 << ss->id))) return NULL; @@ -288,7 +288,7 @@ struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) if (cft->ss) return rcu_dereference_raw(cgrp->subsys[cft->ss->id]); else - return &cgrp->dummy_css; + return &cgrp->self; } EXPORT_SYMBOL_GPL(of_css); @@ -1551,7 +1551,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_LIST_HEAD(&cgrp->release_list); INIT_LIST_HEAD(&cgrp->pidlists); mutex_init(&cgrp->pidlist_mutex); - cgrp->dummy_css.cgroup = cgrp; + cgrp->self.cgroup = cgrp; for_each_subsys(ss, ssid) INIT_LIST_HEAD(&cgrp->e_csets[ssid]); @@ -3454,7 +3454,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) * ->can_attach() fails. */ do { - css_task_iter_start(&from->dummy_css, &it); + css_task_iter_start(&from->self, &it); task = css_task_iter_next(&it); if (task) get_task_struct(task); @@ -3719,7 +3719,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, if (!array) return -ENOMEM; /* now, populate the array */ - css_task_iter_start(&cgrp->dummy_css, &it); + css_task_iter_start(&cgrp->self, &it); while ((tsk = css_task_iter_next(&it))) { if (unlikely(n == length)) break; @@ -3793,7 +3793,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) } rcu_read_unlock(); - css_task_iter_start(&cgrp->dummy_css, &it); + css_task_iter_start(&cgrp->self, &it); while ((tsk = css_task_iter_next(&it))) { switch (tsk->state) { case TASK_RUNNING: @@ -4274,7 +4274,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, init_cgroup_housekeeping(cgrp); cgrp->parent = parent; - cgrp->dummy_css.parent = &parent->dummy_css; + cgrp->self.parent = &parent->self; cgrp->root = root; if (notify_on_release(parent)) -- cgit v1.2.3-71-gd317 From 249f3468a282dcbad53484c821bebb447f14ee03 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 May 2014 09:15:01 -0400 Subject: cgroup: remove cgroup_destory_css_killed() cgroup_destroy_css_killed() is cgroup destruction stage which happens after all csses are offlined. After the recent updates, it no longer does anything other than putting the base reference. This patch removes the function and makes cgroup_destroy_locked() put the base ref at the end isntead. This also makes cgroup->nr_css unnecessary. Removed. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 3 --- kernel/cgroup.c | 62 +++++--------------------------------------------- 2 files changed, 6 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 164851e388e7..160fcc69149e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -158,9 +158,6 @@ struct cgroup { */ int id; - /* the number of attached css's */ - int nr_css; - /* * If this cgroup contains any tasks, it contributes one to * populated_cnt. All children with non-zero popuplated_cnt of diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e9aa2a51ca68..4a94b0be598d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -178,7 +178,6 @@ static struct cftype cgroup_base_files[]; static void cgroup_put(struct cgroup *cgrp); static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask); -static void cgroup_destroy_css_killed(struct cgroup *cgrp); static int cgroup_destroy_locked(struct cgroup *cgrp); static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss); static void kill_css(struct cgroup_subsys_state *css); @@ -4169,7 +4168,6 @@ static int online_css(struct cgroup_subsys_state *css) ret = ss->css_online(css); if (!ret) { css->flags |= CSS_ONLINE; - css->cgroup->nr_css++; rcu_assign_pointer(css->cgroup->subsys[ss->id], css); } return ret; @@ -4189,7 +4187,6 @@ static void offline_css(struct cgroup_subsys_state *css) ss->css_offline(css); css->flags &= ~CSS_ONLINE; - css->cgroup->nr_css--; RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL); wake_up_all(&css->cgroup->offline_waitq); @@ -4374,39 +4371,18 @@ out_destroy: /* * This is called when the refcnt of a css is confirmed to be killed. - * css_tryget_online() is now guaranteed to fail. + * css_tryget_online() is now guaranteed to fail. Tell the subsystem to + * initate destruction and put the css ref from kill_css(). */ static void css_killed_work_fn(struct work_struct *work) { struct cgroup_subsys_state *css = container_of(work, struct cgroup_subsys_state, destroy_work); - struct cgroup *cgrp = css->cgroup; mutex_lock(&cgroup_mutex); - - /* - * css_tryget_online() is guaranteed to fail now. Tell subsystems - * to initate destruction. - */ offline_css(css); - - /* - * If @cgrp is marked dead, it's waiting for refs of all css's to - * be disabled before proceeding to the second phase of cgroup - * destruction. If we are the last one, kick it off. - */ - if (!cgrp->nr_css && cgroup_is_dead(cgrp)) - cgroup_destroy_css_killed(cgrp); - mutex_unlock(&cgroup_mutex); - /* - * Put the css refs from kill_css(). Each css holds an extra - * reference to the cgroup's dentry and cgroup removal proceeds - * regardless of css refs. On the last put of each css, whenever - * that may be, the extra dentry ref is put so that dentry - * destruction happens only after all css's are released. - */ css_put(css); } @@ -4518,11 +4494,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) */ set_bit(CGRP_DEAD, &cgrp->flags); - /* - * Initiate massacre of all css's. cgroup_destroy_css_killed() - * will be invoked to perform the rest of destruction once the - * percpu refs of all css's are confirmed to be killed. - */ + /* initiate massacre of all css's */ for_each_css(css, ssid, cgrp) kill_css(css); @@ -4532,15 +4504,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) list_del_init(&cgrp->release_list); raw_spin_unlock(&release_list_lock); - /* - * If @cgrp has css's attached, the second stage of cgroup - * destruction is kicked off from css_killed_work_fn() after the - * refs of all attached css's are killed. If @cgrp doesn't have - * any css, we kick it off here. - */ - if (!cgrp->nr_css) - cgroup_destroy_css_killed(cgrp); - /* * Remove @cgrp directory along with the base files. @cgrp has an * extra ref on its kn. @@ -4550,25 +4513,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) set_bit(CGRP_RELEASABLE, &cgrp->parent->flags); check_for_release(cgrp->parent); + /* put the base reference */ + cgroup_put(cgrp); + return 0; }; -/** - * cgroup_destroy_css_killed - the second step of cgroup destruction - * @cgrp: the cgroup whose csses have just finished offlining - * - * This function is invoked from a work item for a cgroup which is being - * destroyed after all css's are offlined and performs the rest of - * destruction. This is the second step of destruction described in the - * comment above cgroup_destroy_locked(). - */ -static void cgroup_destroy_css_killed(struct cgroup *cgrp) -{ - lockdep_assert_held(&cgroup_mutex); - - cgroup_put(cgrp); -} - static int cgroup_rmdir(struct kernfs_node *kn) { struct cgroup *cgrp; -- cgit v1.2.3-71-gd317 From 9395a4500404e05173eda9a2d198b6fa500e90c5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 May 2014 09:15:02 -0400 Subject: cgroup: enable refcnting for root csses Currently, css_get(), css_tryget() and css_tryget_online() are noops for root csses as an optimization; however, we're planning to use css refcnts to track of cgroup lifetime too and root cgroups also need to be reference counted. Since css has been converted to percpu_refcnt, the overhead of refcnting is miniscule and this optimization isn't too meaningful anymore. Furthermore, controllers which optimize the root cgroup often never even invoke these functions in their hot paths. This patch enables refcnting for root csses too. This makes CSS_ROOT flag unused and removes it. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 10 ++-------- kernel/cgroup.c | 6 +++--- 2 files changed, 5 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 160fcc69149e..286e39e4e9bf 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -77,7 +77,6 @@ struct cgroup_subsys_state { /* bits in struct cgroup_subsys_state flags field */ enum { - CSS_ROOT = (1 << 0), /* this CSS is the root of the subsystem */ CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ }; @@ -89,9 +88,7 @@ enum { */ static inline void css_get(struct cgroup_subsys_state *css) { - /* We don't need to reference count the root state */ - if (!(css->flags & CSS_ROOT)) - percpu_ref_get(&css->refcnt); + percpu_ref_get(&css->refcnt); } /** @@ -106,8 +103,6 @@ static inline void css_get(struct cgroup_subsys_state *css) */ static inline bool css_tryget_online(struct cgroup_subsys_state *css) { - if (css->flags & CSS_ROOT) - return true; return percpu_ref_tryget_live(&css->refcnt); } @@ -119,8 +114,7 @@ static inline bool css_tryget_online(struct cgroup_subsys_state *css) */ static inline void css_put(struct cgroup_subsys_state *css) { - if (!(css->flags & CSS_ROOT)) - percpu_ref_put(&css->refcnt); + percpu_ref_put(&css->refcnt); } /* bits in struct cgroup flags field */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e694f4153edb..cb5864e36f99 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4158,8 +4158,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css, if (cgrp->parent) { css->parent = cgroup_css(cgrp->parent, ss); css_get(css->parent); - } else { - css->flags |= CSS_ROOT; } BUG_ON(cgroup_css(cgrp, ss)); @@ -4582,9 +4580,10 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) BUG_ON(IS_ERR(css)); init_and_link_css(css, ss, &cgrp_dfl_root.cgrp); if (early) { - /* idr_alloc() can't be called safely during early init */ + /* allocation can't be done safely during early init */ css->id = 1; } else { + BUG_ON(percpu_ref_init(&css->refcnt, css_release)); css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL); BUG_ON(css->id < 0); } @@ -4671,6 +4670,7 @@ int __init cgroup_init(void) struct cgroup_subsys_state *css = init_css_set.subsys[ss->id]; + BUG_ON(percpu_ref_init(&css->refcnt, css_release)); css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL); BUG_ON(css->id < 0); -- cgit v1.2.3-71-gd317 From 9d755d33f0db8c9b49438f71b38a56e375b34360 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 May 2014 09:15:02 -0400 Subject: cgroup: use cgroup->self.refcnt for cgroup refcnting Currently cgroup implements refcnting separately using atomic_t cgroup->refcnt. The destruction paths of cgroup and css are rather complex and bear a lot of similiarities including the use of RCU and bouncing to a work item. This patch makes cgroup use the refcnt of self css for refcnting instead of using its own. This makes cgroup refcnting use css's percpu refcnt and share the destruction mechanism. * css_release_work_fn() and css_free_work_fn() are updated to handle both csses and cgroups. This is a bit messy but should do until we can make cgroup->self a full css, which currently can't be done thanks to multiple hierarchies. * cgroup_destroy_locked() now performs percpu_ref_kill(&cgrp->self.refcnt) instead of cgroup_put(cgrp). * Negative refcnt sanity check in cgroup_get() is no longer necessary as percpu_ref already handles it. * Similarly, as a cgroup which hasn't been killed will never be released regardless of its refcnt value and percpu_ref has sanity check on kill, cgroup_is_dead() sanity check in cgroup_put() is no longer necessary. * As whether a refcnt reached zero or not can only be decided after the reference count is killed, cgroup_root->cgrp's refcnting can no longer be used to decide whether to kill the root or not. Let's make cgroup_kill_sb() explicitly initiate destruction if the root doesn't have any children. This makes sense anyway as unmounted cgroup hierarchy without any children should be destroyed. While this is a bit messy, this will allow pushing more bookkeeping towards cgroup->self and thus handling cgroups and csses in more uniform way. In the very long term, it should be possible to introduce a base subsystem and convert the self css to a proper one making things whole lot simpler and unified. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 6 -- kernel/cgroup.c | 146 +++++++++++++++++++++++++++---------------------- 2 files changed, 80 insertions(+), 72 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 286e39e4e9bf..76dadd77a120 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -160,8 +160,6 @@ struct cgroup { */ int populated_cnt; - atomic_t refcnt; - /* * We link our 'sibling' struct into our parent's 'children'. * Our children link their 'sibling' into our 'children'. @@ -218,10 +216,6 @@ struct cgroup { struct list_head pidlists; struct mutex pidlist_mutex; - /* For css percpu_ref killing and RCU-protected deletion */ - struct rcu_head rcu_head; - struct work_struct destroy_work; - /* used to wait for offlining of csses */ wait_queue_head_t offline_waitq; }; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index cb5864e36f99..c01e8e8dfad0 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -176,10 +176,12 @@ static int need_forkexit_callback __read_mostly; static struct cftype cgroup_base_files[]; static void cgroup_put(struct cgroup *cgrp); +static bool cgroup_has_live_children(struct cgroup *cgrp); static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask); static int cgroup_destroy_locked(struct cgroup *cgrp); static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss); +static void css_release(struct percpu_ref *ref); static void kill_css(struct cgroup_subsys_state *css); static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], bool is_add); @@ -1008,62 +1010,15 @@ static umode_t cgroup_file_mode(const struct cftype *cft) return mode; } -static void cgroup_free_fn(struct work_struct *work) -{ - struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work); - - atomic_dec(&cgrp->root->nr_cgrps); - cgroup_pidlist_destroy_all(cgrp); - - if (cgrp->parent) { - /* - * We get a ref to the parent, and put the ref when this - * cgroup is being freed, so it's guaranteed that the - * parent won't be destroyed before its children. - */ - cgroup_put(cgrp->parent); - kernfs_put(cgrp->kn); - kfree(cgrp); - } else { - /* - * This is root cgroup's refcnt reaching zero, which - * indicates that the root should be released. - */ - cgroup_destroy_root(cgrp->root); - } -} - -static void cgroup_free_rcu(struct rcu_head *head) -{ - struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head); - - INIT_WORK(&cgrp->destroy_work, cgroup_free_fn); - queue_work(cgroup_destroy_wq, &cgrp->destroy_work); -} - static void cgroup_get(struct cgroup *cgrp) { WARN_ON_ONCE(cgroup_is_dead(cgrp)); - WARN_ON_ONCE(atomic_read(&cgrp->refcnt) <= 0); - atomic_inc(&cgrp->refcnt); + css_get(&cgrp->self); } static void cgroup_put(struct cgroup *cgrp) { - if (!atomic_dec_and_test(&cgrp->refcnt)) - return; - if (WARN_ON_ONCE(cgrp->parent && !cgroup_is_dead(cgrp))) - return; - - /* delete this cgroup from parent->children */ - mutex_lock(&cgroup_mutex); - list_del_rcu(&cgrp->sibling); - mutex_unlock(&cgroup_mutex); - - cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); - cgrp->id = -1; - - call_rcu(&cgrp->rcu_head, cgroup_free_rcu); + css_put(&cgrp->self); } /** @@ -1548,7 +1503,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) struct cgroup_subsys *ss; int ssid; - atomic_set(&cgrp->refcnt, 1); INIT_LIST_HEAD(&cgrp->sibling); INIT_LIST_HEAD(&cgrp->children); INIT_LIST_HEAD(&cgrp->cset_links); @@ -1597,6 +1551,10 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask) goto out; root_cgrp->id = ret; + ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release); + if (ret) + goto out; + /* * We're accessing css_set_count without locking css_set_rwsem here, * but that's OK - it can only be increased by someone holding @@ -1605,11 +1563,11 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask) */ ret = allocate_cgrp_cset_links(css_set_count, &tmp_links); if (ret) - goto out; + goto cancel_ref; ret = cgroup_init_root_id(root); if (ret) - goto out; + goto cancel_ref; root->kf_root = kernfs_create_root(&cgroup_kf_syscall_ops, KERNFS_ROOT_CREATE_DEACTIVATED, @@ -1657,6 +1615,8 @@ destroy_root: root->kf_root = NULL; exit_root_id: cgroup_exit_root_id(root); +cancel_ref: + percpu_ref_cancel_init(&root_cgrp->self.refcnt); out: free_cgrp_cset_links(&tmp_links); return ret; @@ -1735,13 +1695,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, } /* - * A root's lifetime is governed by its root cgroup. Zero - * ref indicate that the root is being destroyed. Wait for - * destruction to complete so that the subsystems are free. - * We can use wait_queue for the wait but this path is - * super cold. Let's just sleep for a bit and retry. + * A root's lifetime is governed by its root cgroup. + * tryget_live failure indicate that the root is being + * destroyed. Wait for destruction to complete so that the + * subsystems are free. We can use wait_queue for the wait + * but this path is super cold. Let's just sleep for a bit + * and retry. */ - if (!atomic_inc_not_zero(&root->cgrp.refcnt)) { + if (!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { mutex_unlock(&cgroup_mutex); msleep(10); ret = restart_syscall(); @@ -1794,7 +1755,16 @@ static void cgroup_kill_sb(struct super_block *sb) struct kernfs_root *kf_root = kernfs_root_from_sb(sb); struct cgroup_root *root = cgroup_root_from_kf(kf_root); - cgroup_put(&root->cgrp); + /* + * If @root doesn't have any mounts or children, start killing it. + * This prevents new mounts by disabling percpu_ref_tryget_live(). + * cgroup_mount() may wait for @root's release. + */ + if (cgroup_has_live_children(&root->cgrp)) + cgroup_put(&root->cgrp); + else + percpu_ref_kill(&root->cgrp.self.refcnt); + kernfs_kill_sb(sb); } @@ -4110,11 +4080,37 @@ static void css_free_work_fn(struct work_struct *work) container_of(work, struct cgroup_subsys_state, destroy_work); struct cgroup *cgrp = css->cgroup; - if (css->parent) - css_put(css->parent); + if (css->ss) { + /* css free path */ + if (css->parent) + css_put(css->parent); - css->ss->css_free(css); - cgroup_put(cgrp); + css->ss->css_free(css); + cgroup_put(cgrp); + } else { + /* cgroup free path */ + atomic_dec(&cgrp->root->nr_cgrps); + cgroup_pidlist_destroy_all(cgrp); + + if (cgrp->parent) { + /* + * We get a ref to the parent, and put the ref when + * this cgroup is being freed, so it's guaranteed + * that the parent won't be destroyed before its + * children. + */ + cgroup_put(cgrp->parent); + kernfs_put(cgrp->kn); + kfree(cgrp); + } else { + /* + * This is root cgroup's refcnt reaching zero, + * which indicates that the root should be + * released. + */ + cgroup_destroy_root(cgrp->root); + } + } } static void css_free_rcu_fn(struct rcu_head *rcu_head) @@ -4131,8 +4127,20 @@ static void css_release_work_fn(struct work_struct *work) struct cgroup_subsys_state *css = container_of(work, struct cgroup_subsys_state, destroy_work); struct cgroup_subsys *ss = css->ss; + struct cgroup *cgrp = css->cgroup; - cgroup_idr_remove(&ss->css_idr, css->id); + if (ss) { + /* css release path */ + cgroup_idr_remove(&ss->css_idr, css->id); + } else { + /* cgroup release path */ + mutex_lock(&cgroup_mutex); + list_del_rcu(&cgrp->sibling); + mutex_unlock(&cgroup_mutex); + + cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); + cgrp->id = -1; + } call_rcu(&css->rcu_head, css_free_rcu_fn); } @@ -4285,6 +4293,10 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, goto out_unlock; } + ret = percpu_ref_init(&cgrp->self.refcnt, css_release); + if (ret) + goto out_free_cgrp; + /* * Temporarily set the pointer to NULL, so idr_find() won't return * a half-baked cgroup. @@ -4292,7 +4304,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_NOWAIT); if (cgrp->id < 0) { ret = -ENOMEM; - goto out_free_cgrp; + goto out_cancel_ref; } init_cgroup_housekeeping(cgrp); @@ -4365,6 +4377,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, out_free_id: cgroup_idr_remove(&root->cgroup_idr, cgrp->id); +out_cancel_ref: + percpu_ref_cancel_init(&cgrp->self.refcnt); out_free_cgrp: kfree(cgrp); out_unlock: @@ -4521,7 +4535,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) check_for_release(cgrp->parent); /* put the base reference */ - cgroup_put(cgrp); + percpu_ref_kill(&cgrp->self.refcnt); return 0; }; -- cgit v1.2.3-71-gd317 From 7413af1fb70e7efa6dbc7f27663e7a5126b3aa33 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 6 May 2014 21:34:14 -0400 Subject: ftrace: Make get_ftrace_addr() and get_ftrace_addr_old() global Move and rename get_ftrace_addr() and get_ftrace_addr_old() to ftrace_get_addr_new() and ftrace_get_addr_curr() respectively. This moves these two helper functions in the generic code out from the arch specific code, and renames them to have a better generic name. This will allow other archs to use them as well as makes it a bit easier to work on getting separate trampolines for different functions. ftrace_get_addr_new() returns the trampoline address that the mcount call address will be converted to. ftrace_get_addr_curr() returns the trampoline address of what the mcount call address currently jumps to. Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 36 +++++------------------------------- include/linux/ftrace.h | 2 ++ kernel/trace/ftrace.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 4b3c195d4133..5ef43ce8492f 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -349,38 +349,12 @@ static int add_brk_on_nop(struct dyn_ftrace *rec) return add_break(rec->ip, old); } -/* - * If the record has the FTRACE_FL_REGS set, that means that it - * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS - * is not not set, then it wants to convert to the normal callback. - */ -static unsigned long get_ftrace_addr(struct dyn_ftrace *rec) -{ - if (rec->flags & FTRACE_FL_REGS) - return (unsigned long)FTRACE_REGS_ADDR; - else - return (unsigned long)FTRACE_ADDR; -} - -/* - * The FTRACE_FL_REGS_EN is set when the record already points to - * a function that saves all the regs. Basically the '_EN' version - * represents the current state of the function. - */ -static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec) -{ - if (rec->flags & FTRACE_FL_REGS_EN) - return (unsigned long)FTRACE_REGS_ADDR; - else - return (unsigned long)FTRACE_ADDR; -} - static int add_breakpoints(struct dyn_ftrace *rec, int enable) { unsigned long ftrace_addr; int ret; - ftrace_addr = get_ftrace_old_addr(rec); + ftrace_addr = ftrace_get_addr_curr(rec); ret = ftrace_test_record(rec, enable); @@ -438,14 +412,14 @@ static int remove_breakpoint(struct dyn_ftrace *rec) * If not, don't touch the breakpoint, we make just create * a disaster. */ - ftrace_addr = get_ftrace_addr(rec); + ftrace_addr = ftrace_get_addr_new(rec); nop = ftrace_call_replace(ip, ftrace_addr); if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0) goto update; /* Check both ftrace_addr and ftrace_old_addr */ - ftrace_addr = get_ftrace_old_addr(rec); + ftrace_addr = ftrace_get_addr_curr(rec); nop = ftrace_call_replace(ip, ftrace_addr); if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) @@ -489,7 +463,7 @@ static int add_update(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = get_ftrace_addr(rec); + ftrace_addr = ftrace_get_addr_new(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: @@ -536,7 +510,7 @@ static int finish_update(struct dyn_ftrace *rec, int enable) ret = ftrace_update_record(rec, enable); - ftrace_addr = get_ftrace_addr(rec); + ftrace_addr = ftrace_get_addr_new(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f0ff2c2453e7..2f8cbffecd3d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -400,6 +400,8 @@ int ftrace_update_record(struct dyn_ftrace *rec, int enable); int ftrace_test_record(struct dyn_ftrace *rec, int enable); void ftrace_run_stop_machine(int command); unsigned long ftrace_location(unsigned long ip); +unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec); +unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec); extern ftrace_func_t ftrace_trace_function; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 98fa931b6864..e825fded435d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1755,6 +1755,42 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable) return ftrace_check_record(rec, enable, 0); } +/** + * ftrace_get_addr_new - Get the call address to set to + * @rec: The ftrace record descriptor + * + * If the record has the FTRACE_FL_REGS set, that means that it + * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS + * is not not set, then it wants to convert to the normal callback. + * + * Returns the address of the trampoline to set to + */ +unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + +/** + * ftrace_get_addr_curr - Get the call address that is already there + * @rec: The ftrace record descriptor + * + * The FTRACE_FL_REGS_EN is set when the record already points to + * a function that saves all the regs. Basically the '_EN' version + * represents the current state of the function. + * + * Returns the address of the trampoline that is currently being called + */ +unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS_EN) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) { -- cgit v1.2.3-71-gd317 From f1b2f2bd5821c6ab7feed2e133343dd54b212ed9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 7 May 2014 16:09:49 -0400 Subject: ftrace: Remove FTRACE_UPDATE_MODIFY_CALL_REGS flag As the decision to what needs to be done (converting a call to the ftrace_caller to ftrace_caller_regs or to convert from ftrace_caller_regs to ftrace_caller) can easily be determined from the rec->flags of FTRACE_FL_REGS and FTRACE_FL_REGS_EN, there's no need to have the ftrace_check_record() return either a UPDATE_MODIFY_CALL_REGS or a UPDATE_MODIFY_CALL. Just he latter is enough. This added flag causes more complexity than is required. Remove it. Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 3 --- include/linux/ftrace.h | 2 -- kernel/trace/ftrace.c | 13 ++++--------- 3 files changed, 4 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 5ef43ce8492f..89de3eaf8772 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -366,7 +366,6 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) /* converting nop to call */ return add_brk_on_nop(rec); - case FTRACE_UPDATE_MODIFY_CALL_REGS: case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_NOP: /* converting a call to a nop */ @@ -469,7 +468,6 @@ static int add_update(struct dyn_ftrace *rec, int enable) case FTRACE_UPDATE_IGNORE: return 0; - case FTRACE_UPDATE_MODIFY_CALL_REGS: case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ @@ -516,7 +514,6 @@ static int finish_update(struct dyn_ftrace *rec, int enable) case FTRACE_UPDATE_IGNORE: return 0; - case FTRACE_UPDATE_MODIFY_CALL_REGS: case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2f8cbffecd3d..3e6dfb31f8e6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -362,14 +362,12 @@ enum { * IGNORE - The function is already what we want it to be * MAKE_CALL - Start tracing the function * MODIFY_CALL - Stop saving regs for the function - * MODIFY_CALL_REGS - Start saving regs for the function * MAKE_NOP - Stop tracing the function */ enum { FTRACE_UPDATE_IGNORE, FTRACE_UPDATE_MAKE_CALL, FTRACE_UPDATE_MODIFY_CALL, - FTRACE_UPDATE_MODIFY_CALL_REGS, FTRACE_UPDATE_MAKE_NOP, }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 52c2b53b7953..cc07b7fc4372 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1701,19 +1701,15 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) /* * If this record is being updated from a nop, then * return UPDATE_MAKE_CALL. - * Otherwise, if the EN flag is set, then return - * UPDATE_MODIFY_CALL_REGS to tell the caller to convert - * from the non-save regs, to a save regs function. * Otherwise, * return UPDATE_MODIFY_CALL to tell the caller to convert - * from the save regs, to a non-save regs function. + * from the save regs, to a non-save regs function or + * vice versa. */ if (flag & FTRACE_FL_ENABLED) return FTRACE_UPDATE_MAKE_CALL; - else if (rec->flags & FTRACE_FL_REGS_EN) - return FTRACE_UPDATE_MODIFY_CALL_REGS; - else - return FTRACE_UPDATE_MODIFY_CALL; + + return FTRACE_UPDATE_MODIFY_CALL; } if (update) { @@ -1815,7 +1811,6 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) case FTRACE_UPDATE_MAKE_NOP: return ftrace_make_nop(NULL, rec, ftrace_addr); - case FTRACE_UPDATE_MODIFY_CALL_REGS: case FTRACE_UPDATE_MODIFY_CALL: return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr); } -- cgit v1.2.3-71-gd317 From f9f36917903b57c571b1ddcfc6bc794ca4dd8232 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 14 May 2014 14:13:41 +0800 Subject: libahci_platform: add host_flags parameter in ahci_platform_init_host() Add a dynamic host_flags argument to make ahci_platform_init_host more flexible, then remove the AHCI_HFLAGS(...) argument from some driver's ata_port_info, and pass that in as the new argument. Cc: Hans de Geode Reviewed-by: Hans de Goede Signed-off-by: Kefeng Wang Signed-off-by: Tejun Heo --- drivers/ata/ahci_da850.c | 3 ++- drivers/ata/ahci_imx.c | 3 ++- drivers/ata/ahci_mvebu.c | 3 ++- drivers/ata/ahci_platform.c | 2 +- drivers/ata/ahci_st.c | 2 +- drivers/ata/ahci_sunxi.c | 9 ++++++--- drivers/ata/ahci_xgene.c | 7 +++++-- drivers/ata/libahci_platform.c | 5 ++++- include/linux/ahci_platform.h | 1 + 9 files changed, 24 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/ata/ahci_da850.c b/drivers/ata/ahci_da850.c index 2c83613ce2db..2b77d53bccf8 100644 --- a/drivers/ata/ahci_da850.c +++ b/drivers/ata/ahci_da850.c @@ -85,7 +85,8 @@ static int ahci_da850_probe(struct platform_device *pdev) da850_sata_init(dev, pwrdn_reg, hpriv->mmio); - rc = ahci_platform_init_host(pdev, hpriv, &ahci_da850_port_info, 0, 0); + rc = ahci_platform_init_host(pdev, hpriv, &ahci_da850_port_info, + 0, 0, 0); if (rc) goto disable_resources; diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c index 497c7abe1c7d..e7e44a73e4fe 100644 --- a/drivers/ata/ahci_imx.c +++ b/drivers/ata/ahci_imx.c @@ -267,7 +267,8 @@ static int imx_ahci_probe(struct platform_device *pdev) reg_val = clk_get_rate(imxpriv->ahb_clk) / 1000; writel(reg_val, hpriv->mmio + HOST_TIMER1MS); - ret = ahci_platform_init_host(pdev, hpriv, &ahci_imx_port_info, 0, 0); + ret = ahci_platform_init_host(pdev, hpriv, &ahci_imx_port_info, + 0, 0, 0); if (ret) imx_sata_disable(hpriv); diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c index 1df8630c6b65..fd3dfd733b84 100644 --- a/drivers/ata/ahci_mvebu.c +++ b/drivers/ata/ahci_mvebu.c @@ -88,7 +88,8 @@ static int ahci_mvebu_probe(struct platform_device *pdev) ahci_mvebu_mbus_config(hpriv, dram); ahci_mvebu_regret_option(hpriv); - rc = ahci_platform_init_host(pdev, hpriv, &ahci_mvebu_port_info, 0, 0); + rc = ahci_platform_init_host(pdev, hpriv, &ahci_mvebu_port_info, + 0, 0, 0); if (rc) goto disable_resources; diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index ef67e79944f9..a476a1fd3f8f 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -55,7 +55,7 @@ static int ahci_probe(struct platform_device *pdev) goto disable_resources; } - rc = ahci_platform_init_host(pdev, hpriv, &ahci_port_info, 0, 0); + rc = ahci_platform_init_host(pdev, hpriv, &ahci_port_info, 0, 0, 0); if (rc) goto pdata_exit; diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c index 633222226c19..2595598df9ce 100644 --- a/drivers/ata/ahci_st.c +++ b/drivers/ata/ahci_st.c @@ -166,7 +166,7 @@ static int st_ahci_probe(struct platform_device *pdev) if (err) return err; - err = ahci_platform_init_host(pdev, hpriv, &st_ahci_port_info, 0, 0); + err = ahci_platform_init_host(pdev, hpriv, &st_ahci_port_info, 0, 0, 0); if (err) { ahci_platform_disable_resources(hpriv); return err; diff --git a/drivers/ata/ahci_sunxi.c b/drivers/ata/ahci_sunxi.c index 42d3f64e74b3..02002f125bd4 100644 --- a/drivers/ata/ahci_sunxi.c +++ b/drivers/ata/ahci_sunxi.c @@ -157,8 +157,6 @@ static void ahci_sunxi_start_engine(struct ata_port *ap) } static const struct ata_port_info ahci_sunxi_port_info = { - AHCI_HFLAGS(AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI | - AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ), .flags = AHCI_FLAG_COMMON | ATA_FLAG_NCQ, .pio_mask = ATA_PIO4, .udma_mask = ATA_UDMA6, @@ -169,6 +167,7 @@ static int ahci_sunxi_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct ahci_host_priv *hpriv; + unsigned long hflags; int rc; hpriv = ahci_platform_get_resources(pdev); @@ -185,7 +184,11 @@ static int ahci_sunxi_probe(struct platform_device *pdev) if (rc) goto disable_resources; - rc = ahci_platform_init_host(pdev, hpriv, &ahci_sunxi_port_info, 0, 0); + hflags = AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI | + AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ; + + rc = ahci_platform_init_host(pdev, hpriv, &ahci_sunxi_port_info, + hflags, 0, 0); if (rc) goto disable_resources; diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c index 77c89bf171f1..042a9bb45c86 100644 --- a/drivers/ata/ahci_xgene.c +++ b/drivers/ata/ahci_xgene.c @@ -303,7 +303,6 @@ static struct ata_port_operations xgene_ahci_ops = { }; static const struct ata_port_info xgene_ahci_port_info = { - AHCI_HFLAGS(AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ), .flags = AHCI_FLAG_COMMON | ATA_FLAG_NCQ, .pio_mask = ATA_PIO4, .udma_mask = ATA_UDMA6, @@ -382,6 +381,7 @@ static int xgene_ahci_probe(struct platform_device *pdev) struct ahci_host_priv *hpriv; struct xgene_ahci_context *ctx; struct resource *res; + unsigned long hflags; int rc; hpriv = ahci_platform_get_resources(pdev); @@ -450,7 +450,10 @@ static int xgene_ahci_probe(struct platform_device *pdev) goto disable_resources; } - rc = ahci_platform_init_host(pdev, hpriv, &xgene_ahci_port_info, 0, 0); + hflags = AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ; + + rc = ahci_platform_init_host(pdev, hpriv, &xgene_ahci_port_info, + hflags, 0, 0); if (rc) goto disable_resources; diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 7cb3a85719c0..3a5b4ed25a4f 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -283,6 +283,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_get_resources); * @pdev: platform device pointer for the host * @hpriv: ahci-host private data for the host * @pi_template: template for the ata_port_info to use + * @host_flags: ahci host flags used in ahci_host_priv * @force_port_map: param passed to ahci_save_initial_config * @mask_port_map: param passed to ahci_save_initial_config * @@ -296,6 +297,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_get_resources); int ahci_platform_init_host(struct platform_device *pdev, struct ahci_host_priv *hpriv, const struct ata_port_info *pi_template, + unsigned long host_flags, unsigned int force_port_map, unsigned int mask_port_map) { @@ -312,7 +314,8 @@ int ahci_platform_init_host(struct platform_device *pdev, } /* prepare host */ - hpriv->flags |= (unsigned long)pi.private_data; + pi.private_data = (void *)host_flags; + hpriv->flags |= host_flags; ahci_save_initial_config(dev, hpriv, force_port_map, mask_port_map); diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 1f16d502600c..6dfd51a04d77 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -44,6 +44,7 @@ struct ahci_host_priv *ahci_platform_get_resources( int ahci_platform_init_host(struct platform_device *pdev, struct ahci_host_priv *hpriv, const struct ata_port_info *pi_template, + unsigned long host_flags, unsigned int force_port_map, unsigned int mask_port_map); -- cgit v1.2.3-71-gd317 From 1a56f2aa4752293e5a9c0c3a2331620aa1fdb808 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 14 May 2014 13:43:37 +0900 Subject: workqueue: Remove deprecated flush[_delayed]_work_sync() flush[_delayed]_work_sync() were deprecated by 4382973 ("workqueue: deprecate flush[_delayed]_work_sync()") and have been deprecated for a long time. In addition, these are not used anymore. So, let's remove these functions. Signed-off-by: Jingoo Han Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 1b22c42e9c2d..aa92d0295e28 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -587,18 +587,6 @@ static inline bool keventd_up(void) return system_wq != NULL; } -/* used to be different but now identical to flush_work(), deprecated */ -static inline bool __deprecated flush_work_sync(struct work_struct *work) -{ - return flush_work(work); -} - -/* used to be different but now identical to flush_delayed_work(), deprecated */ -static inline bool __deprecated flush_delayed_work_sync(struct delayed_work *dwork) -{ - return flush_delayed_work(dwork); -} - #ifndef CONFIG_SMP static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg) { -- cgit v1.2.3-71-gd317 From cf416171e7e1d966111f53bdae82f51af05e7bf8 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 14 May 2014 13:58:06 +0900 Subject: workqueue: Remove deprecated system_nrt[_freezable]_wq system_nrt[_freezable]_wq were deprecated by 3b07e9c ("workqueue: deprecate system_nrt[_freezable]_wq") and have been deprecated for a long time. In addition, these are not used anymore. So, let's remove these functions. Signed-off-by: Jingoo Han Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index aa92d0295e28..d93d28b2ec73 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -364,20 +364,6 @@ extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct *system_power_efficient_wq; extern struct workqueue_struct *system_freezable_power_efficient_wq; -static inline struct workqueue_struct * __deprecated __system_nrt_wq(void) -{ - return system_wq; -} - -static inline struct workqueue_struct * __deprecated __system_nrt_freezable_wq(void) -{ - return system_freezable_wq; -} - -/* equivlalent to system_wq and system_freezable_wq, deprecated */ -#define system_nrt_wq __system_nrt_wq() -#define system_nrt_freezable_wq __system_nrt_freezable_wq() - extern struct workqueue_struct * __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6); -- cgit v1.2.3-71-gd317 From 122ff243f5f104194750ecbc76d5946dd1eec934 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 12 May 2014 16:04:53 -0700 Subject: ipv4: make ip_local_reserved_ports per netns ip_local_port_range is already per netns, so should ip_local_reserved_ports be. And since it is none by default we don't actually need it when we don't enable CONFIG_SYSCTL. By the way, rename inet_is_reserved_local_port() to inet_is_local_reserved_port() Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/ip.h | 14 +++++++++++--- include/net/netns/ipv4.h | 4 ++++ kernel/sysctl.c | 4 ++-- net/ipv4/af_inet.c | 8 +------- net/ipv4/inet_connection_sock.c | 5 +---- net/ipv4/inet_hashtables.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 31 ++++++++++++++----------------- net/ipv4/udp.c | 2 +- net/sctp/socket.c | 5 +++-- 9 files changed, 38 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 14c50a1650ef..512bcd5dabac 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -208,11 +208,19 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o void inet_get_local_port_range(struct net *net, int *low, int *high); -extern unsigned long *sysctl_local_reserved_ports; -static inline int inet_is_reserved_local_port(int port) +#if CONFIG_SYSCTL +static inline int inet_is_local_reserved_port(struct net *net, int port) { - return test_bit(port, sysctl_local_reserved_ports); + if (!net->ipv4.sysctl_local_reserved_ports) + return 0; + return test_bit(port, net->ipv4.sysctl_local_reserved_ports); } +#else +static inline int inet_is_local_reserved_port(struct net *net, int port) +{ + return 0; +} +#endif extern int sysctl_ip_nonlocal_bind; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2f0cfad66666..aec5e12f9f19 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -84,6 +84,10 @@ struct netns_ipv4 { atomic_t dev_addr_genid; +#ifdef CONFIG_SYSCTL + unsigned long *sysctl_local_reserved_ports; +#endif + #ifdef CONFIG_IP_MROUTE #ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES struct mr_table *mrt; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 74f5b580fe34..e36ae4b15726 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2501,11 +2501,11 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, bool first = 1; size_t left = *lenp; unsigned long bitmap_len = table->maxlen; - unsigned long *bitmap = (unsigned long *) table->data; + unsigned long *bitmap = *(unsigned long **) table->data; unsigned long *tmp_bitmap = NULL; char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c; - if (!bitmap_len || !left || (*ppos && !write)) { + if (!bitmap || !bitmap_len || !left || (*ppos && !write)) { *lenp = 0; return 0; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 211c0cc6c3d3..279132bcadd9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1705,13 +1705,9 @@ static int __init inet_init(void) BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); - sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); - if (!sysctl_local_reserved_ports) - goto out; - rc = proto_register(&tcp_prot, 1); if (rc) - goto out_free_reserved_ports; + goto out; rc = proto_register(&udp_prot, 1); if (rc) @@ -1821,8 +1817,6 @@ out_unregister_udp_proto: proto_unregister(&udp_prot); out_unregister_tcp_proto: proto_unregister(&tcp_prot); -out_free_reserved_ports: - kfree(sysctl_local_reserved_ports); goto out; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 12e502cbfdc7..14d02ea905b6 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -29,9 +29,6 @@ const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; EXPORT_SYMBOL(inet_csk_timer_bug_msg); #endif -unsigned long *sysctl_local_reserved_ports; -EXPORT_SYMBOL(sysctl_local_reserved_ports); - void inet_get_local_port_range(struct net *net, int *low, int *high) { unsigned int seq; @@ -113,7 +110,7 @@ again: smallest_size = -1; do { - if (inet_is_reserved_local_port(rover)) + if (inet_is_local_reserved_port(net, rover)) goto next_nolock; head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)]; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 8b9cf279450d..83331f1b86ac 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -500,7 +500,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, local_bh_disable(); for (i = 1; i <= remaining; i++) { port = low + (i + offset) % remaining; - if (inet_is_reserved_local_port(port)) + if (inet_is_local_reserved_port(net, port)) continue; head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a33b9fbc1d80..79a007c52558 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -436,13 +436,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "ip_local_reserved_ports", - .data = NULL, /* initialized in sysctl_ipv4_init */ - .maxlen = 65536, - .mode = 0644, - .proc_handler = proc_do_large_bitmap, - }, { .procname = "igmp_max_memberships", .data = &sysctl_igmp_max_memberships, @@ -824,6 +817,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = ipv4_local_port_range, }, + { + .procname = "ip_local_reserved_ports", + .data = &init_net.ipv4.sysctl_local_reserved_ports, + .maxlen = 65536, + .mode = 0644, + .proc_handler = proc_do_large_bitmap, + }, { .procname = "ip_no_pmtu_disc", .data = &init_net.ipv4.sysctl_ip_no_pmtu_disc, @@ -876,8 +876,14 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) if (net->ipv4.ipv4_hdr == NULL) goto err_reg; + net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); + if (!net->ipv4.sysctl_local_reserved_ports) + goto err_ports; + return 0; +err_ports: + unregister_net_sysctl_table(net->ipv4.ipv4_hdr); err_reg: if (!net_eq(net, &init_net)) kfree(table); @@ -889,6 +895,7 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net) { struct ctl_table *table; + kfree(net->ipv4.sysctl_local_reserved_ports); table = net->ipv4.ipv4_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.ipv4_hdr); kfree(table); @@ -902,16 +909,6 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = { static __init int sysctl_ipv4_init(void) { struct ctl_table_header *hdr; - struct ctl_table *i; - - for (i = ipv4_table; i->procname; i++) { - if (strcmp(i->procname, "ip_local_reserved_ports") == 0) { - i->data = sysctl_local_reserved_ports; - break; - } - } - if (!i->procname) - return -EINVAL; hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table); if (hdr == NULL) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 54ea0a3a48f1..6729ea97a59d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -246,7 +246,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, do { if (low <= snum && snum <= high && !test_bit(snum >> udptable->log, bitmap) && - !inet_is_reserved_local_port(snum)) + !inet_is_local_reserved_port(net, snum)) goto found; snum += rand; } while (snum != first); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e37b2cbbf177..2af76eaba8f7 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5946,8 +5946,9 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) /* Search for an available port. */ int low, high, remaining, index; unsigned int rover; + struct net *net = sock_net(sk); - inet_get_local_port_range(sock_net(sk), &low, &high); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; rover = prandom_u32() % remaining + low; @@ -5955,7 +5956,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) rover++; if ((rover < low) || (rover > high)) rover = low; - if (inet_is_reserved_local_port(rover)) + if (inet_is_local_reserved_port(net, rover)) continue; index = sctp_phashfn(sock_net(sk), rover); head = &sctp_port_hashtable[index]; -- cgit v1.2.3-71-gd317 From 2eacc23c422b4553030168f315cb49522fa1b1f6 Mon Sep 17 00:00:00 2001 From: Yuval Atias Date: Wed, 14 May 2014 12:15:10 +0300 Subject: net/mlx4_core: Enforce irq affinity changes immediatly During heavy traffic, napi is constatntly polling the complition queue and no interrupt is fired. Because of that, changes to irq affinity are ignored until traffic is stopped and resumed. By registering to the irq notifier mechanism, and forcing interrupt when affinity is changed, irq affinity changes will be immediatly enforced. Signed-off-by: Yuval Atias Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cq.c | 3 ++ drivers/net/ethernet/mellanox/mlx4/en_rx.c | 11 +++++- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 6 +++ drivers/net/ethernet/mellanox/mlx4/eq.c | 62 ++++++++++++++++++++++++++++++ include/linux/mlx4/device.h | 3 ++ 5 files changed, 83 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 0487121e4a0f..8542030b89cf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -293,6 +293,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, atomic_set(&cq->refcount, 1); init_completion(&cq->free); + cq->irq = priv->eq_table.eq[cq->vector].irq; + cq->irq_affinity_change = false; + return 0; err_radix: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index a1512450816d..e8c0d2b832b7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -895,10 +895,17 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) mlx4_en_cq_unlock_napi(cq); /* If we used up all the quota - we're probably not done yet... */ - if (done == budget) + if (done == budget) { INC_PERF_COUNTER(priv->pstats.napi_quota); - else { + if (unlikely(cq->mcq.irq_affinity_change)) { + cq->mcq.irq_affinity_change = false; + napi_complete(napi); + mlx4_en_arm_cq(priv, cq); + return 0; + } + } else { /* Done for now */ + cq->mcq.irq_affinity_change = false; napi_complete(napi); mlx4_en_arm_cq(priv, cq); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 89585c6311c3..cb964056d710 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -474,9 +474,15 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget) /* If we used up all the quota - we're probably not done yet... */ if (done < budget) { /* Done for now */ + cq->mcq.irq_affinity_change = false; napi_complete(napi); mlx4_en_arm_cq(priv, cq); return done; + } else if (unlikely(cq->mcq.irq_affinity_change)) { + cq->mcq.irq_affinity_change = false; + napi_complete(napi); + mlx4_en_arm_cq(priv, cq); + return 0; } return budget; } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 6c088bc1845b..d954ec1eac17 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -53,6 +53,11 @@ enum { MLX4_EQ_ENTRY_SIZE = 0x20 }; +struct mlx4_irq_notify { + void *arg; + struct irq_affinity_notify notify; +}; + #define MLX4_EQ_STATUS_OK ( 0 << 28) #define MLX4_EQ_STATUS_WRITE_FAIL (10 << 28) #define MLX4_EQ_OWNER_SW ( 0 << 24) @@ -1083,6 +1088,57 @@ static void mlx4_unmap_clr_int(struct mlx4_dev *dev) iounmap(priv->clr_base); } +static void mlx4_irq_notifier_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct mlx4_irq_notify *n = container_of(notify, + struct mlx4_irq_notify, + notify); + struct mlx4_priv *priv = (struct mlx4_priv *)n->arg; + struct radix_tree_iter iter; + void **slot; + + radix_tree_for_each_slot(slot, &priv->cq_table.tree, &iter, 0) { + struct mlx4_cq *cq = (struct mlx4_cq *)(*slot); + + if (cq->irq == notify->irq) + cq->irq_affinity_change = true; + } +} + +static void mlx4_release_irq_notifier(struct kref *ref) +{ + struct mlx4_irq_notify *n = container_of(ref, struct mlx4_irq_notify, + notify.kref); + kfree(n); +} + +static void mlx4_assign_irq_notifier(struct mlx4_priv *priv, + struct mlx4_dev *dev, int irq) +{ + struct mlx4_irq_notify *irq_notifier = NULL; + int err = 0; + + irq_notifier = kzalloc(sizeof(*irq_notifier), GFP_KERNEL); + if (!irq_notifier) { + mlx4_warn(dev, "Failed to allocate irq notifier. irq %d\n", + irq); + return; + } + + irq_notifier->notify.irq = irq; + irq_notifier->notify.notify = mlx4_irq_notifier_notify; + irq_notifier->notify.release = mlx4_release_irq_notifier; + irq_notifier->arg = priv; + err = irq_set_affinity_notifier(irq, &irq_notifier->notify); + if (err) { + kfree(irq_notifier); + irq_notifier = NULL; + mlx4_warn(dev, "Failed to set irq notifier. irq %d\n", irq); + } +} + + int mlx4_alloc_eq_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1353,6 +1409,9 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, continue; /*we dont want to break here*/ } + mlx4_assign_irq_notifier(priv, dev, + priv->eq_table.eq[vec].irq); + eq_set_ci(&priv->eq_table.eq[vec], 1); } } @@ -1379,6 +1438,9 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec) Belonging to a legacy EQ*/ mutex_lock(&priv->msix_ctl.pool_lock); if (priv->msix_ctl.pool_bm & 1ULL << i) { + irq_set_affinity_notifier( + priv->eq_table.eq[vec].irq, + NULL); free_irq(priv->eq_table.eq[vec].irq, &priv->eq_table.eq[vec]); priv->msix_ctl.pool_bm &= ~(1ULL << i); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ba87bd21295a..c0468e6f0442 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -577,6 +577,9 @@ struct mlx4_cq { u32 cons_index; + u16 irq; + bool irq_affinity_change; + __be32 *set_ci_db; __be32 *arm_db; int arm_sn; -- cgit v1.2.3-71-gd317 From c7228317441f4dee5e5916e30300dd8c61f75af7 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 13 May 2014 20:30:07 -0700 Subject: net: Use a more standard macro for INET_ADDR_COOKIE Missing a colon on definition use is a bit odd so change the macro for the 32 bit case to declare an __attribute__((unused)) and __deprecated variable. The __deprecated attribute will cause gcc to emit an error if the variable is actually used. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 8 +++++--- net/ipv4/inet_hashtables.c | 4 ++-- net/ipv4/udp.c | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 1bdb47715def..dd1950a7e273 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -292,12 +292,12 @@ static inline struct sock *inet_lookup_listener(struct net *net, #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ const __addrpair __name = (__force __addrpair) ( \ (((__force __u64)(__be32)(__saddr)) << 32) | \ - ((__force __u64)(__be32)(__daddr))); + ((__force __u64)(__be32)(__daddr))) #else /* __LITTLE_ENDIAN */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ const __addrpair __name = (__force __addrpair) ( \ (((__force __u64)(__be32)(__daddr)) << 32) | \ - ((__force __u64)(__be32)(__saddr))); + ((__force __u64)(__be32)(__saddr))) #endif /* __BIG_ENDIAN */ #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \ (((__sk)->sk_portpair == (__ports)) && \ @@ -306,7 +306,9 @@ static inline struct sock *inet_lookup_listener(struct net *net, ((__sk)->sk_bound_dev_if == (__dif))) && \ net_eq(sock_net(__sk), (__net))) #else /* 32-bit arch */ -#define INET_ADDR_COOKIE(__name, __saddr, __daddr) +#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ + const int __name __deprecated __attribute__((unused)) + #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \ (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_daddr == (__saddr)) && \ diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 83331f1b86ac..43116e8c8e13 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -274,7 +274,7 @@ struct sock *__inet_lookup_established(struct net *net, const __be32 daddr, const u16 hnum, const int dif) { - INET_ADDR_COOKIE(acookie, saddr, daddr) + INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(sport, hnum); struct sock *sk; const struct hlist_nulls_node *node; @@ -327,7 +327,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, __be32 daddr = inet->inet_rcv_saddr; __be32 saddr = inet->inet_daddr; int dif = sk->sk_bound_dev_if; - INET_ADDR_COOKIE(acookie, saddr, daddr) + INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); struct net *net = sock_net(sk); unsigned int hash = inet_ehashfn(net, daddr, lport, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6729ea97a59d..590532a7bd2d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1875,7 +1875,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); unsigned int slot2 = hash2 & udp_table.mask; struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; - INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr) + INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); rcu_read_lock(); -- cgit v1.2.3-71-gd317 From 542ad4f8886b376dac9a4334bdb38f9c22a4d8da Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Apr 2014 15:08:29 -0700 Subject: Input: gpio_keys - convert struct descriptions to kernel-doc This patch converts descriptions of the structures defined in linux/gpio_keys.h to follow kernel-doc format. There is no functional change. Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Signed-off-by: Dmitry Torokhov --- include/linux/gpio_keys.h | 48 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h index a7e977ff4abf..8b622468952c 100644 --- a/include/linux/gpio_keys.h +++ b/include/linux/gpio_keys.h @@ -3,29 +3,53 @@ struct device; +/** + * struct gpio_keys_button - configuration parameters + * @code: input event code (KEY_*, SW_*) + * @gpio: %-1 if this key does not support gpio + * @active_low: %true indicates that button is considered + * depressed when gpio is low + * @desc: label that will be attached to button's gpio + * @type: input event type (%EV_KEY, %EV_SW, %EV_ABS) + * @wakeup: configure the button as a wake-up source + * @debounce_interval: debounce ticks interval in msecs + * @can_disable: %true indicates that userspace is allowed to + * disable button via sysfs + * @value: axis value for %EV_ABS + * @irq: Irq number in case of interrupt keys + */ struct gpio_keys_button { - /* Configuration parameters */ - unsigned int code; /* input event code (KEY_*, SW_*) */ - int gpio; /* -1 if this key does not support gpio */ + unsigned int code; + int gpio; int active_low; const char *desc; - unsigned int type; /* input event type (EV_KEY, EV_SW, EV_ABS) */ - int wakeup; /* configure the button as a wake-up source */ - int debounce_interval; /* debounce ticks interval in msecs */ + unsigned int type; + int wakeup; + int debounce_interval; bool can_disable; - int value; /* axis value for EV_ABS */ - unsigned int irq; /* Irq number in case of interrupt keys */ + int value; + unsigned int irq; }; +/** + * struct gpio_keys_platform_data - platform data for gpio_keys driver + * @buttons: pointer to array of &gpio_keys_button structures + * describing buttons attached to the device + * @nbuttons: number of elements in @buttons array + * @poll_interval: polling interval in msecs - for polling driver only + * @rep: enable input subsystem auto repeat + * @enable: platform hook for enabling the device + * @disable: platform hook for disabling the device + * @name: input device name + */ struct gpio_keys_platform_data { struct gpio_keys_button *buttons; int nbuttons; - unsigned int poll_interval; /* polling interval in msecs - - for polling driver only */ - unsigned int rep:1; /* enable input subsystem auto repeat */ + unsigned int poll_interval; + unsigned int rep:1; int (*enable)(struct device *dev); void (*disable)(struct device *dev); - const char *name; /* input device name */ + const char *name; }; #endif -- cgit v1.2.3-71-gd317 From bf1de9761c21f56d5b0c6a0acd3b792d801c61e6 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 28 Apr 2014 10:49:51 -0700 Subject: Input: implement managed polled input devices Managed resources are becoming more and more popular in drivers. Let's implement managed polled input devices, to complement managed regular input devices. Similarly to managed regular input devices only one new call devm_input_allocate_polled_device() is added and the rest of APIs is modified to work with both managed and non-managed devices. Reviewed-by: David Herrmann Tested-by: Alexander Shiyan Signed-off-by: Dmitry Torokhov --- drivers/input/input-polldev.c | 118 +++++++++++++++++++++++++++++++++++++++++- include/linux/input-polldev.h | 3 ++ 2 files changed, 119 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/input/input-polldev.c b/drivers/input/input-polldev.c index 4b191908d5de..3664f81655ca 100644 --- a/drivers/input/input-polldev.c +++ b/drivers/input/input-polldev.c @@ -176,6 +176,91 @@ struct input_polled_dev *input_allocate_polled_device(void) } EXPORT_SYMBOL(input_allocate_polled_device); +struct input_polled_devres { + struct input_polled_dev *polldev; +}; + +static int devm_input_polldev_match(struct device *dev, void *res, void *data) +{ + struct input_polled_devres *devres = res; + + return devres->polldev == data; +} + +static void devm_input_polldev_release(struct device *dev, void *res) +{ + struct input_polled_devres *devres = res; + struct input_polled_dev *polldev = devres->polldev; + + dev_dbg(dev, "%s: dropping reference/freeing %s\n", + __func__, dev_name(&polldev->input->dev)); + + input_put_device(polldev->input); + kfree(polldev); +} + +static void devm_input_polldev_unregister(struct device *dev, void *res) +{ + struct input_polled_devres *devres = res; + struct input_polled_dev *polldev = devres->polldev; + + dev_dbg(dev, "%s: unregistering device %s\n", + __func__, dev_name(&polldev->input->dev)); + input_unregister_device(polldev->input); + + /* + * Note that we are still holding extra reference to the input + * device so it will stick around until devm_input_polldev_release() + * is called. + */ +} + +/** + * devm_input_allocate_polled_device - allocate managed polled device + * @dev: device owning the polled device being created + * + * Returns prepared &struct input_polled_dev or %NULL. + * + * Managed polled input devices do not need to be explicitly unregistered + * or freed as it will be done automatically when owner device unbinds + * from * its driver (or binding fails). Once such managed polled device + * is allocated, it is ready to be set up and registered in the same + * fashion as regular polled input devices (using + * input_register_polled_device() function). + * + * If you want to manually unregister and free such managed polled devices, + * it can be still done by calling input_unregister_polled_device() and + * input_free_polled_device(), although it is rarely needed. + * + * NOTE: the owner device is set up as parent of input device and users + * should not override it. + */ +struct input_polled_dev *devm_input_allocate_polled_device(struct device *dev) +{ + struct input_polled_dev *polldev; + struct input_polled_devres *devres; + + devres = devres_alloc(devm_input_polldev_release, sizeof(*devres), + GFP_KERNEL); + if (!devres) + return NULL; + + polldev = input_allocate_polled_device(); + if (!polldev) { + devres_free(devres); + return NULL; + } + + polldev->input->dev.parent = dev; + polldev->devres_managed = true; + + devres->polldev = polldev; + devres_add(dev, devres); + + return polldev; +} +EXPORT_SYMBOL(devm_input_allocate_polled_device); + /** * input_free_polled_device - free memory allocated for polled device * @dev: device to free @@ -186,7 +271,12 @@ EXPORT_SYMBOL(input_allocate_polled_device); void input_free_polled_device(struct input_polled_dev *dev) { if (dev) { - input_free_device(dev->input); + if (dev->devres_managed) + WARN_ON(devres_destroy(dev->input->dev.parent, + devm_input_polldev_release, + devm_input_polldev_match, + dev)); + input_put_device(dev->input); kfree(dev); } } @@ -204,9 +294,19 @@ EXPORT_SYMBOL(input_free_polled_device); */ int input_register_polled_device(struct input_polled_dev *dev) { + struct input_polled_devres *devres = NULL; struct input_dev *input = dev->input; int error; + if (dev->devres_managed) { + devres = devres_alloc(devm_input_polldev_unregister, + sizeof(*devres), GFP_KERNEL); + if (!devres) + return -ENOMEM; + + devres->polldev = dev; + } + input_set_drvdata(input, dev); INIT_DELAYED_WORK(&dev->work, input_polled_device_work); @@ -221,8 +321,10 @@ int input_register_polled_device(struct input_polled_dev *dev) input->dev.groups = input_polldev_attribute_groups; error = input_register_device(input); - if (error) + if (error) { + devres_free(devres); return error; + } /* * Take extra reference to the underlying input device so @@ -233,6 +335,12 @@ int input_register_polled_device(struct input_polled_dev *dev) */ input_get_device(input); + if (dev->devres_managed) { + dev_dbg(input->dev.parent, "%s: registering %s with devres.\n", + __func__, dev_name(&input->dev)); + devres_add(input->dev.parent, devres); + } + return 0; } EXPORT_SYMBOL(input_register_polled_device); @@ -247,6 +355,12 @@ EXPORT_SYMBOL(input_register_polled_device); */ void input_unregister_polled_device(struct input_polled_dev *dev) { + if (dev->devres_managed) + WARN_ON(devres_destroy(dev->input->dev.parent, + devm_input_polldev_unregister, + devm_input_polldev_match, + dev)); + input_unregister_device(dev->input); } EXPORT_SYMBOL(input_unregister_polled_device); diff --git a/include/linux/input-polldev.h b/include/linux/input-polldev.h index ce0b72464eb8..2465182670db 100644 --- a/include/linux/input-polldev.h +++ b/include/linux/input-polldev.h @@ -48,9 +48,12 @@ struct input_polled_dev { /* private: */ struct delayed_work work; + + bool devres_managed; }; struct input_polled_dev *input_allocate_polled_device(void); +struct input_polled_dev *devm_input_allocate_polled_device(struct device *dev); void input_free_polled_device(struct input_polled_dev *dev); int input_register_polled_device(struct input_polled_dev *dev); void input_unregister_polled_device(struct input_polled_dev *dev); -- cgit v1.2.3-71-gd317 From 34d22ce22b0b249804816990a3b62b08b1a62546 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Fri, 9 May 2014 14:11:44 +0300 Subject: cfg80211: Add API to update CSA counters in mgmt frames Add NL80211_ATTR_CSA_C_OFFSETS_TX which holds an array of offsets to the CSA counters which should be updated when sending a management frames with NL80211_CMD_FRAME. This API should be used by the drivers that wish to keep the CSA counter updated in probe responses, but do not implement probe response offloading and so, do not use ieee80211_proberesp_get function. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ include/uapi/linux/nl80211.h | 8 ++++++++ net/wireless/nl80211.c | 24 ++++++++++++++++++++++-- 3 files changed, 34 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e3a48b0a2b3b..f46e1e15746d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1986,6 +1986,8 @@ struct cfg80211_update_ft_ies_params { * @len: buffer length * @no_cck: don't use cck rates for this frame * @dont_wait_for_ack: tells the low level not to wait for an ack + * @n_csa_offsets: length of csa_offsets array + * @csa_offsets: array of all the csa offsets in the frame */ struct cfg80211_mgmt_tx_params { struct ieee80211_channel *chan; @@ -1995,6 +1997,8 @@ struct cfg80211_mgmt_tx_params { size_t len; bool no_cck; bool dont_wait_for_ack; + int n_csa_offsets; + const u16 *csa_offsets; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b65095a85dee..ec90fc9d2358 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -503,6 +503,9 @@ * TX status event pertaining to the TX request. * %NL80211_ATTR_TX_NO_CCK_RATE is used to decide whether to send the * management frames at CCK rate or not in 2GHz band. + * %NL80211_ATTR_CSA_C_OFFSETS_TX is an array of offsets to CSA + * counters which will be updated to the current value. This attribute + * is used during CSA period. * @NL80211_CMD_FRAME_WAIT_CANCEL: When an off-channel TX was requested, this * command may be used with the corresponding cookie to cancel the wait * time if it is known that it is no longer necessary. @@ -1576,6 +1579,9 @@ enum nl80211_commands { * advertise values that cannot always be met. In such cases, an attempt * to add a new station entry with @NL80211_CMD_NEW_STATION may fail. * + * @NL80211_ATTR_CSA_C_OFFSETS_TX: An array of csa counter offsets (u16) which + * should be updated when the frame is transmitted. + * * @NL80211_ATTR_TDLS_PEER_CAPABILITY: flags for TDLS peer capabilities, u32. * As specified in the &enum nl80211_tdls_peer_capability. * @@ -1920,6 +1926,8 @@ enum nl80211_attrs { NL80211_ATTR_IFACE_SOCKET_OWNER, + NL80211_ATTR_CSA_C_OFFSETS_TX, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 74e7299e4add..4c0ca40ef90e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -386,6 +386,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 }, [NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 }, [NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG }, + [NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY }, }; /* policy for the key attributes */ @@ -7786,6 +7787,27 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) if (!chandef.chan && params.offchan) return -EINVAL; + params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); + params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]); + + if (info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]) { + int len = nla_len(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]); + int i; + + if (len % sizeof(u16)) + return -EINVAL; + + params.n_csa_offsets = len / sizeof(u16); + params.csa_offsets = + nla_data(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]); + + /* check that all the offsets fit the frame */ + for (i = 0; i < params.n_csa_offsets; i++) { + if (params.csa_offsets[i] >= params.len) + return -EINVAL; + } + } + if (!params.dont_wait_for_ack) { msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -7799,8 +7821,6 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) } } - params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); - params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]); params.chan = chandef.chan; err = cfg80211_mlme_mgmt_tx(rdev, wdev, ¶ms, &cookie); if (err) -- cgit v1.2.3-71-gd317 From 9a774c78e2114c7e8605e3a168ccd552cbe3d922 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Fri, 9 May 2014 14:11:46 +0300 Subject: cfg80211: Support multiple CSA counters Change the type of NL80211_ATTR_CSA_C_OFF_BEACON and NL80211_ATTR_CSA_C_OFF_PRESP to be NLA_BINARY which allows userspace to use beacons and probe responses with multiple CSA counters. This isn't breaking the API since userspace can continue to use nla_put_u16 for this attributes, which is equivalent to a single element u16 array. In addition advertise max number of supported CSA counters. This is needed when using CSA and eCSA IEs together. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 18 ++++++++++-- include/uapi/linux/nl80211.h | 11 +++++--- net/mac80211/cfg.c | 10 +++++-- net/wireless/core.c | 2 ++ net/wireless/nl80211.c | 65 ++++++++++++++++++++++++++++++++++---------- net/wireless/trace.h | 22 +++++++++------ 6 files changed, 96 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f46e1e15746d..447cb58f0d77 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -694,8 +694,10 @@ struct cfg80211_ap_settings { * * @chandef: defines the channel to use after the switch * @beacon_csa: beacon data while performing the switch - * @counter_offset_beacon: offset for the counter within the beacon (tail) - * @counter_offset_presp: offset for the counter within the probe response + * @counter_offsets_beacon: offsets of the counters within the beacon (tail) + * @counter_offsets_presp: offsets of the counters within the probe response + * @n_counter_offsets_beacon: number of csa counters the beacon (tail) + * @n_counter_offsets_presp: number of csa counters in the probe response * @beacon_after: beacon data to be used on the new channel * @radar_required: whether radar detection is required on the new channel * @block_tx: whether transmissions should be blocked while changing @@ -704,7 +706,10 @@ struct cfg80211_ap_settings { struct cfg80211_csa_settings { struct cfg80211_chan_def chandef; struct cfg80211_beacon_data beacon_csa; - u16 counter_offset_beacon, counter_offset_presp; + const u16 *counter_offsets_beacon; + const u16 *counter_offsets_presp; + unsigned int n_counter_offsets_beacon; + unsigned int n_counter_offsets_presp; struct cfg80211_beacon_data beacon_after; bool radar_required; bool block_tx; @@ -3048,6 +3053,13 @@ struct wiphy { u16 max_ap_assoc_sta; + /* + * Number of supported csa_counters in beacons and probe responses. + * This value should be set if the driver wishes to limit the number of + * csa counters. Default (0) means infinite. + */ + u8 max_num_csa_counters; + char priv[0] __aligned(NETDEV_ALIGN); }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ec90fc9d2358..0cfa827123ff 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1528,10 +1528,10 @@ enum nl80211_commands { * operation). * @NL80211_ATTR_CSA_IES: Nested set of attributes containing the IE information * for the time while performing a channel switch. - * @NL80211_ATTR_CSA_C_OFF_BEACON: Offset of the channel switch counter - * field in the beacons tail (%NL80211_ATTR_BEACON_TAIL). - * @NL80211_ATTR_CSA_C_OFF_PRESP: Offset of the channel switch counter - * field in the probe response (%NL80211_ATTR_PROBE_RESP). + * @NL80211_ATTR_CSA_C_OFF_BEACON: An array of offsets (u16) to the channel + * switch counters in the beacons tail (%NL80211_ATTR_BEACON_TAIL). + * @NL80211_ATTR_CSA_C_OFF_PRESP: An array of offsets (u16) to the channel + * switch counters in the probe response (%NL80211_ATTR_PROBE_RESP). * * @NL80211_ATTR_RXMGMT_FLAGS: flags for nl80211_send_mgmt(), u32. * As specified in the &enum nl80211_rxmgmt_flags. @@ -1581,6 +1581,8 @@ enum nl80211_commands { * * @NL80211_ATTR_CSA_C_OFFSETS_TX: An array of csa counter offsets (u16) which * should be updated when the frame is transmitted. + * @NL80211_ATTR_MAX_CSA_COUNTERS: U8 attribute used to advertise the maximum + * supported number of csa counters. * * @NL80211_ATTR_TDLS_PEER_CAPABILITY: flags for TDLS peer capabilities, u32. * As specified in the &enum nl80211_tdls_peer_capability. @@ -1927,6 +1929,7 @@ enum nl80211_attrs { NL80211_ATTR_IFACE_SOCKET_OWNER, NL80211_ATTR_CSA_C_OFFSETS_TX, + NL80211_ATTR_MAX_CSA_COUNTERS, /* add attributes here, update the policy in nl80211.c */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8e4754ebc2d8..7a6f8aba5c46 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3192,8 +3192,14 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, break; sdata->csa_counter_offset_beacon = - params->counter_offset_beacon; - sdata->csa_counter_offset_presp = params->counter_offset_presp; + params->counter_offsets_beacon[0]; + + if (params->n_counter_offsets_presp) + sdata->csa_counter_offset_presp = + params->counter_offsets_presp[0]; + else + sdata->csa_counter_offset_presp = 0; + err = ieee80211_assign_beacon(sdata, ¶ms->beacon_csa); if (err < 0) { kfree(sdata->u.ap.next_beacon); diff --git a/net/wireless/core.c b/net/wireless/core.c index 39788711ce6e..d03d8bdb29ca 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -402,6 +402,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.rts_threshold = (u32) -1; rdev->wiphy.coverage_class = 0; + rdev->wiphy.max_num_csa_counters = 1; + return &rdev->wiphy; } EXPORT_SYMBOL(wiphy_new); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4c0ca40ef90e..ca19b1520389 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -371,8 +371,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 }, [NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG }, [NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED }, - [NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_U16 }, - [NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_U16 }, + [NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_BINARY }, + [NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_BINARY }, [NL80211_ATTR_STA_SUPPORTED_CHANNELS] = { .type = NLA_BINARY }, [NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES] = { .type = NLA_BINARY }, [NL80211_ATTR_HANDLE_DFS] = { .type = NLA_FLAG }, @@ -1670,6 +1670,13 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, } nla_nest_end(msg, nested); } + state->split_start++; + break; + case 12: + if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH && + nla_put_u8(msg, NL80211_ATTR_MAX_CSA_COUNTERS, + rdev->wiphy.max_num_csa_counters)) + goto nla_put_failure; /* done */ state->split_start = 0; @@ -5864,6 +5871,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) u8 radar_detect_width = 0; int err; bool need_new_beacon = false; + int len, i; if (!rdev->ops->channel_switch || !(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)) @@ -5922,26 +5930,55 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) if (!csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]) return -EINVAL; - params.counter_offset_beacon = - nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]); - if (params.counter_offset_beacon >= params.beacon_csa.tail_len) + len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]); + if (!len || (len % sizeof(u16))) return -EINVAL; - /* sanity check - counters should be the same */ - if (params.beacon_csa.tail[params.counter_offset_beacon] != - params.count) + params.n_counter_offsets_beacon = len / sizeof(u16); + if (rdev->wiphy.max_num_csa_counters && + (params.n_counter_offsets_beacon > + rdev->wiphy.max_num_csa_counters)) return -EINVAL; + params.counter_offsets_beacon = + nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]); + + /* sanity checks - counters should fit and be the same */ + for (i = 0; i < params.n_counter_offsets_beacon; i++) { + u16 offset = params.counter_offsets_beacon[i]; + + if (offset >= params.beacon_csa.tail_len) + return -EINVAL; + + if (params.beacon_csa.tail[offset] != params.count) + return -EINVAL; + } + if (csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]) { - params.counter_offset_presp = - nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]); - if (params.counter_offset_presp >= - params.beacon_csa.probe_resp_len) + len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]); + if (!len || (len % sizeof(u16))) return -EINVAL; - if (params.beacon_csa.probe_resp[params.counter_offset_presp] != - params.count) + params.n_counter_offsets_presp = len / sizeof(u16); + if (rdev->wiphy.max_num_csa_counters && + (params.n_counter_offsets_beacon > + rdev->wiphy.max_num_csa_counters)) return -EINVAL; + + params.counter_offsets_presp = + nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]); + + /* sanity checks - counters should fit and be the same */ + for (i = 0; i < params.n_counter_offsets_presp; i++) { + u16 offset = params.counter_offsets_presp[i]; + + if (offset >= params.beacon_csa.probe_resp_len) + return -EINVAL; + + if (params.beacon_csa.probe_resp[offset] != + params.count) + return -EINVAL; + } } skip_beacons: diff --git a/net/wireless/trace.h b/net/wireless/trace.h index cdfbb00e1b37..560ed77084e9 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1876,29 +1876,33 @@ TRACE_EVENT(rdev_channel_switch, WIPHY_ENTRY NETDEV_ENTRY CHAN_DEF_ENTRY - __field(u16, counter_offset_beacon) - __field(u16, counter_offset_presp) __field(bool, radar_required) __field(bool, block_tx) __field(u8, count) + __dynamic_array(u16, bcn_ofs, params->n_counter_offsets_beacon) + __dynamic_array(u16, pres_ofs, params->n_counter_offsets_presp) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_DEF_ASSIGN(¶ms->chandef); - __entry->counter_offset_beacon = params->counter_offset_beacon; - __entry->counter_offset_presp = params->counter_offset_presp; __entry->radar_required = params->radar_required; __entry->block_tx = params->block_tx; __entry->count = params->count; + memcpy(__get_dynamic_array(bcn_ofs), + params->counter_offsets_beacon, + params->n_counter_offsets_beacon * sizeof(u16)); + + /* probe response offsets are optional */ + if (params->n_counter_offsets_presp) + memcpy(__get_dynamic_array(pres_ofs), + params->counter_offsets_presp, + params->n_counter_offsets_presp * sizeof(u16)); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT - ", block_tx: %d, count: %u, radar_required: %d" - ", counter offsets (beacon/presp): %u/%u", + ", block_tx: %d, count: %u, radar_required: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG, - __entry->block_tx, __entry->count, __entry->radar_required, - __entry->counter_offset_beacon, - __entry->counter_offset_presp) + __entry->block_tx, __entry->count, __entry->radar_required) ); TRACE_EVENT(rdev_set_qos_map, -- cgit v1.2.3-71-gd317 From 6ec8c332a0f93959e615158cc212b3abfd52abe7 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Fri, 9 May 2014 14:11:49 +0300 Subject: mac80211: Provide ieee80211_beacon_get_template API Add a new API ieee80211_beacon_get_template, which doesn't affect DTIM counter and should be used if the device generates beacon frames, and new beacon template is needed. In addition set the offsets to TIM IE for MESH interface. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 43 ++++++++++++++++++++++----- net/mac80211/tx.c | 80 ++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 94 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3541c48a97cd..e6521261a585 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3411,6 +3411,39 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, */ void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets); +/** + * struct ieee80211_mutable_offsets - mutable beacon offsets + * @tim_offset: position of TIM element + * @tim_length: size of TIM element + */ +struct ieee80211_mutable_offsets { + u16 tim_offset; + u16 tim_length; +}; + +/** + * ieee80211_beacon_get_template - beacon template generation function + * @hw: pointer obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @offs: &struct ieee80211_mutable_offsets pointer to struct that will + * receive the offsets that may be updated by the driver. + * + * If the driver implements beaconing modes, it must use this function to + * obtain the beacon template. + * + * This function should be used if the beacon frames are generated by the + * device, and then the driver must use the returned beacon as the template + * The driver is responsible to update the DTIM count. + * + * The driver is responsible for freeing the returned skb. + * + * Return: The beacon template. %NULL on error. + */ +struct sk_buff * +ieee80211_beacon_get_template(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_mutable_offsets *offs); + /** * ieee80211_beacon_get_tim - beacon generation function * @hw: pointer obtained from ieee80211_alloc_hw(). @@ -3422,16 +3455,12 @@ void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets); * Set to 0 if invalid (in non-AP modes). * * If the driver implements beaconing modes, it must use this function to - * obtain the beacon frame/template. + * obtain the beacon frame. * * If the beacon frames are generated by the host system (i.e., not in * hardware/firmware), the driver uses this function to get each beacon - * frame from mac80211 -- it is responsible for calling this function - * before the beacon is needed (e.g. based on hardware interrupt). - * - * If the beacon frames are generated by the device, then the driver - * must use the returned beacon as the template and change the TIM IE - * according to the current DTIM parameters/TIM bitmap. + * frame from mac80211 -- it is responsible for calling this function exactly + * once before the beacon is needed (e.g. based on hardware interrupt). * * The driver is responsible for freeing the returned skb. * diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 0d1a42d4b6de..509456e5722d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2328,7 +2328,8 @@ void ieee80211_tx_pending(unsigned long data) /* functions for drivers to get certain frames */ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, - struct ps_data *ps, struct sk_buff *skb) + struct ps_data *ps, struct sk_buff *skb, + bool is_template) { u8 *pos, *tim; int aid0 = 0; @@ -2341,11 +2342,12 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, * checking byte-for-byte */ have_bits = !bitmap_empty((unsigned long *)ps->tim, IEEE80211_MAX_AID+1); - - if (ps->dtim_count == 0) - ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1; - else - ps->dtim_count--; + if (!is_template) { + if (ps->dtim_count == 0) + ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1; + else + ps->dtim_count--; + } tim = pos = (u8 *) skb_put(skb, 6); *pos++ = WLAN_EID_TIM; @@ -2391,7 +2393,8 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, } static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, - struct ps_data *ps, struct sk_buff *skb) + struct ps_data *ps, struct sk_buff *skb, + bool is_template) { struct ieee80211_local *local = sdata->local; @@ -2403,10 +2406,10 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, * of the tim bitmap in mac80211 and the driver. */ if (local->tim_in_locked_section) { - __ieee80211_beacon_add_tim(sdata, ps, skb); + __ieee80211_beacon_add_tim(sdata, ps, skb, is_template); } else { spin_lock_bh(&local->tim_lock); - __ieee80211_beacon_add_tim(sdata, ps, skb); + __ieee80211_beacon_add_tim(sdata, ps, skb, is_template); spin_unlock_bh(&local->tim_lock); } @@ -2536,9 +2539,11 @@ bool ieee80211_csa_is_complete(struct ieee80211_vif *vif) } EXPORT_SYMBOL(ieee80211_csa_is_complete); -struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - u16 *tim_offset, u16 *tim_length) +static struct sk_buff * +__ieee80211_beacon_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_mutable_offsets *offs, + bool is_template) { struct ieee80211_local *local = hw_to_local(hw); struct sk_buff *skb = NULL; @@ -2556,10 +2561,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (!ieee80211_sdata_running(sdata) || !chanctx_conf) goto out; - if (tim_offset) - *tim_offset = 0; - if (tim_length) - *tim_length = 0; + if (offs) + memset(offs, 0, sizeof(*offs)); if (sdata->vif.type == NL80211_IFTYPE_AP) { struct ieee80211_if_ap *ap = &sdata->u.ap; @@ -2584,12 +2587,13 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, memcpy(skb_put(skb, beacon->head_len), beacon->head, beacon->head_len); - ieee80211_beacon_add_tim(sdata, &ap->ps, skb); + ieee80211_beacon_add_tim(sdata, &ap->ps, skb, + is_template); - if (tim_offset) - *tim_offset = beacon->head_len; - if (tim_length) - *tim_length = skb->len - beacon->head_len; + if (offs) { + offs->tim_offset = beacon->head_len; + offs->tim_length = skb->len - beacon->head_len; + } if (beacon->tail) memcpy(skb_put(skb, beacon->tail_len), @@ -2641,7 +2645,13 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, goto out; skb_reserve(skb, local->tx_headroom); memcpy(skb_put(skb, bcn->head_len), bcn->head, bcn->head_len); - ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb); + ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb, is_template); + + if (offs) { + offs->tim_offset = bcn->head_len; + offs->tim_length = skb->len - bcn->head_len; + } + memcpy(skb_put(skb, bcn->tail_len), bcn->tail, bcn->tail_len); } else { WARN_ON(1); @@ -2678,6 +2688,32 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, out: rcu_read_unlock(); return skb; + +} + +struct sk_buff * +ieee80211_beacon_get_template(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_mutable_offsets *offs) +{ + return __ieee80211_beacon_get(hw, vif, offs, true); +} +EXPORT_SYMBOL(ieee80211_beacon_get_template); + +struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u16 *tim_offset, u16 *tim_length) +{ + struct ieee80211_mutable_offsets offs = {}; + struct sk_buff *bcn = __ieee80211_beacon_get(hw, vif, &offs, false); + + if (tim_offset) + *tim_offset = offs.tim_offset; + + if (tim_length) + *tim_length = offs.tim_length; + + return bcn; } EXPORT_SYMBOL(ieee80211_beacon_get_tim); -- cgit v1.2.3-71-gd317 From 1af586c9116cdf6863823a830593c48cd9bcecde Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Fri, 9 May 2014 14:11:50 +0300 Subject: mac80211: Handle the CSA counters correctly Make the beacon CSA counters part of ieee80211_mutable_offsets and don't decrement CSA counters when generating a beacon template. This permits the driver to offload the CSA counters handling. Since mac80211 updates the probe responses with the correct counter, the driver should sync the counter's value with mac80211 using ieee80211_csa_update_counter function. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 23 +++++++++++++- net/mac80211/cfg.c | 4 +-- net/mac80211/ieee80211_i.h | 2 -- net/mac80211/tx.c | 76 ++++++++++++++++++++++++++++++++++------------ 4 files changed, 80 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e6521261a585..982d2cd80166 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3411,14 +3411,20 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, */ void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets); +#define IEEE80211_MAX_CSA_COUNTERS_NUM 2 + /** * struct ieee80211_mutable_offsets - mutable beacon offsets * @tim_offset: position of TIM element * @tim_length: size of TIM element + * @csa_offs: array of IEEE80211_MAX_CSA_COUNTERS_NUM offsets to CSA counters. + * This array can contain zero values which should be ignored. */ struct ieee80211_mutable_offsets { u16 tim_offset; u16 tim_length; + + u16 csa_counter_offs[IEEE80211_MAX_CSA_COUNTERS_NUM]; }; /** @@ -3433,7 +3439,8 @@ struct ieee80211_mutable_offsets { * * This function should be used if the beacon frames are generated by the * device, and then the driver must use the returned beacon as the template - * The driver is responsible to update the DTIM count. + * The driver or the device are responsible to update the DTIM and, when + * applicable, the CSA count. * * The driver is responsible for freeing the returned skb. * @@ -3485,6 +3492,20 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, return ieee80211_beacon_get_tim(hw, vif, NULL, NULL); } +/** + * ieee80211_csa_update_counter - request mac80211 to decrement the csa counter + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * + * The csa counter should be updated after each beacon transmission. + * This function is called implicitly when + * ieee80211_beacon_get/ieee80211_beacon_get_tim are called, however if the + * beacon frames are generated by the device, the driver should call this + * function after each beacon transmission to sync mac80211's csa counters. + * + * Return: new csa counter value + */ +u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif); + /** * ieee80211_csa_finish - notify mac80211 about channel switch * @vif: &struct ieee80211_vif pointer from the add_interface callback. diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d44dca56b8ff..bfd2534e5a4d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3502,10 +3502,10 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, sdata->vif.type == NL80211_IFTYPE_ADHOC) && params->n_csa_offsets) { int i; + u8 c = sdata->csa_current_counter; for (i = 0; i < params->n_csa_offsets; i++) - data[params->csa_offsets[i]] = - sdata->csa_current_counter; + data[params->csa_offsets[i]] = c; } IEEE80211_SKB_CB(skb)->flags = flags; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 05ed592d8bbe..57e0b2682cef 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -70,8 +70,6 @@ struct ieee80211_local; #define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */) -#define IEEE80211_MAX_CSA_COUNTERS_NUM 2 - struct ieee80211_fragment_entry { unsigned long first_frag_time; unsigned int seq; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 509456e5722d..5214686d9fd1 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2416,13 +2416,14 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, return 0; } -static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata, - struct beacon_data *beacon) +static void ieee80211_set_csa(struct ieee80211_sub_if_data *sdata, + struct beacon_data *beacon) { struct probe_resp *resp; u8 *beacon_data; size_t beacon_data_len; int i; + u8 count = sdata->csa_current_counter; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: @@ -2450,16 +2451,7 @@ static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata, if (WARN_ON(counter_offset_beacon >= beacon_data_len)) return; - /* Warn if the driver did not check for/react to csa - * completeness. A beacon with CSA counter set to 0 - * should never occur, because a counter of 1 means - * switch just before the next beacon. - */ - if (WARN_ON(beacon_data[counter_offset_beacon] == 1)) - return; - - beacon_data[counter_offset_beacon] = - sdata->csa_current_counter - 1; + beacon_data[counter_offset_beacon] = count; } if (sdata->vif.type == NL80211_IFTYPE_AP && @@ -2474,14 +2466,24 @@ static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); return; } - resp->data[counter_offset_presp] = - sdata->csa_current_counter - 1; + resp->data[counter_offset_presp] = count; rcu_read_unlock(); } } +} + +u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); sdata->csa_current_counter--; + + /* the counter should never reach 0 */ + WARN_ON(!sdata->csa_current_counter); + + return sdata->csa_current_counter; } +EXPORT_SYMBOL(ieee80211_csa_update_counter); bool ieee80211_csa_is_complete(struct ieee80211_vif *vif) { @@ -2552,6 +2554,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, enum ieee80211_band band; struct ieee80211_tx_rate_control txrc; struct ieee80211_chanctx_conf *chanctx_conf; + int csa_off_base = 0; rcu_read_lock(); @@ -2569,8 +2572,12 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, struct beacon_data *beacon = rcu_dereference(ap->beacon); if (beacon) { - if (sdata->vif.csa_active) - ieee80211_update_csa(sdata, beacon); + if (sdata->vif.csa_active) { + if (!is_template) + ieee80211_csa_update_counter(vif); + + ieee80211_set_csa(sdata, beacon); + } /* * headroom, head length, @@ -2593,6 +2600,9 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, if (offs) { offs->tim_offset = beacon->head_len; offs->tim_length = skb->len - beacon->head_len; + + /* for AP the csa offsets are from tail */ + csa_off_base = skb->len; } if (beacon->tail) @@ -2608,9 +2618,12 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, if (!presp) goto out; - if (sdata->vif.csa_active) - ieee80211_update_csa(sdata, presp); + if (sdata->vif.csa_active) { + if (!is_template) + ieee80211_csa_update_counter(vif); + ieee80211_set_csa(sdata, presp); + } skb = dev_alloc_skb(local->tx_headroom + presp->head_len + local->hw.extra_beacon_tailroom); @@ -2630,8 +2643,17 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, if (!bcn) goto out; - if (sdata->vif.csa_active) - ieee80211_update_csa(sdata, bcn); + if (sdata->vif.csa_active) { + if (!is_template) + /* TODO: For mesh csa_counter is in TU, so + * decrementing it by one isn't correct, but + * for now we leave it consistent with overall + * mac80211's behavior. + */ + ieee80211_csa_update_counter(vif); + + ieee80211_set_csa(sdata, bcn); + } if (ifmsh->sync_ops) ifmsh->sync_ops->adjust_tbtt(sdata, bcn); @@ -2658,6 +2680,20 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, goto out; } + /* CSA offsets */ + if (offs) { + int i; + + for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; i++) { + u16 csa_off = sdata->csa_counter_offset_beacon[i]; + + if (!csa_off) + continue; + + offs->csa_counter_offs[i] = csa_off_base + csa_off; + } + } + band = chanctx_conf->def.chan->band; info = IEEE80211_SKB_CB(skb); -- cgit v1.2.3-71-gd317 From 4449bf927b61bdb4389393c6fea6837214d1ace7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 6 May 2014 13:10:24 -0400 Subject: tracing: Add __bitmask() macro to trace events to cpumasks and other bitmasks Being able to show a cpumask of events can be useful as some events may affect only some CPUs. There is no standard way to record the cpumask and converting it to a string is rather expensive during the trace as traces happen in hotpaths. It would be better to record the raw event mask and be able to parse it at print time. The following macros were added for use with the TRACE_EVENT() macro: __bitmask() __assign_bitmask() __get_bitmask() To test this, I added this to the sched_migrate_task event, which looked like this: TRACE_EVENT(sched_migrate_task, TP_PROTO(struct task_struct *p, int dest_cpu, const struct cpumask *cpus), TP_ARGS(p, dest_cpu, cpus), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, prio ) __field( int, orig_cpu ) __field( int, dest_cpu ) __bitmask( cpumask, num_possible_cpus() ) ), TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; __entry->orig_cpu = task_cpu(p); __entry->dest_cpu = dest_cpu; __assign_bitmask(cpumask, cpumask_bits(cpus), num_possible_cpus()); ), TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d cpumask=%s", __entry->comm, __entry->pid, __entry->prio, __entry->orig_cpu, __entry->dest_cpu, __get_bitmask(cpumask)) ); With the output of: ksmtuned-3613 [003] d..2 485.220508: sched_migrate_task: comm=ksmtuned pid=3615 prio=120 orig_cpu=3 dest_cpu=2 cpumask=00000000,0000000f migration/1-13 [001] d..5 485.221202: sched_migrate_task: comm=ksmtuned pid=3614 prio=120 orig_cpu=1 dest_cpu=0 cpumask=00000000,0000000f awk-3615 [002] d.H5 485.221747: sched_migrate_task: comm=rcu_preempt pid=7 prio=120 orig_cpu=0 dest_cpu=1 cpumask=00000000,000000ff migration/2-18 [002] d..5 485.222062: sched_migrate_task: comm=ksmtuned pid=3615 prio=120 orig_cpu=2 dest_cpu=3 cpumask=00000000,0000000f Link: http://lkml.kernel.org/r/1399377998-14870-6-git-send-email-javi.merino@arm.com Link: http://lkml.kernel.org/r/20140506132238.22e136d1@gandalf.local.home Suggested-by: Javi Merino Tested-by: Javi Merino Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 3 +++ include/linux/trace_seq.h | 10 ++++++++ include/trace/ftrace.h | 57 +++++++++++++++++++++++++++++++++++++++++++- kernel/trace/trace_output.c | 41 +++++++++++++++++++++++++++++++ 4 files changed, 110 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index d16da3e53bc7..cff3106ffe2c 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -38,6 +38,9 @@ const char *ftrace_print_symbols_seq_u64(struct trace_seq *p, *symbol_array); #endif +const char *ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, + unsigned int bitmask_size); + const char *ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int len); diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index a32d86ec8bf2..136116924d8d 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -46,6 +46,9 @@ extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, extern void *trace_seq_reserve(struct trace_seq *s, size_t len); extern int trace_seq_path(struct trace_seq *s, const struct path *path); +extern int trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, + int nmaskbits); + #else /* CONFIG_TRACING */ static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) { @@ -57,6 +60,13 @@ trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) return 0; } +static inline int +trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, + int nmaskbits) +{ + return 0; +} + static inline int trace_print_seq(struct seq_file *m, struct trace_seq *s) { return 0; diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 0a1a4f7caf09..9b7a989dcbcc 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -53,6 +53,9 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) + #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args @@ -128,6 +131,9 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ struct ftrace_data_offsets_##call { \ @@ -200,6 +206,15 @@ #undef __get_str #define __get_str(field) (char *)__get_dynamic_array(field) +#undef __get_bitmask +#define __get_bitmask(field) \ + ({ \ + void *__bitmask = __get_dynamic_array(field); \ + unsigned int __bitmask_size; \ + __bitmask_size = (__entry->__data_loc_##field >> 16) & 0xffff; \ + ftrace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ + }) + #undef __print_flags #define __print_flags(flag, delim, flag_array...) \ ({ \ @@ -322,6 +337,9 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static int notrace __init \ @@ -372,6 +390,29 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #define __string(item, src) __dynamic_array(char, item, \ strlen((src) ? (const char *)(src) : "(null)") + 1) +/* + * __bitmask_size_in_bytes_raw is the number of bytes needed to hold + * num_possible_cpus(). + */ +#define __bitmask_size_in_bytes_raw(nr_bits) \ + (((nr_bits) + 7) / 8) + +#define __bitmask_size_in_longs(nr_bits) \ + ((__bitmask_size_in_bytes_raw(nr_bits) + \ + ((BITS_PER_LONG / 8) - 1)) / (BITS_PER_LONG / 8)) + +/* + * __bitmask_size_in_bytes is the number of bytes needed to hold + * num_possible_cpus() padded out to the nearest long. This is what + * is saved in the buffer, just to be consistent. + */ +#define __bitmask_size_in_bytes(nr_bits) \ + (__bitmask_size_in_longs(nr_bits) * (BITS_PER_LONG / 8)) + +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, \ + __bitmask_size_in_longs(nr_bits)) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static inline notrace int ftrace_get_offsets_##call( \ @@ -513,12 +554,22 @@ static inline notrace int ftrace_get_offsets_##call( \ __entry->__data_loc_##item = __data_offsets.item; #undef __string -#define __string(item, src) __dynamic_array(char, item, -1) \ +#define __string(item, src) __dynamic_array(char, item, -1) #undef __assign_str #define __assign_str(dst, src) \ strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)"); +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) + +#undef __get_bitmask +#define __get_bitmask(field) (char *)__get_dynamic_array(field) + +#undef __assign_bitmask +#define __assign_bitmask(dst, src, nr_bits) \ + memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) + #undef TP_fast_assign #define TP_fast_assign(args...) args @@ -586,6 +637,7 @@ static inline void ftrace_test_probe_##call(void) \ #undef __print_hex #undef __get_dynamic_array #undef __get_str +#undef __get_bitmask #undef TP_printk #define TP_printk(fmt, args...) "\"" fmt "\", " __stringify(args) @@ -651,6 +703,9 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call #undef __get_str #define __get_str(field) (char *)__get_dynamic_array(field) +#undef __get_bitmask +#define __get_bitmask(field) (char *)__get_dynamic_array(field) + #undef __perf_addr #define __perf_addr(a) (__addr = (a)) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index a436de18aa99..f3dad80c20b2 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -125,6 +125,34 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) } EXPORT_SYMBOL_GPL(trace_seq_printf); +/** + * trace_seq_bitmask - put a list of longs as a bitmask print output + * @s: trace sequence descriptor + * @maskp: points to an array of unsigned longs that represent a bitmask + * @nmaskbits: The number of bits that are valid in @maskp + * + * It returns 0 if the trace oversizes the buffer's free + * space, 1 otherwise. + * + * Writes a ASCII representation of a bitmask string into @s. + */ +int +trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, + int nmaskbits) +{ + int len = (PAGE_SIZE - 1) - s->len; + int ret; + + if (s->full || !len) + return 0; + + ret = bitmap_scnprintf(s->buffer, len, maskp, nmaskbits); + s->len += ret; + + return 1; +} +EXPORT_SYMBOL_GPL(trace_seq_bitmask); + /** * trace_seq_vprintf - sequence printing of trace information * @s: trace sequence descriptor @@ -398,6 +426,19 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, EXPORT_SYMBOL(ftrace_print_symbols_seq_u64); #endif +const char * +ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, + unsigned int bitmask_size) +{ + const char *ret = p->buffer + p->len; + + trace_seq_bitmask(p, bitmask_ptr, bitmask_size * 8); + trace_seq_putc(p, 0); + + return ret; +} +EXPORT_SYMBOL_GPL(ftrace_print_bitmask_seq); + const char * ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) { -- cgit v1.2.3-71-gd317 From fcd77db07dd2b8d35e0db0d1209f2ce1bb05531e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 15 May 2014 13:43:14 -0400 Subject: net: Fix CONFIG_SYSCTL ifdef test. > include/net/ip.h:211:5: warning: "CONFIG_SYSCTL" is not defined [-Wundef] > #if CONFIG_SYSCTL > ^ Reported-by: Stephen Rothwell Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 512bcd5dabac..2e4947895d75 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -208,7 +208,7 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o void inet_get_local_port_range(struct net *net, int *low, int *high); -#if CONFIG_SYSCTL +#ifdef CONFIG_SYSCTL static inline int inet_is_local_reserved_port(struct net *net, int port) { if (!net->ipv4.sysctl_local_reserved_ports) -- cgit v1.2.3-71-gd317 From c3a6114f31600b94ee10ebf62e4d493b401ade87 Mon Sep 17 00:00:00 2001 From: Phoebe Buckheister Date: Wed, 14 May 2014 17:43:06 +0200 Subject: ieee802154: add definitions for link-layer security and header functions When dealing with 802.15.4, one often has to know the maximum payload size for a given packet. This depends on many factors, one of which is whether or not a security header is present in the frame. These definitions and functions provide an easy way for any upper layer to calculate the maximum payload size for a packet. The first obvious user for this is 6lowpan, which duplicates this calculation and gets it partially wrong because it ignores security headers. Signed-off-by: Phoebe Buckheister Signed-off-by: David S. Miller --- include/net/ieee802154.h | 9 +++++++ include/net/ieee802154_netdev.h | 29 +++++++++++++++++++++++ net/ieee802154/header_ops.c | 52 +++++++++++++++++++++++++++++++++++------ 3 files changed, 83 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/ieee802154.h b/include/net/ieee802154.h index c7ae0ac528dc..0aa7122e8f15 100644 --- a/include/net/ieee802154.h +++ b/include/net/ieee802154.h @@ -79,6 +79,15 @@ #define IEEE802154_SCF_KEY_SHORT_INDEX 2 #define IEEE802154_SCF_KEY_HW_INDEX 3 +#define IEEE802154_SCF_SECLEVEL_NONE 0 +#define IEEE802154_SCF_SECLEVEL_MIC32 1 +#define IEEE802154_SCF_SECLEVEL_MIC64 2 +#define IEEE802154_SCF_SECLEVEL_MIC128 3 +#define IEEE802154_SCF_SECLEVEL_ENC 4 +#define IEEE802154_SCF_SECLEVEL_ENC_MIC32 5 +#define IEEE802154_SCF_SECLEVEL_ENC_MIC64 6 +#define IEEE802154_SCF_SECLEVEL_ENC_MIC128 7 + /* MAC footer size */ #define IEEE802154_MFR_SIZE 2 /* 2 octets */ diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 5a719ca892f4..6e4d3e1071b5 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -27,6 +27,7 @@ #ifndef IEEE802154_NETDEVICE_H #define IEEE802154_NETDEVICE_H +#include #include #include #include @@ -114,6 +115,34 @@ int ieee802154_hdr_pull(struct sk_buff *skb, struct ieee802154_hdr *hdr); int ieee802154_hdr_peek_addrs(const struct sk_buff *skb, struct ieee802154_hdr *hdr); +/* parses the full 802.15.4 header a given skb and stores them into hdr, + * performing pan id decompression and length checks to be suitable for use in + * header_ops.parse + */ +int ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr); + +int ieee802154_max_payload(const struct ieee802154_hdr *hdr); + +static inline int +ieee802154_sechdr_authtag_len(const struct ieee802154_sechdr *sec) +{ + switch (sec->level) { + case IEEE802154_SCF_SECLEVEL_MIC32: + case IEEE802154_SCF_SECLEVEL_ENC_MIC32: + return 4; + case IEEE802154_SCF_SECLEVEL_MIC64: + case IEEE802154_SCF_SECLEVEL_ENC_MIC64: + return 8; + case IEEE802154_SCF_SECLEVEL_MIC128: + case IEEE802154_SCF_SECLEVEL_ENC_MIC128: + return 16; + case IEEE802154_SCF_SECLEVEL_NONE: + case IEEE802154_SCF_SECLEVEL_ENC: + default: + return 0; + } +} + static inline int ieee802154_hdr_length(struct sk_buff *skb) { struct ieee802154_hdr hdr; diff --git a/net/ieee802154/header_ops.c b/net/ieee802154/header_ops.c index bed42a48408c..c09294e39ca6 100644 --- a/net/ieee802154/header_ops.c +++ b/net/ieee802154/header_ops.c @@ -195,15 +195,16 @@ ieee802154_hdr_get_sechdr(const u8 *buf, struct ieee802154_sechdr *hdr) return pos; } +static int ieee802154_sechdr_lengths[4] = { + [IEEE802154_SCF_KEY_IMPLICIT] = 5, + [IEEE802154_SCF_KEY_INDEX] = 6, + [IEEE802154_SCF_KEY_SHORT_INDEX] = 10, + [IEEE802154_SCF_KEY_HW_INDEX] = 14, +}; + static int ieee802154_hdr_sechdr_len(u8 sc) { - switch (IEEE802154_SCF_KEY_ID_MODE(sc)) { - case IEEE802154_SCF_KEY_IMPLICIT: return 5; - case IEEE802154_SCF_KEY_INDEX: return 6; - case IEEE802154_SCF_KEY_SHORT_INDEX: return 10; - case IEEE802154_SCF_KEY_HW_INDEX: return 14; - default: return -EINVAL; - } + return ieee802154_sechdr_lengths[IEEE802154_SCF_KEY_ID_MODE(sc)]; } static int ieee802154_hdr_minlen(const struct ieee802154_hdr *hdr) @@ -285,3 +286,40 @@ ieee802154_hdr_peek_addrs(const struct sk_buff *skb, struct ieee802154_hdr *hdr) return pos; } EXPORT_SYMBOL_GPL(ieee802154_hdr_peek_addrs); + +int +ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr) +{ + const u8 *buf = skb_mac_header(skb); + int pos; + + pos = ieee802154_hdr_peek_addrs(skb, hdr); + if (pos < 0) + return -EINVAL; + + if (hdr->fc.security_enabled) { + u8 key_id_mode = IEEE802154_SCF_KEY_ID_MODE(*(buf + pos)); + int want = pos + ieee802154_sechdr_lengths[key_id_mode]; + + if (buf + want > skb_tail_pointer(skb)) + return -EINVAL; + + pos += ieee802154_hdr_get_sechdr(buf + pos, &hdr->sec); + } + + return pos; +} +EXPORT_SYMBOL_GPL(ieee802154_hdr_peek); + +int ieee802154_max_payload(const struct ieee802154_hdr *hdr) +{ + int hlen = ieee802154_hdr_minlen(hdr); + + if (hdr->fc.security_enabled) { + hlen += ieee802154_sechdr_lengths[hdr->sec.key_id_mode] - 1; + hlen += ieee802154_sechdr_authtag_len(&hdr->sec); + } + + return IEEE802154_MTU - hlen - IEEE802154_MFR_SIZE; +} +EXPORT_SYMBOL_GPL(ieee802154_max_payload); -- cgit v1.2.3-71-gd317 From 32edc40ae65cf84e1ab69f6f8316ce81559e115d Mon Sep 17 00:00:00 2001 From: Phoebe Buckheister Date: Wed, 14 May 2014 17:43:08 +0200 Subject: ieee802154: change _cb handling slightly The current mac_cb handling of ieee802154 is rather awkward and limited. Decompose the single flags field into multiple fields with the meanings of each subfield of the flags field to make future extensions (for example, link-layer security) easier. Also don't set the frame sequence number in upper layers, since that's a thing the MAC is supposed to set on frame transmit - we set it on header creation, but assuming that upper layers do not blindly duplicate our headers, this is fine. Signed-off-by: Phoebe Buckheister Signed-off-by: David S. Miller --- include/net/ieee802154_netdev.h | 25 +++++++------------------ net/ieee802154/6lowpan_rtnl.c | 9 ++++----- net/ieee802154/dgram.c | 10 ++++------ net/mac802154/rx.c | 2 -- net/mac802154/wpan.c | 26 +++++++++++++------------- 5 files changed, 28 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 6e4d3e1071b5..bc9a7475e57e 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -222,8 +222,9 @@ static inline void ieee802154_addr_to_sa(struct ieee802154_addr_sa *sa, */ struct ieee802154_mac_cb { u8 lqi; - u8 flags; - u8 seq; + u8 type; + bool ackreq; + bool secen; struct ieee802154_addr source; struct ieee802154_addr dest; }; @@ -233,24 +234,12 @@ static inline struct ieee802154_mac_cb *mac_cb(struct sk_buff *skb) return (struct ieee802154_mac_cb *)skb->cb; } -#define MAC_CB_FLAG_TYPEMASK ((1 << 3) - 1) - -#define MAC_CB_FLAG_ACKREQ (1 << 3) -#define MAC_CB_FLAG_SECEN (1 << 4) - -static inline bool mac_cb_is_ackreq(struct sk_buff *skb) -{ - return mac_cb(skb)->flags & MAC_CB_FLAG_ACKREQ; -} - -static inline bool mac_cb_is_secen(struct sk_buff *skb) +static inline struct ieee802154_mac_cb *mac_cb_init(struct sk_buff *skb) { - return mac_cb(skb)->flags & MAC_CB_FLAG_SECEN; -} + BUILD_BUG_ON(sizeof(struct ieee802154_mac_cb) > sizeof(skb->cb)); -static inline int mac_cb_type(struct sk_buff *skb) -{ - return mac_cb(skb)->flags & MAC_CB_FLAG_TYPEMASK; + memset(skb->cb, 0, sizeof(struct ieee802154_mac_cb)); + return mac_cb(skb); } #define IEEE802154_MAC_SCAN_ED 0 diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c index 0f5a69ed746d..d0191c53cbd8 100644 --- a/net/ieee802154/6lowpan_rtnl.c +++ b/net/ieee802154/6lowpan_rtnl.c @@ -92,6 +92,7 @@ static int lowpan_header_create(struct sk_buff *skb, const u8 *saddr = _saddr; const u8 *daddr = _daddr; struct ieee802154_addr sa, da; + struct ieee802154_mac_cb *cb = mac_cb_init(skb); /* TODO: * if this package isn't ipv6 one, where should it be routed? @@ -115,8 +116,7 @@ static int lowpan_header_create(struct sk_buff *skb, * from MAC subif of the 'dev' and 'real_dev' network devices, but * this isn't implemented in mainline yet, so currently we assign 0xff */ - mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; - mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); + cb->type = IEEE802154_FC_TYPE_DATA; /* prepare wpan address data */ sa.mode = IEEE802154_ADDR_LONG; @@ -135,11 +135,10 @@ static int lowpan_header_create(struct sk_buff *skb, } else { da.mode = IEEE802154_ADDR_LONG; da.extended_addr = ieee802154_devaddr_from_raw(daddr); - - /* request acknowledgment */ - mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; } + cb->ackreq = !lowpan_is_addr_broadcast(daddr); + return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, type, (void *)&da, (void *)&sa, 0); } diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 786437bc0c08..d95e2e1bdf54 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -209,6 +209,7 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, struct net_device *dev; unsigned int mtu; struct sk_buff *skb; + struct ieee802154_mac_cb *cb; struct dgram_sock *ro = dgram_sk(sk); int hlen, tlen; int err; @@ -249,18 +250,15 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, skb_reset_network_header(skb); - mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; - if (ro->want_ack) - mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; + cb = mac_cb_init(skb); + cb->type = IEEE802154_FC_TYPE_DATA; + cb->ackreq = ro->want_ack; - mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &ro->dst_addr, ro->bound ? &ro->src_addr : NULL, size); if (err < 0) goto out_skb; - skb_reset_mac_header(skb); - err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) goto out_skb; diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index 03855b0677cc..0597b96dc9ba 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -59,8 +59,6 @@ mac802154_subif_rx(struct ieee802154_dev *hw, struct sk_buff *skb, u8 lqi) skb->protocol = htons(ETH_P_IEEE802154); skb_reset_mac_header(skb); - BUILD_BUG_ON(sizeof(struct ieee802154_mac_cb) > sizeof(skb->cb)); - if (!(priv->hw.flags & IEEE802154_HW_OMIT_CKSUM)) { u16 crc; diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index cb34064d05f6..bb49e88c460f 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -192,15 +192,17 @@ static int mac802154_header_create(struct sk_buff *skb, { struct ieee802154_hdr hdr; struct mac802154_sub_if_data *priv = netdev_priv(dev); + struct ieee802154_mac_cb *cb = mac_cb(skb); int hlen; if (!daddr) return -EINVAL; memset(&hdr.fc, 0, sizeof(hdr.fc)); - hdr.fc.type = mac_cb_type(skb); - hdr.fc.security_enabled = mac_cb_is_secen(skb); - hdr.fc.ack_request = mac_cb_is_ackreq(skb); + hdr.fc.type = cb->type; + hdr.fc.security_enabled = cb->secen; + hdr.fc.ack_request = cb->ackreq; + hdr.seq = ieee802154_mlme_ops(dev)->get_dsn(dev); if (!saddr) { spin_lock_bh(&priv->mib_lock); @@ -391,12 +393,12 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb) sdata->dev->stats.rx_packets++; sdata->dev->stats.rx_bytes += skb->len; - switch (mac_cb_type(skb)) { + switch (mac_cb(skb)->type) { case IEEE802154_FC_TYPE_DATA: return mac802154_process_data(sdata->dev, skb); default: pr_warn("ieee802154: bad frame received (type = %d)\n", - mac_cb_type(skb)); + mac_cb(skb)->type); kfree_skb(skb); return NET_RX_DROP; } @@ -423,6 +425,7 @@ static int mac802154_parse_frame_start(struct sk_buff *skb) { int hlen; struct ieee802154_hdr hdr; + struct ieee802154_mac_cb *cb = mac_cb_init(skb); hlen = ieee802154_hdr_pull(skb, &hdr); if (hlen < 0) @@ -433,18 +436,15 @@ static int mac802154_parse_frame_start(struct sk_buff *skb) pr_debug("fc: %04x dsn: %02x\n", le16_to_cpup((__le16 *)&hdr.fc), hdr.seq); - mac_cb(skb)->flags = hdr.fc.type; - - if (hdr.fc.ack_request) - mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; - if (hdr.fc.security_enabled) - mac_cb(skb)->flags |= MAC_CB_FLAG_SECEN; + cb->type = hdr.fc.type; + cb->ackreq = hdr.fc.ack_request; + cb->secen = hdr.fc.security_enabled; mac802154_print_addr("destination", &hdr.dest); mac802154_print_addr("source", &hdr.source); - mac_cb(skb)->source = hdr.source; - mac_cb(skb)->dest = hdr.dest; + cb->source = hdr.source; + cb->dest = hdr.dest; if (hdr.fc.security_enabled) { u64 key; -- cgit v1.2.3-71-gd317 From 622582786c9e041d0bd52bde201787adeab249f8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 13 May 2014 19:50:46 -0700 Subject: net: filter: x86: internal BPF JIT Maps all internal BPF instructions into x86_64 instructions. This patch replaces original BPF x64 JIT with internal BPF x64 JIT. sysctl net.core.bpf_jit_enable is reused as on/off switch. Performance: 1. old BPF JIT and internal BPF JIT generate equivalent x86_64 code. No performance difference is observed for filters that were JIT-able before Example assembler code for BPF filter "tcpdump port 22" original BPF -> old JIT: original BPF -> internal BPF -> new JIT: 0: push %rbp 0: push %rbp 1: mov %rsp,%rbp 1: mov %rsp,%rbp 4: sub $0x60,%rsp 4: sub $0x228,%rsp 8: mov %rbx,-0x8(%rbp) b: mov %rbx,-0x228(%rbp) // prologue 12: mov %r13,-0x220(%rbp) 19: mov %r14,-0x218(%rbp) 20: mov %r15,-0x210(%rbp) 27: xor %eax,%eax // clear A c: xor %ebx,%ebx 29: xor %r13,%r13 // clear X e: mov 0x68(%rdi),%r9d 2c: mov 0x68(%rdi),%r9d 12: sub 0x6c(%rdi),%r9d 30: sub 0x6c(%rdi),%r9d 16: mov 0xd8(%rdi),%r8 34: mov 0xd8(%rdi),%r10 3b: mov %rdi,%rbx 1d: mov $0xc,%esi 3e: mov $0xc,%esi 22: callq 0xffffffffe1021e15 43: callq 0xffffffffe102bd75 27: cmp $0x86dd,%eax 48: cmp $0x86dd,%rax 2c: jne 0x0000000000000069 4f: jne 0x000000000000009a 2e: mov $0x14,%esi 51: mov $0x14,%esi 33: callq 0xffffffffe1021e31 56: callq 0xffffffffe102bd91 38: cmp $0x84,%eax 5b: cmp $0x84,%rax 3d: je 0x0000000000000049 62: je 0x0000000000000074 3f: cmp $0x6,%eax 64: cmp $0x6,%rax 42: je 0x0000000000000049 68: je 0x0000000000000074 44: cmp $0x11,%eax 6a: cmp $0x11,%rax 47: jne 0x00000000000000c6 6e: jne 0x0000000000000117 49: mov $0x36,%esi 74: mov $0x36,%esi 4e: callq 0xffffffffe1021e15 79: callq 0xffffffffe102bd75 53: cmp $0x16,%eax 7e: cmp $0x16,%rax 56: je 0x00000000000000bf 82: je 0x0000000000000110 58: mov $0x38,%esi 88: mov $0x38,%esi 5d: callq 0xffffffffe1021e15 8d: callq 0xffffffffe102bd75 62: cmp $0x16,%eax 92: cmp $0x16,%rax 65: je 0x00000000000000bf 96: je 0x0000000000000110 67: jmp 0x00000000000000c6 98: jmp 0x0000000000000117 69: cmp $0x800,%eax 9a: cmp $0x800,%rax 6e: jne 0x00000000000000c6 a1: jne 0x0000000000000117 70: mov $0x17,%esi a3: mov $0x17,%esi 75: callq 0xffffffffe1021e31 a8: callq 0xffffffffe102bd91 7a: cmp $0x84,%eax ad: cmp $0x84,%rax 7f: je 0x000000000000008b b4: je 0x00000000000000c2 81: cmp $0x6,%eax b6: cmp $0x6,%rax 84: je 0x000000000000008b ba: je 0x00000000000000c2 86: cmp $0x11,%eax bc: cmp $0x11,%rax 89: jne 0x00000000000000c6 c0: jne 0x0000000000000117 8b: mov $0x14,%esi c2: mov $0x14,%esi 90: callq 0xffffffffe1021e15 c7: callq 0xffffffffe102bd75 95: test $0x1fff,%ax cc: test $0x1fff,%rax 99: jne 0x00000000000000c6 d3: jne 0x0000000000000117 d5: mov %rax,%r14 9b: mov $0xe,%esi d8: mov $0xe,%esi a0: callq 0xffffffffe1021e44 dd: callq 0xffffffffe102bd91 // MSH e2: and $0xf,%eax e5: shl $0x2,%eax e8: mov %rax,%r13 eb: mov %r14,%rax ee: mov %r13,%rsi a5: lea 0xe(%rbx),%esi f1: add $0xe,%esi a8: callq 0xffffffffe1021e0d f4: callq 0xffffffffe102bd6d ad: cmp $0x16,%eax f9: cmp $0x16,%rax b0: je 0x00000000000000bf fd: je 0x0000000000000110 ff: mov %r13,%rsi b2: lea 0x10(%rbx),%esi 102: add $0x10,%esi b5: callq 0xffffffffe1021e0d 105: callq 0xffffffffe102bd6d ba: cmp $0x16,%eax 10a: cmp $0x16,%rax bd: jne 0x00000000000000c6 10e: jne 0x0000000000000117 bf: mov $0xffff,%eax 110: mov $0xffff,%eax c4: jmp 0x00000000000000c8 115: jmp 0x000000000000011c c6: xor %eax,%eax 117: mov $0x0,%eax c8: mov -0x8(%rbp),%rbx 11c: mov -0x228(%rbp),%rbx // epilogue cc: leaveq 123: mov -0x220(%rbp),%r13 cd: retq 12a: mov -0x218(%rbp),%r14 131: mov -0x210(%rbp),%r15 138: leaveq 139: retq On fully cached SKBs both JITed functions take 12 nsec to execute. BPF interpreter executes the program in 30 nsec. The difference in generated assembler is due to the following: Old BPF imlements LDX_MSH instruction via sk_load_byte_msh() helper function inside bpf_jit.S. New JIT removes the helper and does it explicitly, so ldx_msh cost is the same for both JITs, but generated code looks longer. New JIT has 4 registers to save, so prologue/epilogue are larger, but the cost is within noise on x64. Old JIT checks whether first insn clears A and if not emits 'xor %eax,%eax'. New JIT clears %rax unconditionally. 2. old BPF JIT doesn't support ANC_NLATTR, ANC_PAY_OFFSET, ANC_RANDOM extensions. New JIT supports all BPF extensions. Performance of such filters improves 2-4 times depending on a filter. The longer the filter the higher performance gain. Synthetic benchmarks with many ancillary loads see 20x speedup which seems to be the maximum gain from JIT Notes: . net.core.bpf_jit_enable=2 + tools/net/bpf_jit_disasm is still functional and can be used to see generated assembler . there are two jit_compile() functions and code flow for classic filters is: sk_attach_filter() - load classic BPF bpf_jit_compile() - try to JIT from classic BPF sk_convert_filter() - convert classic to internal bpf_int_jit_compile() - JIT from internal BPF seccomp and tracing filters will just call bpf_int_jit_compile() Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit.S | 77 +-- arch/x86/net/bpf_jit_comp.c | 1314 +++++++++++++++++++++++-------------------- include/linux/filter.h | 3 + net/core/filter.c | 9 +- 4 files changed, 748 insertions(+), 655 deletions(-) (limited to 'include') diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S index 01495755701b..6440221ced0d 100644 --- a/arch/x86/net/bpf_jit.S +++ b/arch/x86/net/bpf_jit.S @@ -12,13 +12,16 @@ /* * Calling convention : - * rdi : skb pointer + * rbx : skb pointer (callee saved) * esi : offset of byte(s) to fetch in skb (can be scratched) - * r8 : copy of skb->data + * r10 : copy of skb->data * r9d : hlen = skb->len - skb->data_len */ -#define SKBDATA %r8 +#define SKBDATA %r10 #define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */ +#define MAX_BPF_STACK (512 /* from filter.h */ + \ + 32 /* space for rbx,r13,r14,r15 */ + \ + 8 /* space for skb_copy_bits */) sk_load_word: .globl sk_load_word @@ -68,53 +71,31 @@ sk_load_byte_positive_offset: movzbl (SKBDATA,%rsi),%eax ret -/** - * sk_load_byte_msh - BPF_S_LDX_B_MSH helper - * - * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf) - * Must preserve A accumulator (%eax) - * Inputs : %esi is the offset value - */ -sk_load_byte_msh: - .globl sk_load_byte_msh - test %esi,%esi - js bpf_slow_path_byte_msh_neg - -sk_load_byte_msh_positive_offset: - .globl sk_load_byte_msh_positive_offset - cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */ - jle bpf_slow_path_byte_msh - movzbl (SKBDATA,%rsi),%ebx - and $15,%bl - shl $2,%bl - ret - /* rsi contains offset and can be scratched */ #define bpf_slow_path_common(LEN) \ - push %rdi; /* save skb */ \ + mov %rbx, %rdi; /* arg1 == skb */ \ push %r9; \ push SKBDATA; \ /* rsi already has offset */ \ mov $LEN,%ecx; /* len */ \ - lea -12(%rbp),%rdx; \ + lea - MAX_BPF_STACK + 32(%rbp),%rdx; \ call skb_copy_bits; \ test %eax,%eax; \ pop SKBDATA; \ - pop %r9; \ - pop %rdi + pop %r9; bpf_slow_path_word: bpf_slow_path_common(4) js bpf_error - mov -12(%rbp),%eax + mov - MAX_BPF_STACK + 32(%rbp),%eax bswap %eax ret bpf_slow_path_half: bpf_slow_path_common(2) js bpf_error - mov -12(%rbp),%ax + mov - MAX_BPF_STACK + 32(%rbp),%ax rol $8,%ax movzwl %ax,%eax ret @@ -122,21 +103,11 @@ bpf_slow_path_half: bpf_slow_path_byte: bpf_slow_path_common(1) js bpf_error - movzbl -12(%rbp),%eax - ret - -bpf_slow_path_byte_msh: - xchg %eax,%ebx /* dont lose A , X is about to be scratched */ - bpf_slow_path_common(1) - js bpf_error - movzbl -12(%rbp),%eax - and $15,%al - shl $2,%al - xchg %eax,%ebx + movzbl - MAX_BPF_STACK + 32(%rbp),%eax ret #define sk_negative_common(SIZE) \ - push %rdi; /* save skb */ \ + mov %rbx, %rdi; /* arg1 == skb */ \ push %r9; \ push SKBDATA; \ /* rsi already has offset */ \ @@ -145,10 +116,8 @@ bpf_slow_path_byte_msh: test %rax,%rax; \ pop SKBDATA; \ pop %r9; \ - pop %rdi; \ jz bpf_error - bpf_slow_path_word_neg: cmp SKF_MAX_NEG_OFF, %esi /* test range */ jl bpf_error /* offset lower -> error */ @@ -179,22 +148,12 @@ sk_load_byte_negative_offset: movzbl (%rax), %eax ret -bpf_slow_path_byte_msh_neg: - cmp SKF_MAX_NEG_OFF, %esi - jl bpf_error -sk_load_byte_msh_negative_offset: - .globl sk_load_byte_msh_negative_offset - xchg %eax,%ebx /* dont lose A , X is about to be scratched */ - sk_negative_common(1) - movzbl (%rax),%eax - and $15,%al - shl $2,%al - xchg %eax,%ebx - ret - bpf_error: # force a return 0 from jit handler - xor %eax,%eax - mov -8(%rbp),%rbx + xor %eax,%eax + mov - MAX_BPF_STACK(%rbp),%rbx + mov - MAX_BPF_STACK + 8(%rbp),%r13 + mov - MAX_BPF_STACK + 16(%rbp),%r14 + mov - MAX_BPF_STACK + 24(%rbp),%r15 leaveq ret diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index c5fa7c9cb665..92aef8fdac2f 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1,6 +1,7 @@ /* bpf_jit_comp.c : BPF JIT compiler * * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) + * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -14,28 +15,16 @@ #include #include -/* - * Conventions : - * EAX : BPF A accumulator - * EBX : BPF X accumulator - * RDI : pointer to skb (first argument given to JIT function) - * RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) - * ECX,EDX,ESI : scratch registers - * r9d : skb->len - skb->data_len (headlen) - * r8 : skb->data - * -8(RBP) : saved RBX value - * -16(RBP)..-80(RBP) : BPF_MEMWORDS values - */ int bpf_jit_enable __read_mostly; /* * assembly code in arch/x86/net/bpf_jit.S */ -extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; +extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[]; -extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[]; +extern u8 sk_load_byte_positive_offset[]; extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[]; -extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[]; +extern u8 sk_load_byte_negative_offset[]; static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { @@ -56,30 +45,44 @@ static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) -#define EMIT1_off32(b1, off) do { EMIT1(b1); EMIT(off, 4);} while (0) - -#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */ -#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */ +#define EMIT1_off32(b1, off) \ + do {EMIT1(b1); EMIT(off, 4); } while (0) +#define EMIT2_off32(b1, b2, off) \ + do {EMIT2(b1, b2); EMIT(off, 4); } while (0) +#define EMIT3_off32(b1, b2, b3, off) \ + do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) +#define EMIT4_off32(b1, b2, b3, b4, off) \ + do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) static inline bool is_imm8(int value) { return value <= 127 && value >= -128; } -static inline bool is_near(int offset) +static inline bool is_simm32(s64 value) { - return offset <= 127 && offset >= -128; + return value == (s64) (s32) value; } -#define EMIT_JMP(offset) \ -do { \ - if (offset) { \ - if (is_near(offset)) \ - EMIT2(0xeb, offset); /* jmp .+off8 */ \ - else \ - EMIT1_off32(0xe9, offset); /* jmp .+off32 */ \ - } \ -} while (0) +/* mov A, X */ +#define EMIT_mov(A, X) \ + do {if (A != X) \ + EMIT3(add_2mod(0x48, A, X), 0x89, add_2reg(0xC0, A, X)); \ + } while (0) + +static int bpf_size_to_x86_bytes(int bpf_size) +{ + if (bpf_size == BPF_W) + return 4; + else if (bpf_size == BPF_H) + return 2; + else if (bpf_size == BPF_B) + return 1; + else if (bpf_size == BPF_DW) + return 4; /* imm32 */ + else + return 0; +} /* list of x86 cond jumps opcodes (. + s8) * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) @@ -90,27 +93,8 @@ do { \ #define X86_JNE 0x75 #define X86_JBE 0x76 #define X86_JA 0x77 - -#define EMIT_COND_JMP(op, offset) \ -do { \ - if (is_near(offset)) \ - EMIT2(op, offset); /* jxx .+off8 */ \ - else { \ - EMIT2(0x0f, op + 0x10); \ - EMIT(offset, 4); /* jxx .+off32 */ \ - } \ -} while (0) - -#define COND_SEL(CODE, TOP, FOP) \ - case CODE: \ - t_op = TOP; \ - f_op = FOP; \ - goto cond_branch - - -#define SEEN_DATAREF 1 /* might call external helpers */ -#define SEEN_XREG 2 /* ebx is used */ -#define SEEN_MEM 4 /* use mem[] for temporary storage */ +#define X86_JGE 0x7D +#define X86_JG 0x7F static inline void bpf_flush_icache(void *start, void *end) { @@ -125,26 +109,6 @@ static inline void bpf_flush_icache(void *start, void *end) #define CHOOSE_LOAD_FUNC(K, func) \ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) -/* Helper to find the offset of pkt_type in sk_buff - * We want to make sure its still a 3bit field starting at a byte boundary. - */ -#define PKT_TYPE_MAX 7 -static int pkt_type_offset(void) -{ - struct sk_buff skb_probe = { - .pkt_type = ~0, - }; - char *ct = (char *)&skb_probe; - unsigned int off; - - for (off = 0; off < sizeof(struct sk_buff); off++) { - if (ct[off] == PKT_TYPE_MAX) - return off; - } - pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n"); - return -1; -} - struct bpf_binary_header { unsigned int pages; /* Note : for security reasons, bpf code will follow a randomly @@ -178,546 +142,715 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen, return header; } +/* pick a register outside of BPF range for JIT internal work */ +#define AUX_REG (MAX_BPF_REG + 1) + +/* the following table maps BPF registers to x64 registers. + * x64 register r12 is unused, since if used as base address register + * in load/store instructions, it always needs an extra byte of encoding + */ +static const int reg2hex[] = { + [BPF_REG_0] = 0, /* rax */ + [BPF_REG_1] = 7, /* rdi */ + [BPF_REG_2] = 6, /* rsi */ + [BPF_REG_3] = 2, /* rdx */ + [BPF_REG_4] = 1, /* rcx */ + [BPF_REG_5] = 0, /* r8 */ + [BPF_REG_6] = 3, /* rbx callee saved */ + [BPF_REG_7] = 5, /* r13 callee saved */ + [BPF_REG_8] = 6, /* r14 callee saved */ + [BPF_REG_9] = 7, /* r15 callee saved */ + [BPF_REG_FP] = 5, /* rbp readonly */ + [AUX_REG] = 3, /* r11 temp register */ +}; + +/* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15 + * which need extra byte of encoding. + * rax,rcx,...,rbp have simpler encoding + */ +static inline bool is_ereg(u32 reg) +{ + if (reg == BPF_REG_5 || reg == AUX_REG || + (reg >= BPF_REG_7 && reg <= BPF_REG_9)) + return true; + else + return false; +} + +/* add modifiers if 'reg' maps to x64 registers r8..r15 */ +static inline u8 add_1mod(u8 byte, u32 reg) +{ + if (is_ereg(reg)) + byte |= 1; + return byte; +} + +static inline u8 add_2mod(u8 byte, u32 r1, u32 r2) +{ + if (is_ereg(r1)) + byte |= 1; + if (is_ereg(r2)) + byte |= 4; + return byte; +} + +/* encode dest register 'a_reg' into x64 opcode 'byte' */ +static inline u8 add_1reg(u8 byte, u32 a_reg) +{ + return byte + reg2hex[a_reg]; +} + +/* encode dest 'a_reg' and src 'x_reg' registers into x64 opcode 'byte' */ +static inline u8 add_2reg(u8 byte, u32 a_reg, u32 x_reg) +{ + return byte + reg2hex[a_reg] + (reg2hex[x_reg] << 3); +} + struct jit_context { unsigned int cleanup_addr; /* epilogue code offset */ - int pc_ret0; /* bpf index of first RET #0 instruction (if any) */ - u8 seen; + bool seen_ld_abs; }; static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { - const struct sock_filter *filter = bpf_prog->insns; - int flen = bpf_prog->len; + struct sock_filter_int *insn = bpf_prog->insnsi; + int insn_cnt = bpf_prog->len; u8 temp[64]; - u8 *prog; - int ilen, i, proglen; - int t_offset, f_offset; - u8 t_op, f_op, seen = 0; - u8 *func; - unsigned int cleanup_addr = ctx->cleanup_addr; - u8 seen_or_pass0 = ctx->seen; - - /* no prologue/epilogue for trivial filters (RET something) */ - proglen = 0; - prog = temp; + int i; + int proglen = 0; + u8 *prog = temp; + int stacksize = MAX_BPF_STACK + + 32 /* space for rbx, r13, r14, r15 */ + + 8 /* space for skb_copy_bits() buffer */; - if (seen_or_pass0) { - EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */ - EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */ - /* note : must save %rbx in case bpf_error is hit */ - if (seen_or_pass0 & (SEEN_XREG | SEEN_DATAREF)) - EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */ - if (seen_or_pass0 & SEEN_XREG) - CLEAR_X(); /* make sure we dont leek kernel memory */ - - /* - * If this filter needs to access skb data, - * loads r9 and r8 with : - * r9 = skb->len - skb->data_len - * r8 = skb->data - */ - if (seen_or_pass0 & SEEN_DATAREF) { - if (offsetof(struct sk_buff, len) <= 127) - /* mov off8(%rdi),%r9d */ - EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len)); - else { - /* mov off32(%rdi),%r9d */ - EMIT3(0x44, 0x8b, 0x8f); - EMIT(offsetof(struct sk_buff, len), 4); - } - if (is_imm8(offsetof(struct sk_buff, data_len))) - /* sub off8(%rdi),%r9d */ - EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len)); - else { - EMIT3(0x44, 0x2b, 0x8f); - EMIT(offsetof(struct sk_buff, data_len), 4); - } + EMIT1(0x55); /* push rbp */ + EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */ - if (is_imm8(offsetof(struct sk_buff, data))) - /* mov off8(%rdi),%r8 */ - EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data)); - else { - /* mov off32(%rdi),%r8 */ - EMIT3(0x4c, 0x8b, 0x87); - EMIT(offsetof(struct sk_buff, data), 4); - } + /* sub rsp, stacksize */ + EMIT3_off32(0x48, 0x81, 0xEC, stacksize); + + /* all classic BPF filters use R6(rbx) save it */ + + /* mov qword ptr [rbp-X],rbx */ + EMIT3_off32(0x48, 0x89, 0x9D, -stacksize); + + /* sk_convert_filter() maps classic BPF register X to R7 and uses R8 + * as temporary, so all tcpdump filters need to spill/fill R7(r13) and + * R8(r14). R9(r15) spill could be made conditional, but there is only + * one 'bpf_error' return path out of helper functions inside bpf_jit.S + * The overhead of extra spill is negligible for any filter other + * than synthetic ones. Therefore not worth adding complexity. + */ + + /* mov qword ptr [rbp-X],r13 */ + EMIT3_off32(0x4C, 0x89, 0xAD, -stacksize + 8); + /* mov qword ptr [rbp-X],r14 */ + EMIT3_off32(0x4C, 0x89, 0xB5, -stacksize + 16); + /* mov qword ptr [rbp-X],r15 */ + EMIT3_off32(0x4C, 0x89, 0xBD, -stacksize + 24); + + /* clear A and X registers */ + EMIT2(0x31, 0xc0); /* xor eax, eax */ + EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */ + + if (ctx->seen_ld_abs) { + /* r9d : skb->len - skb->data_len (headlen) + * r10 : skb->data + */ + if (is_imm8(offsetof(struct sk_buff, len))) + /* mov %r9d, off8(%rdi) */ + EMIT4(0x44, 0x8b, 0x4f, + offsetof(struct sk_buff, len)); + else + /* mov %r9d, off32(%rdi) */ + EMIT3_off32(0x44, 0x8b, 0x8f, + offsetof(struct sk_buff, len)); + + if (is_imm8(offsetof(struct sk_buff, data_len))) + /* sub %r9d, off8(%rdi) */ + EMIT4(0x44, 0x2b, 0x4f, + offsetof(struct sk_buff, data_len)); + else + EMIT3_off32(0x44, 0x2b, 0x8f, + offsetof(struct sk_buff, data_len)); + + if (is_imm8(offsetof(struct sk_buff, data))) + /* mov %r10, off8(%rdi) */ + EMIT4(0x4c, 0x8b, 0x57, + offsetof(struct sk_buff, data)); + else + /* mov %r10, off32(%rdi) */ + EMIT3_off32(0x4c, 0x8b, 0x97, + offsetof(struct sk_buff, data)); + } + + for (i = 0; i < insn_cnt; i++, insn++) { + const s32 K = insn->imm; + u32 a_reg = insn->a_reg; + u32 x_reg = insn->x_reg; + u8 b1 = 0, b2 = 0, b3 = 0; + s64 jmp_offset; + u8 jmp_cond; + int ilen; + u8 *func; + + switch (insn->code) { + /* ALU */ + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_X: + case BPF_ALU64 | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + switch (BPF_OP(insn->code)) { + case BPF_ADD: b2 = 0x01; break; + case BPF_SUB: b2 = 0x29; break; + case BPF_AND: b2 = 0x21; break; + case BPF_OR: b2 = 0x09; break; + case BPF_XOR: b2 = 0x31; break; } - } + if (BPF_CLASS(insn->code) == BPF_ALU64) + EMIT1(add_2mod(0x48, a_reg, x_reg)); + else if (is_ereg(a_reg) || is_ereg(x_reg)) + EMIT1(add_2mod(0x40, a_reg, x_reg)); + EMIT2(b2, add_2reg(0xC0, a_reg, x_reg)); + break; - switch (filter[0].code) { - case BPF_S_RET_K: - case BPF_S_LD_W_LEN: - case BPF_S_ANC_PROTOCOL: - case BPF_S_ANC_IFINDEX: - case BPF_S_ANC_MARK: - case BPF_S_ANC_RXHASH: - case BPF_S_ANC_CPU: - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - case BPF_S_ANC_QUEUE: - case BPF_S_ANC_PKTTYPE: - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: - /* first instruction sets A register (or is RET 'constant') */ + /* mov A, X */ + case BPF_ALU64 | BPF_MOV | BPF_X: + EMIT_mov(a_reg, x_reg); break; - default: - /* make sure we dont leak kernel information to user */ - CLEAR_A(); /* A = 0 */ - } - for (i = 0; i < flen; i++) { - unsigned int K = filter[i].k; + /* mov32 A, X */ + case BPF_ALU | BPF_MOV | BPF_X: + if (is_ereg(a_reg) || is_ereg(x_reg)) + EMIT1(add_2mod(0x40, a_reg, x_reg)); + EMIT2(0x89, add_2reg(0xC0, a_reg, x_reg)); + break; - switch (filter[i].code) { - case BPF_S_ALU_ADD_X: /* A += X; */ - seen |= SEEN_XREG; - EMIT2(0x01, 0xd8); /* add %ebx,%eax */ - break; - case BPF_S_ALU_ADD_K: /* A += K; */ - if (!K) - break; - if (is_imm8(K)) - EMIT3(0x83, 0xc0, K); /* add imm8,%eax */ - else - EMIT1_off32(0x05, K); /* add imm32,%eax */ - break; - case BPF_S_ALU_SUB_X: /* A -= X; */ - seen |= SEEN_XREG; - EMIT2(0x29, 0xd8); /* sub %ebx,%eax */ - break; - case BPF_S_ALU_SUB_K: /* A -= K */ - if (!K) - break; - if (is_imm8(K)) - EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */ - else - EMIT1_off32(0x2d, K); /* sub imm32,%eax */ - break; - case BPF_S_ALU_MUL_X: /* A *= X; */ - seen |= SEEN_XREG; - EMIT3(0x0f, 0xaf, 0xc3); /* imul %ebx,%eax */ - break; - case BPF_S_ALU_MUL_K: /* A *= K */ - if (is_imm8(K)) - EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */ - else { - EMIT2(0x69, 0xc0); /* imul imm32,%eax */ - EMIT(K, 4); - } - break; - case BPF_S_ALU_DIV_X: /* A /= X; */ - seen |= SEEN_XREG; - EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ - if (ctx->pc_ret0 > 0) { - /* addrs[pc_ret0 - 1] is start address of target - * (addrs[i] - 4) is the address following this jmp - * ("xor %edx,%edx; div %ebx" being 4 bytes long) - */ - EMIT_COND_JMP(X86_JE, addrs[ctx->pc_ret0 - 1] - - (addrs[i] - 4)); - } else { - EMIT_COND_JMP(X86_JNE, 2 + 5); - CLEAR_A(); - EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */ - } - EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */ - break; - case BPF_S_ALU_MOD_X: /* A %= X; */ - seen |= SEEN_XREG; - EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ - if (ctx->pc_ret0 > 0) { - /* addrs[pc_ret0 - 1] is start address of target - * (addrs[i] - 6) is the address following this jmp - * ("xor %edx,%edx; div %ebx;mov %edx,%eax" being 6 bytes long) - */ - EMIT_COND_JMP(X86_JE, addrs[ctx->pc_ret0 - 1] - - (addrs[i] - 6)); - } else { - EMIT_COND_JMP(X86_JNE, 2 + 5); - CLEAR_A(); - EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 6)); /* jmp .+off32 */ - } - EMIT2(0x31, 0xd2); /* xor %edx,%edx */ - EMIT2(0xf7, 0xf3); /* div %ebx */ - EMIT2(0x89, 0xd0); /* mov %edx,%eax */ - break; - case BPF_S_ALU_MOD_K: /* A %= K; */ - if (K == 1) { - CLEAR_A(); - break; - } - EMIT2(0x31, 0xd2); /* xor %edx,%edx */ - EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */ - EMIT2(0xf7, 0xf1); /* div %ecx */ - EMIT2(0x89, 0xd0); /* mov %edx,%eax */ - break; - case BPF_S_ALU_DIV_K: /* A /= K */ - if (K == 1) - break; - EMIT2(0x31, 0xd2); /* xor %edx,%edx */ - EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */ - EMIT2(0xf7, 0xf1); /* div %ecx */ - break; - case BPF_S_ALU_AND_X: - seen |= SEEN_XREG; - EMIT2(0x21, 0xd8); /* and %ebx,%eax */ - break; - case BPF_S_ALU_AND_K: - if (K >= 0xFFFFFF00) { - EMIT2(0x24, K & 0xFF); /* and imm8,%al */ - } else if (K >= 0xFFFF0000) { - EMIT2(0x66, 0x25); /* and imm16,%ax */ - EMIT(K, 2); - } else { - EMIT1_off32(0x25, K); /* and imm32,%eax */ - } - break; - case BPF_S_ALU_OR_X: - seen |= SEEN_XREG; - EMIT2(0x09, 0xd8); /* or %ebx,%eax */ - break; - case BPF_S_ALU_OR_K: - if (is_imm8(K)) - EMIT3(0x83, 0xc8, K); /* or imm8,%eax */ - else - EMIT1_off32(0x0d, K); /* or imm32,%eax */ - break; - case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ - case BPF_S_ALU_XOR_X: - seen |= SEEN_XREG; - EMIT2(0x31, 0xd8); /* xor %ebx,%eax */ - break; - case BPF_S_ALU_XOR_K: /* A ^= K; */ - if (K == 0) - break; - if (is_imm8(K)) - EMIT3(0x83, 0xf0, K); /* xor imm8,%eax */ - else - EMIT1_off32(0x35, K); /* xor imm32,%eax */ - break; - case BPF_S_ALU_LSH_X: /* A <<= X; */ - seen |= SEEN_XREG; - EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ - break; - case BPF_S_ALU_LSH_K: - if (K == 0) - break; - else if (K == 1) - EMIT2(0xd1, 0xe0); /* shl %eax */ - else - EMIT3(0xc1, 0xe0, K); - break; - case BPF_S_ALU_RSH_X: /* A >>= X; */ - seen |= SEEN_XREG; - EMIT4(0x89, 0xd9, 0xd3, 0xe8); /* mov %ebx,%ecx; shr %cl,%eax */ - break; - case BPF_S_ALU_RSH_K: /* A >>= K; */ - if (K == 0) - break; - else if (K == 1) - EMIT2(0xd1, 0xe8); /* shr %eax */ - else - EMIT3(0xc1, 0xe8, K); - break; - case BPF_S_ALU_NEG: - EMIT2(0xf7, 0xd8); /* neg %eax */ - break; - case BPF_S_RET_K: - if (!K) { - if (ctx->pc_ret0 == -1) - ctx->pc_ret0 = i; - CLEAR_A(); - } else { - EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ - } - /* fallinto */ - case BPF_S_RET_A: - if (seen_or_pass0) { - if (i != flen - 1) { - EMIT_JMP(cleanup_addr - addrs[i]); - break; - } - if (seen_or_pass0 & SEEN_XREG) - EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */ - EMIT1(0xc9); /* leaveq */ - } - EMIT1(0xc3); /* ret */ - break; - case BPF_S_MISC_TAX: /* X = A */ - seen |= SEEN_XREG; - EMIT2(0x89, 0xc3); /* mov %eax,%ebx */ - break; - case BPF_S_MISC_TXA: /* A = X */ - seen |= SEEN_XREG; - EMIT2(0x89, 0xd8); /* mov %ebx,%eax */ - break; - case BPF_S_LD_IMM: /* A = K */ - if (!K) - CLEAR_A(); - else - EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ + /* neg A */ + case BPF_ALU | BPF_NEG: + case BPF_ALU64 | BPF_NEG: + if (BPF_CLASS(insn->code) == BPF_ALU64) + EMIT1(add_1mod(0x48, a_reg)); + else if (is_ereg(a_reg)) + EMIT1(add_1mod(0x40, a_reg)); + EMIT2(0xF7, add_1reg(0xD8, a_reg)); + break; + + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + case BPF_ALU64 | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + if (BPF_CLASS(insn->code) == BPF_ALU64) + EMIT1(add_1mod(0x48, a_reg)); + else if (is_ereg(a_reg)) + EMIT1(add_1mod(0x40, a_reg)); + + switch (BPF_OP(insn->code)) { + case BPF_ADD: b3 = 0xC0; break; + case BPF_SUB: b3 = 0xE8; break; + case BPF_AND: b3 = 0xE0; break; + case BPF_OR: b3 = 0xC8; break; + case BPF_XOR: b3 = 0xF0; break; + } + + if (is_imm8(K)) + EMIT3(0x83, add_1reg(b3, a_reg), K); + else + EMIT2_off32(0x81, add_1reg(b3, a_reg), K); + break; + + case BPF_ALU64 | BPF_MOV | BPF_K: + /* optimization: if imm32 is positive, + * use 'mov eax, imm32' (which zero-extends imm32) + * to save 2 bytes + */ + if (K < 0) { + /* 'mov rax, imm32' sign extends imm32 */ + b1 = add_1mod(0x48, a_reg); + b2 = 0xC7; + b3 = 0xC0; + EMIT3_off32(b1, b2, add_1reg(b3, a_reg), K); break; - case BPF_S_LDX_IMM: /* X = K */ - seen |= SEEN_XREG; - if (!K) - CLEAR_X(); + } + + case BPF_ALU | BPF_MOV | BPF_K: + /* mov %eax, imm32 */ + if (is_ereg(a_reg)) + EMIT1(add_1mod(0x40, a_reg)); + EMIT1_off32(add_1reg(0xB8, a_reg), K); + break; + + /* A %= X, A /= X, A %= K, A /= K */ + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_MOD | BPF_K: + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_X: + case BPF_ALU64 | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_K: + case BPF_ALU64 | BPF_DIV | BPF_K: + EMIT1(0x50); /* push rax */ + EMIT1(0x52); /* push rdx */ + + if (BPF_SRC(insn->code) == BPF_X) + /* mov r11, X */ + EMIT_mov(AUX_REG, x_reg); + else + /* mov r11, K */ + EMIT3_off32(0x49, 0xC7, 0xC3, K); + + /* mov rax, A */ + EMIT_mov(BPF_REG_0, a_reg); + + /* xor edx, edx + * equivalent to 'xor rdx, rdx', but one byte less + */ + EMIT2(0x31, 0xd2); + + if (BPF_SRC(insn->code) == BPF_X) { + /* if (X == 0) return 0 */ + + /* cmp r11, 0 */ + EMIT4(0x49, 0x83, 0xFB, 0x00); + + /* jne .+9 (skip over pop, pop, xor and jmp) */ + EMIT2(X86_JNE, 1 + 1 + 2 + 5); + EMIT1(0x5A); /* pop rdx */ + EMIT1(0x58); /* pop rax */ + EMIT2(0x31, 0xc0); /* xor eax, eax */ + + /* jmp cleanup_addr + * addrs[i] - 11, because there are 11 bytes + * after this insn: div, mov, pop, pop, mov + */ + jmp_offset = ctx->cleanup_addr - (addrs[i] - 11); + EMIT1_off32(0xE9, jmp_offset); + } + + if (BPF_CLASS(insn->code) == BPF_ALU64) + /* div r11 */ + EMIT3(0x49, 0xF7, 0xF3); + else + /* div r11d */ + EMIT3(0x41, 0xF7, 0xF3); + + if (BPF_OP(insn->code) == BPF_MOD) + /* mov r11, rdx */ + EMIT3(0x49, 0x89, 0xD3); + else + /* mov r11, rax */ + EMIT3(0x49, 0x89, 0xC3); + + EMIT1(0x5A); /* pop rdx */ + EMIT1(0x58); /* pop rax */ + + /* mov A, r11 */ + EMIT_mov(a_reg, AUX_REG); + break; + + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU64 | BPF_MUL | BPF_K: + case BPF_ALU64 | BPF_MUL | BPF_X: + EMIT1(0x50); /* push rax */ + EMIT1(0x52); /* push rdx */ + + /* mov r11, A */ + EMIT_mov(AUX_REG, a_reg); + + if (BPF_SRC(insn->code) == BPF_X) + /* mov rax, X */ + EMIT_mov(BPF_REG_0, x_reg); + else + /* mov rax, K */ + EMIT3_off32(0x48, 0xC7, 0xC0, K); + + if (BPF_CLASS(insn->code) == BPF_ALU64) + EMIT1(add_1mod(0x48, AUX_REG)); + else if (is_ereg(AUX_REG)) + EMIT1(add_1mod(0x40, AUX_REG)); + /* mul(q) r11 */ + EMIT2(0xF7, add_1reg(0xE0, AUX_REG)); + + /* mov r11, rax */ + EMIT_mov(AUX_REG, BPF_REG_0); + + EMIT1(0x5A); /* pop rdx */ + EMIT1(0x58); /* pop rax */ + + /* mov A, r11 */ + EMIT_mov(a_reg, AUX_REG); + break; + + /* shifts */ + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_ARSH | BPF_K: + case BPF_ALU64 | BPF_LSH | BPF_K: + case BPF_ALU64 | BPF_RSH | BPF_K: + case BPF_ALU64 | BPF_ARSH | BPF_K: + if (BPF_CLASS(insn->code) == BPF_ALU64) + EMIT1(add_1mod(0x48, a_reg)); + else if (is_ereg(a_reg)) + EMIT1(add_1mod(0x40, a_reg)); + + switch (BPF_OP(insn->code)) { + case BPF_LSH: b3 = 0xE0; break; + case BPF_RSH: b3 = 0xE8; break; + case BPF_ARSH: b3 = 0xF8; break; + } + EMIT3(0xC1, add_1reg(b3, a_reg), K); + break; + + case BPF_ALU | BPF_END | BPF_FROM_BE: + switch (K) { + case 16: + /* emit 'ror %ax, 8' to swap lower 2 bytes */ + EMIT1(0x66); + if (is_ereg(a_reg)) + EMIT1(0x41); + EMIT3(0xC1, add_1reg(0xC8, a_reg), 8); + break; + case 32: + /* emit 'bswap eax' to swap lower 4 bytes */ + if (is_ereg(a_reg)) + EMIT2(0x41, 0x0F); else - EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */ - break; - case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */ - seen |= SEEN_MEM; - EMIT3(0x8b, 0x45, 0xf0 - K*4); + EMIT1(0x0F); + EMIT1(add_1reg(0xC8, a_reg)); break; - case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */ - seen |= SEEN_XREG | SEEN_MEM; - EMIT3(0x8b, 0x5d, 0xf0 - K*4); - break; - case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */ - seen |= SEEN_MEM; - EMIT3(0x89, 0x45, 0xf0 - K*4); - break; - case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ - seen |= SEEN_XREG | SEEN_MEM; - EMIT3(0x89, 0x5d, 0xf0 - K*4); - break; - case BPF_S_LD_W_LEN: /* A = skb->len; */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); - if (is_imm8(offsetof(struct sk_buff, len))) - /* mov off8(%rdi),%eax */ - EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len)); - else { - EMIT2(0x8b, 0x87); - EMIT(offsetof(struct sk_buff, len), 4); - } - break; - case BPF_S_LDX_W_LEN: /* X = skb->len; */ - seen |= SEEN_XREG; - if (is_imm8(offsetof(struct sk_buff, len))) - /* mov off8(%rdi),%ebx */ - EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len)); - else { - EMIT2(0x8b, 0x9f); - EMIT(offsetof(struct sk_buff, len), 4); - } - break; - case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); - if (is_imm8(offsetof(struct sk_buff, protocol))) { - /* movzwl off8(%rdi),%eax */ - EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol)); - } else { - EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ - EMIT(offsetof(struct sk_buff, protocol), 4); - } - EMIT2(0x86, 0xc4); /* ntohs() : xchg %al,%ah */ - break; - case BPF_S_ANC_IFINDEX: - if (is_imm8(offsetof(struct sk_buff, dev))) { - /* movq off8(%rdi),%rax */ - EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev)); - } else { - EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */ - EMIT(offsetof(struct sk_buff, dev), 4); - } - EMIT3(0x48, 0x85, 0xc0); /* test %rax,%rax */ - EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6)); - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); - EMIT2(0x8b, 0x80); /* mov off32(%rax),%eax */ - EMIT(offsetof(struct net_device, ifindex), 4); - break; - case BPF_S_ANC_MARK: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - if (is_imm8(offsetof(struct sk_buff, mark))) { - /* mov off8(%rdi),%eax */ - EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark)); - } else { - EMIT2(0x8b, 0x87); - EMIT(offsetof(struct sk_buff, mark), 4); - } - break; - case BPF_S_ANC_RXHASH: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); - if (is_imm8(offsetof(struct sk_buff, hash))) { - /* mov off8(%rdi),%eax */ - EMIT3(0x8b, 0x47, offsetof(struct sk_buff, hash)); - } else { - EMIT2(0x8b, 0x87); - EMIT(offsetof(struct sk_buff, hash), 4); - } - break; - case BPF_S_ANC_QUEUE: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); - if (is_imm8(offsetof(struct sk_buff, queue_mapping))) { - /* movzwl off8(%rdi),%eax */ - EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping)); - } else { - EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ - EMIT(offsetof(struct sk_buff, queue_mapping), 4); - } - break; - case BPF_S_ANC_CPU: -#ifdef CONFIG_SMP - EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */ - EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */ -#else - CLEAR_A(); -#endif - break; - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); - if (is_imm8(offsetof(struct sk_buff, vlan_tci))) { - /* movzwl off8(%rdi),%eax */ - EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, vlan_tci)); - } else { - EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ - EMIT(offsetof(struct sk_buff, vlan_tci), 4); - } - BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); - if (filter[i].code == BPF_S_ANC_VLAN_TAG) { - EMIT3(0x80, 0xe4, 0xef); /* and $0xef,%ah */ - } else { - EMIT3(0xc1, 0xe8, 0x0c); /* shr $0xc,%eax */ - EMIT3(0x83, 0xe0, 0x01); /* and $0x1,%eax */ - } - break; - case BPF_S_ANC_PKTTYPE: - { - int off = pkt_type_offset(); - - if (off < 0) - return -EINVAL; - if (is_imm8(off)) { - /* movzbl off8(%rdi),%eax */ - EMIT4(0x0f, 0xb6, 0x47, off); - } else { - /* movbl off32(%rdi),%eax */ - EMIT3(0x0f, 0xb6, 0x87); - EMIT(off, 4); - } - EMIT3(0x83, 0xe0, PKT_TYPE_MAX); /* and $0x7,%eax */ + case 64: + /* emit 'bswap rax' to swap 8 bytes */ + EMIT3(add_1mod(0x48, a_reg), 0x0F, + add_1reg(0xC8, a_reg)); break; } - case BPF_S_LD_W_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_word); -common_load: seen |= SEEN_DATAREF; - t_offset = func - (image + addrs[i]); - EMIT1_off32(0xbe, K); /* mov imm32,%esi */ - EMIT1_off32(0xe8, t_offset); /* call */ - break; - case BPF_S_LD_H_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_half); - goto common_load; - case BPF_S_LD_B_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_byte); - goto common_load; - case BPF_S_LDX_B_MSH: - func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh); - seen |= SEEN_DATAREF | SEEN_XREG; - t_offset = func - (image + addrs[i]); - EMIT1_off32(0xbe, K); /* mov imm32,%esi */ - EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */ - break; - case BPF_S_LD_W_IND: - func = sk_load_word; -common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; - t_offset = func - (image + addrs[i]); - if (K) { - if (is_imm8(K)) { - EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */ - } else { - EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */ - EMIT(K, 4); - } - } else { - EMIT2(0x89,0xde); /* mov %ebx,%esi */ - } - EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */ - break; - case BPF_S_LD_H_IND: - func = sk_load_half; - goto common_load_ind; - case BPF_S_LD_B_IND: - func = sk_load_byte; - goto common_load_ind; - case BPF_S_JMP_JA: - t_offset = addrs[i + K] - addrs[i]; - EMIT_JMP(t_offset); - break; - COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE); - COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB); - COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE); - COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE); - COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE); - COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB); - COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE); - COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE); - -cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; - t_offset = addrs[i + filter[i].jt] - addrs[i]; - - /* same targets, can avoid doing the test :) */ - if (filter[i].jt == filter[i].jf) { - EMIT_JMP(t_offset); - break; - } + break; + + case BPF_ALU | BPF_END | BPF_FROM_LE: + break; + + /* ST: *(u8*)(a_reg + off) = imm */ + case BPF_ST | BPF_MEM | BPF_B: + if (is_ereg(a_reg)) + EMIT2(0x41, 0xC6); + else + EMIT1(0xC6); + goto st; + case BPF_ST | BPF_MEM | BPF_H: + if (is_ereg(a_reg)) + EMIT3(0x66, 0x41, 0xC7); + else + EMIT2(0x66, 0xC7); + goto st; + case BPF_ST | BPF_MEM | BPF_W: + if (is_ereg(a_reg)) + EMIT2(0x41, 0xC7); + else + EMIT1(0xC7); + goto st; + case BPF_ST | BPF_MEM | BPF_DW: + EMIT2(add_1mod(0x48, a_reg), 0xC7); + +st: if (is_imm8(insn->off)) + EMIT2(add_1reg(0x40, a_reg), insn->off); + else + EMIT1_off32(add_1reg(0x80, a_reg), insn->off); + + EMIT(K, bpf_size_to_x86_bytes(BPF_SIZE(insn->code))); + break; + + /* STX: *(u8*)(a_reg + off) = x_reg */ + case BPF_STX | BPF_MEM | BPF_B: + /* emit 'mov byte ptr [rax + off], al' */ + if (is_ereg(a_reg) || is_ereg(x_reg) || + /* have to add extra byte for x86 SIL, DIL regs */ + x_reg == BPF_REG_1 || x_reg == BPF_REG_2) + EMIT2(add_2mod(0x40, a_reg, x_reg), 0x88); + else + EMIT1(0x88); + goto stx; + case BPF_STX | BPF_MEM | BPF_H: + if (is_ereg(a_reg) || is_ereg(x_reg)) + EMIT3(0x66, add_2mod(0x40, a_reg, x_reg), 0x89); + else + EMIT2(0x66, 0x89); + goto stx; + case BPF_STX | BPF_MEM | BPF_W: + if (is_ereg(a_reg) || is_ereg(x_reg)) + EMIT2(add_2mod(0x40, a_reg, x_reg), 0x89); + else + EMIT1(0x89); + goto stx; + case BPF_STX | BPF_MEM | BPF_DW: + EMIT2(add_2mod(0x48, a_reg, x_reg), 0x89); +stx: if (is_imm8(insn->off)) + EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off); + else + EMIT1_off32(add_2reg(0x80, a_reg, x_reg), + insn->off); + break; + + /* LDX: a_reg = *(u8*)(x_reg + off) */ + case BPF_LDX | BPF_MEM | BPF_B: + /* emit 'movzx rax, byte ptr [rax + off]' */ + EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB6); + goto ldx; + case BPF_LDX | BPF_MEM | BPF_H: + /* emit 'movzx rax, word ptr [rax + off]' */ + EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB7); + goto ldx; + case BPF_LDX | BPF_MEM | BPF_W: + /* emit 'mov eax, dword ptr [rax+0x14]' */ + if (is_ereg(a_reg) || is_ereg(x_reg)) + EMIT2(add_2mod(0x40, x_reg, a_reg), 0x8B); + else + EMIT1(0x8B); + goto ldx; + case BPF_LDX | BPF_MEM | BPF_DW: + /* emit 'mov rax, qword ptr [rax+0x14]' */ + EMIT2(add_2mod(0x48, x_reg, a_reg), 0x8B); +ldx: /* if insn->off == 0 we can save one extra byte, but + * special case of x86 r13 which always needs an offset + * is not worth the hassle + */ + if (is_imm8(insn->off)) + EMIT2(add_2reg(0x40, x_reg, a_reg), insn->off); + else + EMIT1_off32(add_2reg(0x80, x_reg, a_reg), + insn->off); + break; + + /* STX XADD: lock *(u32*)(a_reg + off) += x_reg */ + case BPF_STX | BPF_XADD | BPF_W: + /* emit 'lock add dword ptr [rax + off], eax' */ + if (is_ereg(a_reg) || is_ereg(x_reg)) + EMIT3(0xF0, add_2mod(0x40, a_reg, x_reg), 0x01); + else + EMIT2(0xF0, 0x01); + goto xadd; + case BPF_STX | BPF_XADD | BPF_DW: + EMIT3(0xF0, add_2mod(0x48, a_reg, x_reg), 0x01); +xadd: if (is_imm8(insn->off)) + EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off); + else + EMIT1_off32(add_2reg(0x80, a_reg, x_reg), + insn->off); + break; + + /* call */ + case BPF_JMP | BPF_CALL: + func = (u8 *) __bpf_call_base + K; + jmp_offset = func - (image + addrs[i]); + if (ctx->seen_ld_abs) { + EMIT2(0x41, 0x52); /* push %r10 */ + EMIT2(0x41, 0x51); /* push %r9 */ + /* need to adjust jmp offset, since + * pop %r9, pop %r10 take 4 bytes after call insn + */ + jmp_offset += 4; + } + if (!K || !is_simm32(jmp_offset)) { + pr_err("unsupported bpf func %d addr %p image %p\n", + K, func, image); + return -EINVAL; + } + EMIT1_off32(0xE8, jmp_offset); + if (ctx->seen_ld_abs) { + EMIT2(0x41, 0x59); /* pop %r9 */ + EMIT2(0x41, 0x5A); /* pop %r10 */ + } + break; + + /* cond jump */ + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + /* cmp a_reg, x_reg */ + EMIT3(add_2mod(0x48, a_reg, x_reg), 0x39, + add_2reg(0xC0, a_reg, x_reg)); + goto emit_cond_jmp; + + case BPF_JMP | BPF_JSET | BPF_X: + /* test a_reg, x_reg */ + EMIT3(add_2mod(0x48, a_reg, x_reg), 0x85, + add_2reg(0xC0, a_reg, x_reg)); + goto emit_cond_jmp; + + case BPF_JMP | BPF_JSET | BPF_K: + /* test a_reg, imm32 */ + EMIT1(add_1mod(0x48, a_reg)); + EMIT2_off32(0xF7, add_1reg(0xC0, a_reg), K); + goto emit_cond_jmp; + + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + /* cmp a_reg, imm8/32 */ + EMIT1(add_1mod(0x48, a_reg)); + + if (is_imm8(K)) + EMIT3(0x83, add_1reg(0xF8, a_reg), K); + else + EMIT2_off32(0x81, add_1reg(0xF8, a_reg), K); + +emit_cond_jmp: /* convert BPF opcode to x86 */ + switch (BPF_OP(insn->code)) { + case BPF_JEQ: + jmp_cond = X86_JE; + break; + case BPF_JSET: + case BPF_JNE: + jmp_cond = X86_JNE; + break; + case BPF_JGT: + /* GT is unsigned '>', JA in x86 */ + jmp_cond = X86_JA; + break; + case BPF_JGE: + /* GE is unsigned '>=', JAE in x86 */ + jmp_cond = X86_JAE; + break; + case BPF_JSGT: + /* signed '>', GT in x86 */ + jmp_cond = X86_JG; + break; + case BPF_JSGE: + /* signed '>=', GE in x86 */ + jmp_cond = X86_JGE; + break; + default: /* to silence gcc warning */ + return -EFAULT; + } + jmp_offset = addrs[i + insn->off] - addrs[i]; + if (is_imm8(jmp_offset)) { + EMIT2(jmp_cond, jmp_offset); + } else if (is_simm32(jmp_offset)) { + EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); + } else { + pr_err("cond_jmp gen bug %llx\n", jmp_offset); + return -EFAULT; + } + + break; - switch (filter[i].code) { - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JEQ_X: - seen |= SEEN_XREG; - EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */ - break; - case BPF_S_JMP_JSET_X: - seen |= SEEN_XREG; - EMIT2(0x85, 0xd8); /* test %ebx,%eax */ - break; - case BPF_S_JMP_JEQ_K: - if (K == 0) { - EMIT2(0x85, 0xc0); /* test %eax,%eax */ - break; - } - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGE_K: - if (K <= 127) - EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */ + case BPF_JMP | BPF_JA: + jmp_offset = addrs[i + insn->off] - addrs[i]; + if (!jmp_offset) + /* optimize out nop jumps */ + break; +emit_jmp: + if (is_imm8(jmp_offset)) { + EMIT2(0xEB, jmp_offset); + } else if (is_simm32(jmp_offset)) { + EMIT1_off32(0xE9, jmp_offset); + } else { + pr_err("jmp gen bug %llx\n", jmp_offset); + return -EFAULT; + } + break; + + case BPF_LD | BPF_IND | BPF_W: + func = sk_load_word; + goto common_load; + case BPF_LD | BPF_ABS | BPF_W: + func = CHOOSE_LOAD_FUNC(K, sk_load_word); +common_load: ctx->seen_ld_abs = true; + jmp_offset = func - (image + addrs[i]); + if (!func || !is_simm32(jmp_offset)) { + pr_err("unsupported bpf func %d addr %p image %p\n", + K, func, image); + return -EINVAL; + } + if (BPF_MODE(insn->code) == BPF_ABS) { + /* mov %esi, imm32 */ + EMIT1_off32(0xBE, K); + } else { + /* mov %rsi, x_reg */ + EMIT_mov(BPF_REG_2, x_reg); + if (K) { + if (is_imm8(K)) + /* add %esi, imm8 */ + EMIT3(0x83, 0xC6, K); else - EMIT1_off32(0x3d, K); /* cmp imm32,%eax */ - break; - case BPF_S_JMP_JSET_K: - if (K <= 0xFF) - EMIT2(0xa8, K); /* test imm8,%al */ - else if (!(K & 0xFFFF00FF)) - EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */ - else if (K <= 0xFFFF) { - EMIT2(0x66, 0xa9); /* test imm16,%ax */ - EMIT(K, 2); - } else { - EMIT1_off32(0xa9, K); /* test imm32,%eax */ - } - break; + /* add %esi, imm32 */ + EMIT2_off32(0x81, 0xC6, K); } - if (filter[i].jt != 0) { - if (filter[i].jf && f_offset) - t_offset += is_near(f_offset) ? 2 : 5; - EMIT_COND_JMP(t_op, t_offset); - if (filter[i].jf) - EMIT_JMP(f_offset); - break; - } - EMIT_COND_JMP(f_op, f_offset); - break; + } + /* skb pointer is in R6 (%rbx), it will be copied into + * %rdi if skb_copy_bits() call is necessary. + * sk_load_* helpers also use %r10 and %r9d. + * See bpf_jit.S + */ + EMIT1_off32(0xE8, jmp_offset); /* call */ + break; + + case BPF_LD | BPF_IND | BPF_H: + func = sk_load_half; + goto common_load; + case BPF_LD | BPF_ABS | BPF_H: + func = CHOOSE_LOAD_FUNC(K, sk_load_half); + goto common_load; + case BPF_LD | BPF_IND | BPF_B: + func = sk_load_byte; + goto common_load; + case BPF_LD | BPF_ABS | BPF_B: + func = CHOOSE_LOAD_FUNC(K, sk_load_byte); + goto common_load; + + case BPF_JMP | BPF_EXIT: + if (i != insn_cnt - 1) { + jmp_offset = ctx->cleanup_addr - addrs[i]; + goto emit_jmp; + } + /* update cleanup_addr */ + ctx->cleanup_addr = proglen; + /* mov rbx, qword ptr [rbp-X] */ + EMIT3_off32(0x48, 0x8B, 0x9D, -stacksize); + /* mov r13, qword ptr [rbp-X] */ + EMIT3_off32(0x4C, 0x8B, 0xAD, -stacksize + 8); + /* mov r14, qword ptr [rbp-X] */ + EMIT3_off32(0x4C, 0x8B, 0xB5, -stacksize + 16); + /* mov r15, qword ptr [rbp-X] */ + EMIT3_off32(0x4C, 0x8B, 0xBD, -stacksize + 24); + + EMIT1(0xC9); /* leave */ + EMIT1(0xC3); /* ret */ + break; + default: - /* hmm, too complex filter, give up with jit compiler */ + /* By design x64 JIT should support all BPF instructions + * This error will be seen if new instruction was added + * to interpreter, but not to JIT + * or if there is junk in sk_filter + */ + pr_err("bpf_jit: unknown opcode %02x\n", insn->code); return -EINVAL; } + ilen = prog - temp; if (image) { if (unlikely(proglen + ilen > oldproglen)) { - pr_err("bpb_jit_compile fatal error\n"); + pr_err("bpf_jit_compile fatal error\n"); return -EFAULT; } memcpy(image + proglen, temp, ilen); @@ -726,21 +859,14 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; addrs[i] = proglen; prog = temp; } - /* last bpf instruction is always a RET : - * use it to give the cleanup instruction(s) addr - */ - ctx->cleanup_addr = proglen - 1; /* ret */ - if (seen_or_pass0) - ctx->cleanup_addr -= 1; /* leaveq */ - if (seen_or_pass0 & SEEN_XREG) - ctx->cleanup_addr -= 4; /* mov -8(%rbp),%rbx */ - - ctx->seen = seen; - return proglen; } void bpf_jit_compile(struct sk_filter *prog) +{ +} + +void bpf_int_jit_compile(struct sk_filter *prog) { struct bpf_binary_header *header = NULL; int proglen, oldproglen = 0; @@ -768,8 +894,6 @@ void bpf_jit_compile(struct sk_filter *prog) addrs[i] = proglen; } ctx.cleanup_addr = proglen; - ctx.seen = SEEN_XREG | SEEN_DATAREF | SEEN_MEM; - ctx.pc_ret0 = -1; for (pass = 0; pass < 10; pass++) { proglen = do_jit(prog, addrs, image, oldproglen, &ctx); diff --git a/include/linux/filter.h b/include/linux/filter.h index 4457b383961c..9d5ae0a2c954 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -207,6 +207,9 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to); void sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); +u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); +void bpf_int_jit_compile(struct sk_filter *fp); + #ifdef CONFIG_BPF_JIT #include #include diff --git a/net/core/filter.c b/net/core/filter.c index c442a0d7d0f7..32c5b44c537e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1524,6 +1524,10 @@ out_err: return ERR_PTR(err); } +void __weak bpf_int_jit_compile(struct sk_filter *prog) +{ +} + static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, struct sock *sk) { @@ -1544,9 +1548,12 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, /* JIT compiler couldn't process this filter, so do the * internal BPF translation for the optimized interpreter. */ - if (!fp->jited) + if (!fp->jited) { fp = __sk_migrate_filter(fp, sk); + /* Probe if internal BPF can be jit-ed */ + bpf_int_jit_compile(fp); + } return fp; } -- cgit v1.2.3-71-gd317 From ad222799bec32a2db99c12b4dfa5dc19a1f6eaac Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 2 May 2014 13:22:19 +1000 Subject: drm: fix memory leak around mode_group (v2) This mode group id_list was never being freed. v2: take David's suggestion to free in minor_free. Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 6 ++++++ drivers/gpu/drm/drm_stub.c | 1 + include/drm/drm_crtc.h | 1 + 3 files changed, 8 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index d8b7099abece..a3fe32439a5b 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -1378,6 +1378,12 @@ static int drm_mode_group_init(struct drm_device *dev, struct drm_mode_group *gr return 0; } +void drm_mode_group_destroy(struct drm_mode_group *group) +{ + kfree(group->id_list); + group->id_list = NULL; +} + /* * NOTE: Driver's shouldn't ever call drm_mode_group_init_legacy_group - it is * the drm core's responsibility to set up mode control groups. diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c index 1447b0ee3676..3727ac8bc310 100644 --- a/drivers/gpu/drm/drm_stub.c +++ b/drivers/gpu/drm/drm_stub.c @@ -294,6 +294,7 @@ static void drm_minor_free(struct drm_device *dev, unsigned int type) slot = drm_minor_get_slot(dev, type); if (*slot) { + drm_mode_group_destroy(&(*slot)->mode_group); kfree(*slot); *slot = NULL; } diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index e55fccbe7c42..c6b9e8ab0a26 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -915,6 +915,7 @@ extern const char *drm_get_tv_subconnector_name(int val); extern const char *drm_get_tv_select_name(int val); extern void drm_fb_release(struct drm_file *file_priv); extern int drm_mode_group_init_legacy_group(struct drm_device *dev, struct drm_mode_group *group); +extern void drm_mode_group_destroy(struct drm_mode_group *group); extern bool drm_probe_ddc(struct i2c_adapter *adapter); extern struct edid *drm_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter); -- cgit v1.2.3-71-gd317 From 31ad169148df2252a774c73c504aff43bfa4b656 Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Wed, 14 May 2014 13:43:02 +0200 Subject: Bluetooth: Add conn info lifetime parameters to debugfs This patch adds conn_info_min_age and conn_info_max_age parameters to debugfs which determine lifetime of connection information. Actual lifetime will be random value between min and max age. Default values for min and max age are 1000ms and 3000ms respectively. Signed-off-by: Andrzej Kaczmarek Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 6 ++++ net/bluetooth/hci_core.c | 63 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 211bad6a3366..4623f45c8892 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -145,6 +145,10 @@ struct oob_data { /* Default LE RPA expiry time, 15 minutes */ #define HCI_DEFAULT_RPA_TIMEOUT (15 * 60) +/* Default min/max age of connection information (1s/3s) */ +#define DEFAULT_CONN_INFO_MIN_AGE 1000 +#define DEFAULT_CONN_INFO_MAX_AGE 3000 + struct amp_assoc { __u16 len; __u16 offset; @@ -200,6 +204,8 @@ struct hci_dev { __u16 le_conn_min_interval; __u16 le_conn_max_interval; __u16 discov_interleaved_timeout; + __u16 conn_info_min_age; + __u16 conn_info_max_age; __u8 ssp_debug_mode; __u16 devid_source; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d31f144860d1..313bd1c164d6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -579,6 +579,62 @@ static int sniff_max_interval_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get, sniff_max_interval_set, "%llu\n"); +static int conn_info_min_age_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val > hdev->conn_info_max_age) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->conn_info_min_age = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_info_min_age_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->conn_info_min_age; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get, + conn_info_min_age_set, "%llu\n"); + +static int conn_info_max_age_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val < hdev->conn_info_min_age) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->conn_info_max_age = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_info_max_age_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->conn_info_max_age; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get, + conn_info_max_age_set, "%llu\n"); + static int identity_show(struct seq_file *f, void *p) { struct hci_dev *hdev = f->private; @@ -1754,6 +1810,11 @@ static int __hci_init(struct hci_dev *hdev) &blacklist_fops); debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops); + debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev, + &conn_info_min_age_fops); + debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev, + &conn_info_max_age_fops); + if (lmp_bredr_capable(hdev)) { debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, hdev, &inquiry_cache_fops); @@ -3789,6 +3850,8 @@ struct hci_dev *hci_alloc_dev(void) hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT; hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT; + hdev->conn_info_min_age = DEFAULT_CONN_INFO_MIN_AGE; + hdev->conn_info_max_age = DEFAULT_CONN_INFO_MAX_AGE; mutex_init(&hdev->lock); mutex_init(&hdev->req_lock); -- cgit v1.2.3-71-gd317 From dd9838087b8c2b45c7976e46290749732d7af9d5 Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Wed, 14 May 2014 13:43:03 +0200 Subject: Bluetooth: Add support to get connection information This patch adds support for Get Connection Information mgmt command which can be used to query for information about connection, i.e. RSSI and local TX power level. In general values cached in hci_conn are returned as long as they are considered valid, i.e. do not exceed age limit set in hdev. This limit is calculated as random value between min/max values to avoid client trying to guess when to poll for updated information. Signed-off-by: Andrzej Kaczmarek Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 + include/net/bluetooth/mgmt.h | 12 +++ net/bluetooth/mgmt.c | 196 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 210 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4623f45c8892..cbbab6327621 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -384,6 +384,8 @@ struct hci_conn { __s8 tx_power; unsigned long flags; + unsigned long conn_info_timestamp; + __u8 remote_cap; __u8 remote_auth; __u8 remote_id; diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index d4b571c2f9fd..226ae03cafe7 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -409,6 +409,18 @@ struct mgmt_cp_load_irks { } __packed; #define MGMT_LOAD_IRKS_SIZE 2 +#define MGMT_OP_GET_CONN_INFO 0x0031 +struct mgmt_cp_get_conn_info { + struct mgmt_addr_info addr; +} __packed; +#define MGMT_GET_CONN_INFO_SIZE MGMT_ADDR_INFO_SIZE +struct mgmt_rp_get_conn_info { + struct mgmt_addr_info addr; + __s8 rssi; + __s8 tx_power; + __s8 max_tx_power; +} __packed; + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f2a9422d6139..0e5a316fafbf 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -83,6 +83,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_SET_DEBUG_KEYS, MGMT_OP_SET_PRIVACY, MGMT_OP_LOAD_IRKS, + MGMT_OP_GET_CONN_INFO, }; static const u16 mgmt_events[] = { @@ -4557,6 +4558,200 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, return err; } +struct cmd_conn_lookup { + struct hci_conn *conn; + bool valid_tx_power; + u8 mgmt_status; +}; + +static void get_conn_info_complete(struct pending_cmd *cmd, void *data) +{ + struct cmd_conn_lookup *match = data; + struct mgmt_cp_get_conn_info *cp; + struct mgmt_rp_get_conn_info rp; + struct hci_conn *conn = cmd->user_data; + + if (conn != match->conn) + return; + + cp = (struct mgmt_cp_get_conn_info *) cmd->param; + + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + + if (!match->mgmt_status) { + rp.rssi = conn->rssi; + + if (match->valid_tx_power) + rp.tx_power = conn->tx_power; + else + rp.tx_power = HCI_TX_POWER_INVALID; + + rp.max_tx_power = HCI_TX_POWER_INVALID; + } + + cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, + match->mgmt_status, &rp, sizeof(rp)); + + hci_conn_drop(conn); + + mgmt_pending_remove(cmd); +} + +static void conn_info_refresh_complete(struct hci_dev *hdev, u8 status) +{ + struct hci_cp_read_rssi *cp; + struct hci_conn *conn; + struct cmd_conn_lookup match; + u16 handle; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + /* TX power data is valid in case request completed successfully, + * otherwise we assume it's not valid. + */ + match.valid_tx_power = !status; + + /* Commands sent in request are either Read RSSI or Read Transmit Power + * Level so we check which one was last sent to retrieve connection + * handle. Both commands have handle as first parameter so it's safe to + * cast data on the same command struct. + * + * First command sent is always Read RSSI and we fail only if it fails. + * In other case we simply override error to indicate success as we + * already remembered if TX power value is actually valid. + */ + cp = hci_sent_cmd_data(hdev, HCI_OP_READ_RSSI); + if (!cp) { + cp = hci_sent_cmd_data(hdev, HCI_OP_READ_TX_POWER); + status = 0; + } + + if (!cp) { + BT_ERR("invalid sent_cmd in response"); + goto unlock; + } + + handle = __le16_to_cpu(cp->handle); + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (!conn) { + BT_ERR("unknown handle (%d) in response", handle); + goto unlock; + } + + match.conn = conn; + match.mgmt_status = mgmt_status(status); + + /* Cache refresh is complete, now reply for mgmt request for given + * connection only. + */ + mgmt_pending_foreach(MGMT_OP_GET_CONN_INFO, hdev, + get_conn_info_complete, &match); + +unlock: + hci_dev_unlock(hdev); +} + +static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, + u16 len) +{ + struct mgmt_cp_get_conn_info *cp = data; + struct mgmt_rp_get_conn_info rp; + struct hci_conn *conn; + unsigned long conn_info_age; + int err = 0; + + BT_DBG("%s", hdev->name); + + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + + hci_dev_lock(hdev); + + if (!hdev_is_powered(hdev)) { + err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); + goto unlock; + } + + if (cp->addr.type == BDADDR_BREDR) + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, + &cp->addr.bdaddr); + else + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); + + if (!conn || conn->state != BT_CONNECTED) { + err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp)); + goto unlock; + } + + /* To avoid client trying to guess when to poll again for information we + * calculate conn info age as random value between min/max set in hdev. + */ + conn_info_age = hdev->conn_info_min_age + + prandom_u32_max(hdev->conn_info_max_age - + hdev->conn_info_min_age); + + /* Query controller to refresh cached values if they are too old or were + * never read. + */ + if (time_after(jiffies, conn->conn_info_timestamp + conn_info_age) || + !conn->conn_info_timestamp) { + struct hci_request req; + struct hci_cp_read_tx_power req_txp_cp; + struct hci_cp_read_rssi req_rssi_cp; + struct pending_cmd *cmd; + + hci_req_init(&req, hdev); + req_rssi_cp.handle = cpu_to_le16(conn->handle); + hci_req_add(&req, HCI_OP_READ_RSSI, sizeof(req_rssi_cp), + &req_rssi_cp); + + req_txp_cp.handle = cpu_to_le16(conn->handle); + req_txp_cp.type = 0x00; + hci_req_add(&req, HCI_OP_READ_TX_POWER, + sizeof(req_txp_cp), &req_txp_cp); + + err = hci_req_run(&req, conn_info_refresh_complete); + if (err < 0) + goto unlock; + + cmd = mgmt_pending_add(sk, MGMT_OP_GET_CONN_INFO, hdev, + data, len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + hci_conn_hold(conn); + cmd->user_data = conn; + + conn->conn_info_timestamp = jiffies; + } else { + /* Cache is valid, just reply with values cached in hci_conn */ + rp.rssi = conn->rssi; + rp.tx_power = conn->tx_power; + rp.max_tx_power = HCI_TX_POWER_INVALID; + + err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + } + +unlock: + hci_dev_unlock(hdev); + return err; +} + static const struct mgmt_handler { int (*func) (struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len); @@ -4612,6 +4807,7 @@ static const struct mgmt_handler { { set_debug_keys, false, MGMT_SETTING_SIZE }, { set_privacy, false, MGMT_SET_PRIVACY_SIZE }, { load_irks, true, MGMT_LOAD_IRKS_SIZE }, + { get_conn_info, false, MGMT_GET_CONN_INFO_SIZE }, }; -- cgit v1.2.3-71-gd317 From d0455ed996df84fd2670a655fe13ab72f8264765 Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Wed, 14 May 2014 13:43:05 +0200 Subject: Bluetooth: Store max TX power level for connection This patch adds support to store local maximum TX power level for connection when reply for HCI_Read_Transmit_Power_Level is received. Signed-off-by: Andrzej Kaczmarek Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_conn.c | 1 + net/bluetooth/hci_event.c | 12 +++++++++++- 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index cbbab6327621..b386bf17e6c2 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -382,6 +382,7 @@ struct hci_conn { __u16 le_conn_max_interval; __s8 rssi; __s8 tx_power; + __s8 max_tx_power; unsigned long flags; unsigned long conn_info_timestamp; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 74b368bfe102..a987e7def025 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -408,6 +408,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) conn->remote_auth = 0xff; conn->key_type = 0xff; conn->tx_power = HCI_TX_POWER_INVALID; + conn->max_tx_power = HCI_TX_POWER_INVALID; set_bit(HCI_CONN_POWER_SAVE, &conn->flags); conn->disc_timeout = HCI_DISCONN_TIMEOUT; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index fa614e3430a7..492d8d5071c7 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1282,9 +1282,19 @@ static void hci_cc_read_tx_power(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); - if (conn && sent->type == 0x00) + if (!conn) + goto unlock; + + switch (sent->type) { + case 0x00: conn->tx_power = rp->tx_power; + break; + case 0x01: + conn->max_tx_power = rp->tx_power; + break; + } +unlock: hci_dev_unlock(hdev); } -- cgit v1.2.3-71-gd317 From 3b514d24e200fcdcde0a57c354a51d3677a86743 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 May 2014 13:22:47 -0400 Subject: cgroup: skip refcnting on normal root csses and cgrp_dfl_root self css 9395a4500404 ("cgroup: enable refcnting for root csses") enabled reference counting for root csses (cgroup_subsys_states) so that cgroup's self csses can be used to manage the lifetime of the containing cgroups. Unfortunately, this change was incorrect. During early init, cgrp_dfl_root self css refcnt is used. percpu_ref can't initialized during early init and its initialization is deferred till cgroup_init() time. This means that cpu was using percpu_ref which wasn't properly initialized. Due to the way percpu variables are laid out on x86, this didn't blow up immediately on x86 but ended up incrementing and decrementing the percpu variable at offset zero, whatever it may be; however, on other archs, this caused fault and early boot failure. As cgroup self csses for root cgroups of non-dfl hierarchies need working refcounting, we can't revert 9395a4500404. This patch adds CSS_NO_REF which explicitly inhibits reference counting on the css and sets it on all normal (non-self) csses and cgroup_dfl_root self css. v2: cgrp_dfl_root.self is the offending one. Set the flag on it. Signed-off-by: Tejun Heo Reported-by: Stephen Warren Tested-by: Stephen Warren Fixes: 9395a4500404 ("cgroup: enable refcnting for root csses") --- include/linux/cgroup.h | 11 ++++++++--- kernel/cgroup.c | 11 +++++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 76dadd77a120..1737db0c63fe 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -77,6 +77,7 @@ struct cgroup_subsys_state { /* bits in struct cgroup_subsys_state flags field */ enum { + CSS_NO_REF = (1 << 0), /* no reference counting for this css */ CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ }; @@ -88,7 +89,8 @@ enum { */ static inline void css_get(struct cgroup_subsys_state *css) { - percpu_ref_get(&css->refcnt); + if (!(css->flags & CSS_NO_REF)) + percpu_ref_get(&css->refcnt); } /** @@ -103,7 +105,9 @@ static inline void css_get(struct cgroup_subsys_state *css) */ static inline bool css_tryget_online(struct cgroup_subsys_state *css) { - return percpu_ref_tryget_live(&css->refcnt); + if (!(css->flags & CSS_NO_REF)) + return percpu_ref_tryget_live(&css->refcnt); + return true; } /** @@ -114,7 +118,8 @@ static inline bool css_tryget_online(struct cgroup_subsys_state *css) */ static inline void css_put(struct cgroup_subsys_state *css) { - percpu_ref_put(&css->refcnt); + if (!(css->flags & CSS_NO_REF)) + percpu_ref_put(&css->refcnt); } /* bits in struct cgroup flags field */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c01e8e8dfad0..0343d7ee6d62 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4593,11 +4593,17 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) /* We don't handle early failures gracefully */ BUG_ON(IS_ERR(css)); init_and_link_css(css, ss, &cgrp_dfl_root.cgrp); + + /* + * Root csses are never destroyed and we can't initialize + * percpu_ref during early init. Disable refcnting. + */ + css->flags |= CSS_NO_REF; + if (early) { /* allocation can't be done safely during early init */ css->id = 1; } else { - BUG_ON(percpu_ref_init(&css->refcnt, css_release)); css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL); BUG_ON(css->id < 0); } @@ -4636,6 +4642,8 @@ int __init cgroup_init_early(void) int i; init_cgroup_root(&cgrp_dfl_root, &opts); + cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF; + RCU_INIT_POINTER(init_task.cgroups, &init_css_set); for_each_subsys(ss, i) { @@ -4684,7 +4692,6 @@ int __init cgroup_init(void) struct cgroup_subsys_state *css = init_css_set.subsys[ss->id]; - BUG_ON(percpu_ref_init(&css->refcnt, css_release)); css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL); BUG_ON(css->id < 0); -- cgit v1.2.3-71-gd317 From 5c9d535b893f30266ea29fe377cb9b002fcd76aa Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 May 2014 13:22:48 -0400 Subject: cgroup: remove css_parent() cgroup in general is moving towards using cgroup_subsys_state as the fundamental structural component and css_parent() was introduced to convert from using cgroup->parent to css->parent. It was quite some time ago and we're moving forward with making css more prominent. This patch drops the trivial wrapper css_parent() and let the users dereference css->parent. While at it, explicitly mark fields of css which are public and immutable. v2: New usage from device_cgroup.c converted. Signed-off-by: Tejun Heo Acked-by: Michal Hocko Acked-by: Neil Horman Acked-by: "David S. Miller" Acked-by: Li Zefan Cc: Vivek Goyal Cc: Jens Axboe Cc: Peter Zijlstra Cc: Johannes Weiner --- block/blk-cgroup.h | 2 +- include/linux/cgroup.h | 29 +++++++++++------------------ kernel/cgroup.c | 8 ++++---- kernel/cgroup_freezer.c | 2 +- kernel/cpuset.c | 2 +- kernel/sched/core.c | 2 +- kernel/sched/cpuacct.c | 2 +- mm/hugetlb_cgroup.c | 2 +- mm/memcontrol.c | 14 +++++++------- net/core/netclassid_cgroup.c | 2 +- net/core/netprio_cgroup.c | 2 +- security/device_cgroup.c | 8 ++++---- 12 files changed, 34 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 371fe8e92ab5..d692b29c083a 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -204,7 +204,7 @@ static inline struct blkcg *bio_blkcg(struct bio *bio) */ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) { - return css_to_blkcg(css_parent(&blkcg->css)); + return css_to_blkcg(blkcg->css.parent); } /** diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 1737db0c63fe..2549493d518d 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -48,22 +48,28 @@ enum cgroup_subsys_id { }; #undef SUBSYS -/* Per-subsystem/per-cgroup state maintained by the system. */ +/* + * Per-subsystem/per-cgroup state maintained by the system. This is the + * fundamental structural building block that controllers deal with. + * + * Fields marked with "PI:" are public and immutable and may be accessed + * directly without synchronization. + */ struct cgroup_subsys_state { - /* the cgroup that this css is attached to */ + /* PI: the cgroup that this css is attached to */ struct cgroup *cgroup; - /* the cgroup subsystem that this css is attached to */ + /* PI: the cgroup subsystem that this css is attached to */ struct cgroup_subsys *ss; /* reference count - access via css_[try]get() and css_put() */ struct percpu_ref refcnt; - /* the parent css */ + /* PI: the parent css */ struct cgroup_subsys_state *parent; /* - * Subsys-unique ID. 0 is unused and root is always 1. The + * PI: Subsys-unique ID. 0 is unused and root is always 1. The * matching css can be looked up using css_from_id(). */ int id; @@ -669,19 +675,6 @@ struct cgroup_subsys { #include #undef SUBSYS -/** - * css_parent - find the parent css - * @css: the target cgroup_subsys_state - * - * Return the parent css of @css. This function is guaranteed to return - * non-NULL parent as long as @css isn't the root. - */ -static inline -struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css) -{ - return css->parent; -} - /** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0343d7ee6d62..929bbbc539e9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3176,10 +3176,10 @@ css_next_descendant_pre(struct cgroup_subsys_state *pos, /* no child, visit my or the closest ancestor's next sibling */ while (pos != root) { - next = css_next_child(pos, css_parent(pos)); + next = css_next_child(pos, pos->parent); if (next) return next; - pos = css_parent(pos); + pos = pos->parent; } return NULL; @@ -3261,12 +3261,12 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, return NULL; /* if there's an unvisited sibling, visit its leftmost descendant */ - next = css_next_child(pos, css_parent(pos)); + next = css_next_child(pos, pos->parent); if (next) return css_leftmost_descendant(next); /* no sibling left, visit parent */ - return css_parent(pos); + return pos->parent; } static bool cgroup_has_live_children(struct cgroup *cgrp) diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 6b4e60e33a9a..a79e40f9d700 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -59,7 +59,7 @@ static inline struct freezer *task_freezer(struct task_struct *task) static struct freezer *parent_freezer(struct freezer *freezer) { - return css_freezer(css_parent(&freezer->css)); + return css_freezer(freezer->css.parent); } bool cgroup_freezing(struct task_struct *task) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 2f4b08b8db24..5b2a31082f4f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -124,7 +124,7 @@ static inline struct cpuset *task_cs(struct task_struct *task) static inline struct cpuset *parent_cs(struct cpuset *cs) { - return css_cs(css_parent(&cs->css)); + return css_cs(cs->css.parent); } #ifdef CONFIG_NUMA diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 268a45ea238c..ac61ad1a5f9f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7586,7 +7586,7 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) { struct task_group *tg = css_tg(css); - struct task_group *parent = css_tg(css_parent(css)); + struct task_group *parent = css_tg(css->parent); if (parent) sched_online_group(tg, parent); diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index c143ee380e3a..9cf350c94ec4 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c @@ -46,7 +46,7 @@ static inline struct cpuacct *task_ca(struct task_struct *tsk) static inline struct cpuacct *parent_ca(struct cpuacct *ca) { - return css_ca(css_parent(&ca->css)); + return css_ca(ca->css.parent); } static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage); diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index a380681ab3cf..493f758445e7 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -52,7 +52,7 @@ static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg) static inline struct hugetlb_cgroup * parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg) { - return hugetlb_cgroup_from_css(css_parent(&h_cg->css)); + return hugetlb_cgroup_from_css(h_cg->css.parent); } static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b638a79209ee..a5e0417b4f9a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1540,7 +1540,7 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg) int mem_cgroup_swappiness(struct mem_cgroup *memcg) { /* root ? */ - if (!css_parent(&memcg->css)) + if (!memcg->css.parent) return vm_swappiness; return memcg->swappiness; @@ -4909,7 +4909,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, { int retval = 0; struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct mem_cgroup *parent_memcg = mem_cgroup_from_css(css_parent(&memcg->css)); + struct mem_cgroup *parent_memcg = mem_cgroup_from_css(memcg->css.parent); mutex_lock(&memcg_create_mutex); @@ -5207,8 +5207,8 @@ static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg, if (!memcg->use_hierarchy) goto out; - while (css_parent(&memcg->css)) { - memcg = mem_cgroup_from_css(css_parent(&memcg->css)); + while (memcg->css.parent) { + memcg = mem_cgroup_from_css(memcg->css.parent); if (!memcg->use_hierarchy) break; tmp = res_counter_read_u64(&memcg->res, RES_LIMIT); @@ -5443,7 +5443,7 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css, struct cftype *cft, u64 val) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css)); + struct mem_cgroup *parent = mem_cgroup_from_css(memcg->css.parent); if (val > 100 || !parent) return -EINVAL; @@ -5790,7 +5790,7 @@ static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css, struct cftype *cft, u64 val) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css)); + struct mem_cgroup *parent = mem_cgroup_from_css(memcg->css.parent); /* cannot set to root cgroup and only 0 and 1 are allowed */ if (!parent || !((val == 0) || (val == 1))) @@ -6407,7 +6407,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css)); + struct mem_cgroup *parent = mem_cgroup_from_css(css->parent); if (css->id > MEM_CGROUP_ID_MAX) return -ENOSPC; diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 22931e1b99b4..30d903b19c62 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -42,7 +42,7 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css) static int cgrp_css_online(struct cgroup_subsys_state *css) { struct cgroup_cls_state *cs = css_cls_state(css); - struct cgroup_cls_state *parent = css_cls_state(css_parent(css)); + struct cgroup_cls_state *parent = css_cls_state(css->parent); if (parent) cs->classid = parent->classid; diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index b990cefd906b..2f385b9bccc0 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -140,7 +140,7 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css) static int cgrp_css_online(struct cgroup_subsys_state *css) { - struct cgroup_subsys_state *parent_css = css_parent(css); + struct cgroup_subsys_state *parent_css = css->parent; struct net_device *dev; int ret = 0; diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 7dbac4061b1c..ce14a31b1337 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -182,7 +182,7 @@ static inline bool is_devcg_online(const struct dev_cgroup *devcg) static int devcgroup_online(struct cgroup_subsys_state *css) { struct dev_cgroup *dev_cgroup = css_to_devcgroup(css); - struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css_parent(css)); + struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css->parent); int ret = 0; mutex_lock(&devcgroup_mutex); @@ -455,7 +455,7 @@ static bool verify_new_ex(struct dev_cgroup *dev_cgroup, static int parent_has_perm(struct dev_cgroup *childcg, struct dev_exception_item *ex) { - struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css)); + struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent); if (!parent) return 1; @@ -476,7 +476,7 @@ static int parent_has_perm(struct dev_cgroup *childcg, static bool parent_allows_removal(struct dev_cgroup *childcg, struct dev_exception_item *ex) { - struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css)); + struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent); if (!parent) return true; @@ -614,7 +614,7 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, char temp[12]; /* 11 + 1 characters needed for a u32 */ int count, rc = 0; struct dev_exception_item ex; - struct dev_cgroup *parent = css_to_devcgroup(css_parent(&devcgroup->css)); + struct dev_cgroup *parent = css_to_devcgroup(devcgroup->css.parent); if (!capable(CAP_SYS_ADMIN)) return -EPERM; -- cgit v1.2.3-71-gd317 From d51f39b05ce0008118c45945e681b20484990571 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 May 2014 13:22:48 -0400 Subject: cgroup: remove cgroup->parent cgroup->parent is redundant as cgroup->self.parent can also be used to determine the parent cgroup and we're moving towards using cgroup_subsys_states as the fundamental structural blocks. This patch introduces cgroup_parent() which follows cgroup->self.parent and removes cgroup->parent. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 1 - kernel/cgroup.c | 52 +++++++++++++++++++++++++++++--------------------- 2 files changed, 30 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 2549493d518d..fd538f4c2bb6 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -178,7 +178,6 @@ struct cgroup { struct list_head sibling; /* my parent's children */ struct list_head children; /* my children */ - struct cgroup *parent; /* my parent */ struct kernfs_node *kn; /* cgroup kernfs entry */ struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 929bbbc539e9..8c67a739aea4 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -218,6 +218,15 @@ static void cgroup_idr_remove(struct idr *idr, int id) spin_unlock_bh(&cgroup_idr_lock); } +static struct cgroup *cgroup_parent(struct cgroup *cgrp) +{ + struct cgroup_subsys_state *parent_css = cgrp->self.parent; + + if (parent_css) + return container_of(parent_css, struct cgroup, self); + return NULL; +} + /** * cgroup_css - obtain a cgroup's css for the specified subsystem * @cgrp: the cgroup of interest @@ -260,9 +269,9 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, if (!(cgrp->root->subsys_mask & (1 << ss->id))) return NULL; - while (cgrp->parent && - !(cgrp->parent->child_subsys_mask & (1 << ss->id))) - cgrp = cgrp->parent; + while (cgroup_parent(cgrp) && + !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id))) + cgrp = cgroup_parent(cgrp); return cgroup_css(cgrp, ss); } @@ -307,7 +316,7 @@ bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor) while (cgrp) { if (cgrp == ancestor) return true; - cgrp = cgrp->parent; + cgrp = cgroup_parent(cgrp); } return false; } @@ -454,7 +463,7 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated) if (cgrp->populated_kn) kernfs_notify(cgrp->populated_kn); - cgrp = cgrp->parent; + cgrp = cgroup_parent(cgrp); } while (cgrp); } @@ -2018,7 +2027,7 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, * Except for the root, child_subsys_mask must be zero for a cgroup * with tasks so that child cgroups don't compete against tasks. */ - if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && dst_cgrp->parent && + if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && cgroup_parent(dst_cgrp) && dst_cgrp->child_subsys_mask) return -EBUSY; @@ -2427,7 +2436,7 @@ static int cgroup_controllers_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - cgroup_print_ss_mask(seq, cgrp->parent->child_subsys_mask); + cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->child_subsys_mask); return 0; } @@ -2610,8 +2619,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, /* unavailable or not enabled on the parent? */ if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) || - (cgrp->parent && - !(cgrp->parent->child_subsys_mask & (1 << ssid)))) { + (cgroup_parent(cgrp) && + !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ssid)))) { ret = -ENOENT; goto out_unlock; } @@ -2640,7 +2649,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, * Except for the root, child_subsys_mask must be zero for a cgroup * with tasks so that child cgroups don't compete against tasks. */ - if (enable && cgrp->parent && !list_empty(&cgrp->cset_links)) { + if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) { ret = -EBUSY; goto out_unlock; } @@ -2898,9 +2907,9 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], continue; if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp)) continue; - if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent) + if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp)) continue; - if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) + if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp)) continue; if (is_add) { @@ -4092,14 +4101,14 @@ static void css_free_work_fn(struct work_struct *work) atomic_dec(&cgrp->root->nr_cgrps); cgroup_pidlist_destroy_all(cgrp); - if (cgrp->parent) { + if (cgroup_parent(cgrp)) { /* * We get a ref to the parent, and put the ref when * this cgroup is being freed, so it's guaranteed * that the parent won't be destroyed before its * children. */ - cgroup_put(cgrp->parent); + cgroup_put(cgroup_parent(cgrp)); kernfs_put(cgrp->kn); kfree(cgrp); } else { @@ -4163,8 +4172,8 @@ static void init_and_link_css(struct cgroup_subsys_state *css, css->ss = ss; css->flags = 0; - if (cgrp->parent) { - css->parent = cgroup_css(cgrp->parent, ss); + if (cgroup_parent(cgrp)) { + css->parent = cgroup_css(cgroup_parent(cgrp), ss); css_get(css->parent); } @@ -4218,7 +4227,7 @@ static void offline_css(struct cgroup_subsys_state *css) */ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) { - struct cgroup *parent = cgrp->parent; + struct cgroup *parent = cgroup_parent(cgrp); struct cgroup_subsys_state *css; int err; @@ -4251,7 +4260,7 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) goto err_clear_dir; if (ss->broken_hierarchy && !ss->warned_broken_hierarchy && - parent->parent) { + cgroup_parent(parent)) { pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n", current->comm, current->pid, ss->name); if (!strcmp(ss->name, "memory")) @@ -4309,7 +4318,6 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, init_cgroup_housekeeping(cgrp); - cgrp->parent = parent; cgrp->self.parent = &parent->self; cgrp->root = root; @@ -4336,7 +4344,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, cgrp->serial_nr = cgroup_serial_nr_next++; /* allocation complete, commit to creation */ - list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); + list_add_tail_rcu(&cgrp->sibling, &cgroup_parent(cgrp)->children); atomic_inc(&root->nr_cgrps); cgroup_get(parent); @@ -4531,8 +4539,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) */ kernfs_remove(cgrp->kn); - set_bit(CGRP_RELEASABLE, &cgrp->parent->flags); - check_for_release(cgrp->parent); + set_bit(CGRP_RELEASABLE, &cgroup_parent(cgrp)->flags); + check_for_release(cgroup_parent(cgrp)); /* put the base reference */ percpu_ref_kill(&cgrp->self.refcnt); -- cgit v1.2.3-71-gd317 From d5c419b68e368fdd9f1857bf8d4bb4480edb9b80 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 May 2014 13:22:48 -0400 Subject: cgroup: move cgroup->sibling and ->children into cgroup_subsys_state We're moving towards using cgroup_subsys_states as the fundamental structural blocks. Let's move cgroup->sibling and ->children into cgroup_subsys_state. This is pure move without functional change and only cgroup->self's fields are actually used. Other csses will make use of the fields later. While at it, update init_and_link_css() so that it zeroes the whole css before initializing it and remove explicit zeroing of ->flags. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 11 ++++------- kernel/cgroup.c | 38 ++++++++++++++++++++------------------ 2 files changed, 24 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index fd538f4c2bb6..cf8ba26b7c6e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -68,6 +68,10 @@ struct cgroup_subsys_state { /* PI: the parent css */ struct cgroup_subsys_state *parent; + /* siblings list anchored at the parent's ->children */ + struct list_head sibling; + struct list_head children; + /* * PI: Subsys-unique ID. 0 is unused and root is always 1. The * matching css can be looked up using css_from_id(). @@ -171,13 +175,6 @@ struct cgroup { */ int populated_cnt; - /* - * We link our 'sibling' struct into our parent's 'children'. - * Our children link their 'sibling' into our 'children'. - */ - struct list_head sibling; /* my parent's children */ - struct list_head children; /* my children */ - struct kernfs_node *kn; /* cgroup kernfs entry */ struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 8c67a739aea4..5385839e727b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -378,7 +378,7 @@ static int notify_on_release(const struct cgroup *cgrp) /* iterate over child cgrps, lock should be held throughout iteration */ #define cgroup_for_each_live_child(child, cgrp) \ - list_for_each_entry((child), &(cgrp)->children, sibling) \ + list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \ if (({ lockdep_assert_held(&cgroup_mutex); \ cgroup_is_dead(child); })) \ ; \ @@ -870,7 +870,7 @@ static void cgroup_destroy_root(struct cgroup_root *root) mutex_lock(&cgroup_mutex); BUG_ON(atomic_read(&root->nr_cgrps)); - BUG_ON(!list_empty(&cgrp->children)); + BUG_ON(!list_empty(&cgrp->self.children)); /* Rebind all subsystems back to the default hierarchy */ rebind_subsystems(&cgrp_dfl_root, root->subsys_mask); @@ -1432,7 +1432,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) } /* remounting is not allowed for populated hierarchies */ - if (!list_empty(&root->cgrp.children)) { + if (!list_empty(&root->cgrp.self.children)) { ret = -EBUSY; goto out_unlock; } @@ -1512,8 +1512,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) struct cgroup_subsys *ss; int ssid; - INIT_LIST_HEAD(&cgrp->sibling); - INIT_LIST_HEAD(&cgrp->children); + INIT_LIST_HEAD(&cgrp->self.sibling); + INIT_LIST_HEAD(&cgrp->self.children); INIT_LIST_HEAD(&cgrp->cset_links); INIT_LIST_HEAD(&cgrp->release_list); INIT_LIST_HEAD(&cgrp->pidlists); @@ -1612,7 +1612,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask) link_css_set(&tmp_links, cset, root_cgrp); up_write(&css_set_rwsem); - BUG_ON(!list_empty(&root_cgrp->children)); + BUG_ON(!list_empty(&root_cgrp->self.children)); BUG_ON(atomic_read(&root->nr_cgrps) != 1); kernfs_activate(root_cgrp->kn); @@ -3128,11 +3128,11 @@ css_next_child(struct cgroup_subsys_state *pos_css, * cgroup is removed or iteration and removal race. */ if (!pos) { - next = list_entry_rcu(cgrp->children.next, struct cgroup, sibling); + next = list_entry_rcu(cgrp->self.children.next, struct cgroup, self.sibling); } else if (likely(!cgroup_is_dead(pos))) { - next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling); + next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling); } else { - list_for_each_entry_rcu(next, &cgrp->children, sibling) + list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling) if (next->serial_nr > pos->serial_nr) break; } @@ -3142,12 +3142,12 @@ css_next_child(struct cgroup_subsys_state *pos_css, * the next sibling; however, it might have @ss disabled. If so, * fast-forward to the next enabled one. */ - while (&next->sibling != &cgrp->children) { + while (&next->self.sibling != &cgrp->self.children) { struct cgroup_subsys_state *next_css = cgroup_css(next, parent_css->ss); if (next_css) return next_css; - next = list_entry_rcu(next->sibling.next, struct cgroup, sibling); + next = list_entry_rcu(next->self.sibling.next, struct cgroup, self.sibling); } return NULL; } @@ -3283,7 +3283,7 @@ static bool cgroup_has_live_children(struct cgroup *cgrp) struct cgroup *child; rcu_read_lock(); - list_for_each_entry_rcu(child, &cgrp->children, sibling) { + list_for_each_entry_rcu(child, &cgrp->self.children, self.sibling) { if (!cgroup_is_dead(child)) { rcu_read_unlock(); return true; @@ -4144,7 +4144,7 @@ static void css_release_work_fn(struct work_struct *work) } else { /* cgroup release path */ mutex_lock(&cgroup_mutex); - list_del_rcu(&cgrp->sibling); + list_del_rcu(&cgrp->self.sibling); mutex_unlock(&cgroup_mutex); cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); @@ -4168,9 +4168,11 @@ static void init_and_link_css(struct cgroup_subsys_state *css, { cgroup_get(cgrp); + memset(css, 0, sizeof(*css)); css->cgroup = cgrp; css->ss = ss; - css->flags = 0; + INIT_LIST_HEAD(&css->sibling); + INIT_LIST_HEAD(&css->children); if (cgroup_parent(cgrp)) { css->parent = cgroup_css(cgroup_parent(cgrp), ss); @@ -4344,7 +4346,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, cgrp->serial_nr = cgroup_serial_nr_next++; /* allocation complete, commit to creation */ - list_add_tail_rcu(&cgrp->sibling, &cgroup_parent(cgrp)->children); + list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children); atomic_inc(&root->nr_cgrps); cgroup_get(parent); @@ -4507,9 +4509,9 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) return -EBUSY; /* - * Make sure there's no live children. We can't test ->children - * emptiness as dead children linger on it while being destroyed; - * otherwise, "rmdir parent/child parent" may fail with -EBUSY. + * Make sure there's no live children. We can't test emptiness of + * ->self.children as dead children linger on it while being + * drained; otherwise, "rmdir parent/child parent" may fail. */ if (cgroup_has_live_children(cgrp)) return -EBUSY; -- cgit v1.2.3-71-gd317 From 0cb51d71c1fa9234afe4213089844be76ec1765a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 May 2014 13:22:49 -0400 Subject: cgroup: move cgroup->serial_nr into cgroup_subsys_state We're moving towards using cgroup_subsys_states as the fundamental structural blocks. All csses including the cgroup->self and actual ones now form trees through css->children and ->sibling which follow the same rules as what cgroup->children and ->sibling followed. This patch moves cgroup->serial_nr which is used to implement css iteration into css. Note that all csses, regardless of their types, allocate their serial numbers from the same monotonically increasing counter. This doesn't affect the ordering needed by css iteration or cause any other material behavior changes. This will be used to update css iteration. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 16 ++++++++-------- kernel/cgroup.c | 20 +++++++++++--------- 2 files changed, 19 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index cf8ba26b7c6e..ebe7ce49f4b7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -80,6 +80,14 @@ struct cgroup_subsys_state { unsigned int flags; + /* + * Monotonically increasing unique serial number which defines a + * uniform order among all csses. It's guaranteed that all + * ->children lists are in the ascending order of ->serial_nr and + * used to allow interrupting and resuming iterations. + */ + u64 serial_nr; + /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; struct work_struct destroy_work; @@ -178,14 +186,6 @@ struct cgroup { struct kernfs_node *kn; /* cgroup kernfs entry */ struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ - /* - * Monotonically increasing unique serial number which defines a - * uniform order among all cgroups. It's guaranteed that all - * ->children lists are in the ascending order of ->serial_nr. - * It's used to allow interrupting and resuming iterations. - */ - u64 serial_nr; - /* the bitmask of subsystems enabled on the child cgroups */ unsigned int child_subsys_mask; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index dcb06e181ce4..d5af128ec1ec 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -157,14 +157,13 @@ static int cgroup_root_count; static DEFINE_IDR(cgroup_hierarchy_idr); /* - * Assign a monotonically increasing serial number to cgroups. It - * guarantees cgroups with bigger numbers are newer than those with smaller - * numbers. Also, as cgroups are always appended to the parent's - * ->children list, it guarantees that sibling cgroups are always sorted in - * the ascending serial number order on the list. Protected by - * cgroup_mutex. + * Assign a monotonically increasing serial number to csses. It guarantees + * cgroups with bigger numbers are newer than those with smaller numbers. + * Also, as csses are always appended to the parent's ->children list, it + * guarantees that sibling csses are always sorted in the ascending serial + * number order on the list. Protected by cgroup_mutex. */ -static u64 cgroup_serial_nr_next = 1; +static u64 css_serial_nr_next = 1; /* This flag indicates whether tasks in the fork and exit paths should * check for fork/exit handlers to call. This avoids us having to do @@ -3133,7 +3132,7 @@ css_next_child(struct cgroup_subsys_state *pos_css, next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling); } else { list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling) - if (next->serial_nr > pos->serial_nr) + if (next->self.serial_nr > pos->self.serial_nr) break; } @@ -4168,6 +4167,8 @@ static void css_release(struct percpu_ref *ref) static void init_and_link_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss, struct cgroup *cgrp) { + lockdep_assert_held(&cgroup_mutex); + cgroup_get(cgrp); memset(css, 0, sizeof(*css)); @@ -4175,6 +4176,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css, css->ss = ss; INIT_LIST_HEAD(&css->sibling); INIT_LIST_HEAD(&css->children); + css->serial_nr = css_serial_nr_next++; if (cgroup_parent(cgrp)) { css->parent = cgroup_css(cgroup_parent(cgrp), ss); @@ -4348,7 +4350,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, */ kernfs_get(kn); - cgrp->serial_nr = cgroup_serial_nr_next++; + cgrp->self.serial_nr = css_serial_nr_next++; /* allocation complete, commit to creation */ list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children); -- cgit v1.2.3-71-gd317 From de3f034182ecbf0efbcef7ab8b253c6c3049a592 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 May 2014 13:22:49 -0400 Subject: cgroup: introduce CSS_RELEASED and reduce css iteration fallback window css iterations allow the caller to drop RCU read lock. As long as the caller keeps the current position accessible, it can simply re-grab RCU read lock later and continue iteration. This is achieved by using CGRP_DEAD to detect whether the current positions next pointer is safe to dereference and if not re-iterate from the beginning to the next position using ->serial_nr. CGRP_DEAD is used as the marker to invalidate the next pointer and the only requirement is that the marker is set before the next sibling starts its RCU grace period. Because CGRP_DEAD is set at the end of cgroup_destroy_locked() but the cgroup is unlinked when the reference count reaches zero, we currently have a rather large window where this fallback re-iteration logic can be triggered. This patch introduces CSS_RELEASED which is set when a css is unlinked from its sibling list. This still keeps the re-iteration logic working while drastically reducing the window of its activation. While at it, rewrite the comment in css_next_child() to reflect the new flag and better explain the synchronization. This will also enable iterating csses directly instead of through cgroups. v2: CSS_RELEASED now assigned to 1 << 2 as 1 << 0 is used by CSS_NO_REF. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 1 + kernel/cgroup.c | 41 ++++++++++++++++++++--------------------- 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ebe7ce49f4b7..5375582ea5f6 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -97,6 +97,7 @@ struct cgroup_subsys_state { enum { CSS_NO_REF = (1 << 0), /* no reference counting for this css */ CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ + CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ }; /** diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d5af128ec1ec..5544e685f2da 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3108,27 +3108,28 @@ css_next_child(struct cgroup_subsys_state *pos_css, cgroup_assert_mutex_or_rcu_locked(); /* - * @pos could already have been removed. Once a cgroup is removed, - * its ->sibling.next is no longer updated when its next sibling - * changes. As CGRP_DEAD assertion is serialized and happens - * before the cgroup is taken off the ->sibling list, if we see it - * unasserted, it's guaranteed that the next sibling hasn't - * finished its grace period even if it's already removed, and thus - * safe to dereference from this RCU critical section. If - * ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed - * to be visible as %true here. + * @pos could already have been unlinked from the sibling list. + * Once a cgroup is removed, its ->sibling.next is no longer + * updated when its next sibling changes. CSS_RELEASED is set when + * @pos is taken off list, at which time its next pointer is valid, + * and, as releases are serialized, the one pointed to by the next + * pointer is guaranteed to not have started release yet. This + * implies that if we observe !CSS_RELEASED on @pos in this RCU + * critical section, the one pointed to by its next pointer is + * guaranteed to not have finished its RCU grace period even if we + * have dropped rcu_read_lock() inbetween iterations. * - * If @pos is dead, its next pointer can't be dereferenced; - * however, as each cgroup is given a monotonically increasing - * unique serial number and always appended to the sibling list, - * the next one can be found by walking the parent's children until - * we see a cgroup with higher serial number than @pos's. While - * this path can be slower, it's taken only when either the current - * cgroup is removed or iteration and removal race. + * If @pos has CSS_RELEASED set, its next pointer can't be + * dereferenced; however, as each css is given a monotonically + * increasing unique serial number and always appended to the + * sibling list, the next one can be found by walking the parent's + * children until the first css with higher serial number than + * @pos's. While this path can be slower, it happens iff iteration + * races against release and the race window is very small. */ if (!pos) { next = list_entry_rcu(cgrp->self.children.next, struct cgroup, self.sibling); - } else if (likely(!cgroup_is_dead(pos))) { + } else if (likely(!(pos->self.flags & CSS_RELEASED))) { next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling); } else { list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling) @@ -4139,6 +4140,7 @@ static void css_release_work_fn(struct work_struct *work) mutex_lock(&cgroup_mutex); + css->flags |= CSS_RELEASED; list_del_rcu(&css->sibling); if (ss) { @@ -4525,10 +4527,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) /* * Mark @cgrp dead. This prevents further task migration and child - * creation by disabling cgroup_lock_live_group(). Note that - * CGRP_DEAD assertion is depended upon by css_next_child() to - * resume iteration after dropping RCU read lock. See - * css_next_child() for details. + * creation by disabling cgroup_lock_live_group(). */ set_bit(CGRP_DEAD, &cgrp->flags); -- cgit v1.2.3-71-gd317 From c2931b70a32c705b9bd5762f5044f9eac8a52bb3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 May 2014 13:22:51 -0400 Subject: cgroup: iterate cgroup_subsys_states directly Currently, css_next_child() is implemented as finding the next child cgroup which has the css enabled, which used to be the only way to do it as only cgroups participated in sibling lists and thus could be iteratd. This works as long as what's required during iteration is not missing online csses; however, it turns out that there are use cases where offlined but not yet released csses need to be iterated. This is difficult to implement through cgroup iteration the unified hierarchy as there may be multiple dying csses for the same subsystem associated with single cgroup. After the recent changes, the cgroup self and regular csses behave identically in how they're linked and unlinked from the sibling lists including assertion of CSS_RELEASED and css_next_child() can simply switch to iterating csses directly. This both simplifies the logic and ensures that all visible non-released csses are included in the iteration whether there are multiple dying csses for a subsystem or not. As all other iterators depend on css_next_child() for sibling iteration, this changes behaviors of all css iterators. Add and update explanations on the css states which are included in traversal to all iterators. As css iteration could always contain offlined csses, this shouldn't break any of the current users and new usages which need iteration of all on and offline csses can make use of the new semantics. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Johannes Weiner --- include/linux/cgroup.h | 44 ++++++++++++++++++++--------------- kernel/cgroup.c | 62 ++++++++++++++++++++++++++++++-------------------- 2 files changed, 63 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5375582ea5f6..f2ff578fc03a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -764,14 +764,14 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); * @pos: the css * to use as the loop cursor * @parent: css whose children to walk * - * Walk @parent's children. Must be called under rcu_read_lock(). A child - * css which hasn't finished ->css_online() or already has finished - * ->css_offline() may show up during traversal and it's each subsystem's - * responsibility to verify that each @pos is alive. + * Walk @parent's children. Must be called under rcu_read_lock(). * - * If a subsystem synchronizes against the parent in its ->css_online() and - * before starting iterating, a css which finished ->css_online() is - * guaranteed to be visible in the future iterations. + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. * * It is allowed to temporarily drop RCU read lock during iteration. The * caller is responsible for ensuring that @pos remains accessible until @@ -794,17 +794,16 @@ css_rightmost_descendant(struct cgroup_subsys_state *pos); * @root: css whose descendants to walk * * Walk @root's descendants. @root is included in the iteration and the - * first node to be visited. Must be called under rcu_read_lock(). A - * descendant css which hasn't finished ->css_online() or already has - * finished ->css_offline() may show up during traversal and it's each - * subsystem's responsibility to verify that each @pos is alive. + * first node to be visited. Must be called under rcu_read_lock(). * - * If a subsystem synchronizes against the parent in its ->css_online() and - * before starting iterating, and synchronizes against @pos on each - * iteration, any descendant css which finished ->css_online() is - * guaranteed to be visible in the future iterations. + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. * - * In other words, the following guarantees that a descendant can't escape + * For example, the following guarantees that a descendant can't escape * state updates of its ancestors. * * my_online(@css) @@ -860,8 +859,17 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, * * Similar to css_for_each_descendant_pre() but performs post-order * traversal instead. @root is included in the iteration and the last - * node to be visited. Note that the walk visibility guarantee described - * in pre-order walk doesn't apply the same to post-order walks. + * node to be visited. + * + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. + * + * Note that the walk visibility guarantee example described in pre-order + * walk doesn't apply the same to post-order walks. */ #define css_for_each_descendant_post(pos, css) \ for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5544e685f2da..097a1fc1e1e8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3089,21 +3089,25 @@ static int cgroup_task_count(const struct cgroup *cgrp) /** * css_next_child - find the next child of a given css - * @pos_css: the current position (%NULL to initiate traversal) - * @parent_css: css whose children to walk + * @pos: the current position (%NULL to initiate traversal) + * @parent: css whose children to walk * - * This function returns the next child of @parent_css and should be called + * This function returns the next child of @parent and should be called * under either cgroup_mutex or RCU read lock. The only requirement is - * that @parent_css and @pos_css are accessible. The next sibling is - * guaranteed to be returned regardless of their states. + * that @parent and @pos are accessible. The next sibling is guaranteed to + * be returned regardless of their states. + * + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. */ -struct cgroup_subsys_state * -css_next_child(struct cgroup_subsys_state *pos_css, - struct cgroup_subsys_state *parent_css) +struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, + struct cgroup_subsys_state *parent) { - struct cgroup *pos = pos_css ? pos_css->cgroup : NULL; - struct cgroup *cgrp = parent_css->cgroup; - struct cgroup *next; + struct cgroup_subsys_state *next; cgroup_assert_mutex_or_rcu_locked(); @@ -3128,27 +3132,21 @@ css_next_child(struct cgroup_subsys_state *pos_css, * races against release and the race window is very small. */ if (!pos) { - next = list_entry_rcu(cgrp->self.children.next, struct cgroup, self.sibling); - } else if (likely(!(pos->self.flags & CSS_RELEASED))) { - next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling); + next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling); + } else if (likely(!(pos->flags & CSS_RELEASED))) { + next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling); } else { - list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling) - if (next->self.serial_nr > pos->self.serial_nr) + list_for_each_entry_rcu(next, &parent->children, sibling) + if (next->serial_nr > pos->serial_nr) break; } /* * @next, if not pointing to the head, can be dereferenced and is - * the next sibling; however, it might have @ss disabled. If so, - * fast-forward to the next enabled one. + * the next sibling. */ - while (&next->self.sibling != &cgrp->self.children) { - struct cgroup_subsys_state *next_css = cgroup_css(next, parent_css->ss); - - if (next_css) - return next_css; - next = list_entry_rcu(next->self.sibling.next, struct cgroup, self.sibling); - } + if (&next->sibling != &parent->children) + return next; return NULL; } @@ -3165,6 +3163,13 @@ css_next_child(struct cgroup_subsys_state *pos_css, * doesn't require the whole traversal to be contained in a single critical * section. This function will return the correct next descendant as long * as both @pos and @root are accessible and @pos is a descendant of @root. + * + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. */ struct cgroup_subsys_state * css_next_descendant_pre(struct cgroup_subsys_state *pos, @@ -3252,6 +3257,13 @@ css_leftmost_descendant(struct cgroup_subsys_state *pos) * section. This function will return the correct next descendant as long * as both @pos and @cgroup are accessible and @pos is a descendant of * @cgroup. + * + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. */ struct cgroup_subsys_state * css_next_descendant_post(struct cgroup_subsys_state *pos, -- cgit v1.2.3-71-gd317 From 184faf32328c65c9d86b19577b8d8b90bdd2cd2e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 May 2014 13:22:51 -0400 Subject: cgroup: use CSS_ONLINE instead of CGRP_DEAD Use CSS_ONLINE on the self css to indicate whether a cgroup has been killed instead of CGRP_DEAD. This will allow re-using css online test for cgroup liveliness test. This doesn't introduce any functional change. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 2 -- kernel/cgroup.c | 7 ++++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f2ff578fc03a..51a339c99eb6 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -143,8 +143,6 @@ static inline void css_put(struct cgroup_subsys_state *css) /* bits in struct cgroup flags field */ enum { - /* Control Group is dead */ - CGRP_DEAD, /* * Control Group has previously had a child cgroup or a task, * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 097a1fc1e1e8..004004fd0ded 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -278,7 +278,7 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, /* convenient tests for these bits */ static inline bool cgroup_is_dead(const struct cgroup *cgrp) { - return test_bit(CGRP_DEAD, &cgrp->flags); + return !(cgrp->self.flags & CSS_ONLINE); } struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) @@ -1518,6 +1518,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_LIST_HEAD(&cgrp->pidlists); mutex_init(&cgrp->pidlist_mutex); cgrp->self.cgroup = cgrp; + cgrp->self.flags |= CSS_ONLINE; for_each_subsys(ss, ssid) INIT_LIST_HEAD(&cgrp->e_csets[ssid]); @@ -4541,13 +4542,13 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) * Mark @cgrp dead. This prevents further task migration and child * creation by disabling cgroup_lock_live_group(). */ - set_bit(CGRP_DEAD, &cgrp->flags); + cgrp->self.flags &= ~CSS_ONLINE; /* initiate massacre of all css's */ for_each_css(css, ssid, cgrp) kill_css(css); - /* CGRP_DEAD is set, remove from ->release_list for the last time */ + /* CSS_ONLINE is clear, remove from ->release_list for the last time */ raw_spin_lock(&release_list_lock); if (!list_empty(&cgrp->release_list)) list_del_init(&cgrp->release_list); -- cgit v1.2.3-71-gd317 From f3d4650015301d1c880df4523f7e7ef320a38aab Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 May 2014 13:22:52 -0400 Subject: cgroup: convert cgroup_has_live_children() into css_has_online_children() Now that cgroup liveliness and css onliness are the same state, convert cgroup_has_live_children() into css_has_online_children() so that it can be used for actual csses too. The function now uses css_for_each_child() for iteration and is published. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 2 ++ kernel/cgroup.c | 32 ++++++++++++++++++++------------ 2 files changed, 22 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 51a339c99eb6..b76999954beb 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -873,6 +873,8 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ (pos) = css_next_descendant_post((pos), (css))) +bool css_has_online_children(struct cgroup_subsys_state *css); + /* A css_task_iter should be treated as an opaque object */ struct css_task_iter { struct cgroup_subsys *ss; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 004004fd0ded..082bb842b11a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -175,7 +175,6 @@ static int need_forkexit_callback __read_mostly; static struct cftype cgroup_base_files[]; static void cgroup_put(struct cgroup *cgrp); -static bool cgroup_has_live_children(struct cgroup *cgrp); static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask); static int cgroup_destroy_locked(struct cgroup *cgrp); @@ -1769,7 +1768,7 @@ static void cgroup_kill_sb(struct super_block *sb) * This prevents new mounts by disabling percpu_ref_tryget_live(). * cgroup_mount() may wait for @root's release. */ - if (cgroup_has_live_children(&root->cgrp)) + if (css_has_online_children(&root->cgrp.self)) cgroup_put(&root->cgrp); else percpu_ref_kill(&root->cgrp.self.refcnt); @@ -3291,19 +3290,28 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, return pos->parent; } -static bool cgroup_has_live_children(struct cgroup *cgrp) +/** + * css_has_online_children - does a css have online children + * @css: the target css + * + * Returns %true if @css has any online children; otherwise, %false. This + * function can be called from any context but the caller is responsible + * for synchronizing against on/offlining as necessary. + */ +bool css_has_online_children(struct cgroup_subsys_state *css) { - struct cgroup *child; + struct cgroup_subsys_state *child; + bool ret = false; rcu_read_lock(); - list_for_each_entry_rcu(child, &cgrp->self.children, self.sibling) { - if (!cgroup_is_dead(child)) { - rcu_read_unlock(); - return true; + css_for_each_child(child, css) { + if (css->flags & CSS_ONLINE) { + ret = true; + break; } } rcu_read_unlock(); - return false; + return ret; } /** @@ -4535,7 +4543,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) * ->self.children as dead children linger on it while being * drained; otherwise, "rmdir parent/child parent" may fail. */ - if (cgroup_has_live_children(cgrp)) + if (css_has_online_children(&cgrp->self)) return -EBUSY; /* @@ -5014,8 +5022,8 @@ void cgroup_exit(struct task_struct *tsk) static void check_for_release(struct cgroup *cgrp) { - if (cgroup_is_releasable(cgrp) && - list_empty(&cgrp->cset_links) && !cgroup_has_live_children(cgrp)) { + if (cgroup_is_releasable(cgrp) && list_empty(&cgrp->cset_links) && + !css_has_online_children(&cgrp->self)) { /* * Control Group is currently removeable. If it's not * already queued for a userspace notification, queue -- cgit v1.2.3-71-gd317 From 6f4524d355a86769b65d5420a6ef47fb0bba9b72 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 May 2014 13:22:52 -0400 Subject: cgroup: implement css_tryget() Implement css_tryget() which tries to grab a cgroup_subsys_state's reference as long as it already hasn't reached zero. Combined with the recent css iterator changes to include offline && !released csses during traversal, this can be used to access csses regardless of its online state. v2: Take the new flag CSS_NO_REF into account. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Johannes Weiner --- include/linux/cgroup.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b76999954beb..4afe544d3547 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -112,6 +112,24 @@ static inline void css_get(struct cgroup_subsys_state *css) percpu_ref_get(&css->refcnt); } +/** + * css_tryget - try to obtain a reference on the specified css + * @css: target css + * + * Obtain a reference on @css unless it already has reached zero and is + * being released. This function doesn't care whether @css is on or + * offline. The caller naturally needs to ensure that @css is accessible + * but doesn't have to be holding a reference on it - IOW, RCU protected + * access is good enough for this function. Returns %true if a reference + * count was successfully obtained; %false otherwise. + */ +static inline bool css_tryget(struct cgroup_subsys_state *css) +{ + if (!(css->flags & CSS_NO_REF)) + return percpu_ref_tryget(&css->refcnt); + return true; +} + /** * css_tryget_online - try to obtain a reference on the specified css if online * @css: target css -- cgit v1.2.3-71-gd317 From 5cc9ed4b9a7ac579362ccebac67f7a4cdb36de06 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 16 May 2014 14:22:37 +0100 Subject: drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl By exporting the ability to map user address and inserting PTEs representing their backing pages into the GTT, we can exploit UMA in order to utilize normal application data as a texture source or even as a render target (depending upon the capabilities of the chipset). This has a number of uses, with zero-copy downloads to the GPU and efficient readback making the intermixed streaming of CPU and GPU operations fairly efficient. This ability has many widespread implications from faster rendering of client-side software rasterisers (chromium), mitigation of stalls due to read back (firefox) and to faster pipelining of texture data (such as pixel buffer objects in GL or data blobs in CL). v2: Compile with CONFIG_MMU_NOTIFIER v3: We can sleep while performing invalidate-range, which we can utilise to drop our page references prior to the kernel manipulating the vma (for either discard or cloning) and so protect normal users. v4: Only run the invalidate notifier if the range intercepts the bo. v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers v6: Recheck after reacquire mutex for lost mmu. v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary. v8: Fix rebasing error after forwarding porting the back port. v9: Limit the userptr to page aligned entries. We now expect userspace to handle all the offset-in-page adjustments itself. v10: Prevent vma from being copied across fork to avoid issues with cow. v11: Drop vma behaviour changes -- locking is nigh on impossible. Use a worker to load user pages to avoid lock inversions. v12: Use get_task_mm()/mmput() for correct refcounting of mm. v13: Use a worker to release the mmu_notifier to avoid lock inversion v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifer with its own locking and tree of objects for each mm/mmu_notifier. v15: Prevent overlapping userptr objects, and invalidate all objects within the mmu_notifier range v16: Fix a typo for iterating over multiple objects in the range and rearrange error path to destroy the mmu_notifier locklessly. Also close a race between invalidate_range and the get_pages_worker. v17: Close a race between get_pages_worker/invalidate_range and fresh allocations of the same userptr range - and notice that struct_mutex was presumed to be held when during creation it wasn't. v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory for the struct sg_table and to clear it before reporting an error. v19: Always error out on read-only userptr requests as we don't have the hardware infrastructure to support them at the moment. v20: Refuse to implement read-only support until we have the required infrastructure - but reserve the bit in flags for future use. v21: use_mm() is not required for get_user_pages(). It is only meant to be used to fix up the kernel thread's current->mm for use with copy_user(). v22: Use sg_alloc_table_from_pages for that chunky feeling v23: Export a function for sanity checking dma-buf rather than encode userptr details elsewhere, and clean up comments based on suggestions by Bradley. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: "Gong, Zhipeng" Cc: Akash Goel Cc: "Volkin, Bradley D" Reviewed-by: Tvrtko Ursulin Reviewed-by: Brad Volkin [danvet: Frob ioctl allocation to pick the next one - will cause a bit of fuss with create2 apparently, but such are the rules.] [danvet2: oops, forgot to git add after manual patch application] [danvet3: Appease sparse.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/Kconfig | 1 + drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_dma.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 25 +- drivers/gpu/drm/i915/i915_gem.c | 4 + drivers/gpu/drm/i915/i915_gem_dmabuf.c | 8 + drivers/gpu/drm/i915/i915_gem_userptr.c | 711 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gpu_error.c | 2 + include/uapi/drm/i915_drm.h | 16 + 9 files changed, 768 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c (limited to 'include') diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index e4e3c01b8cbc..437e1824d0bf 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -5,6 +5,7 @@ config DRM_I915 depends on (AGP || AGP=n) select INTEL_GTT select AGP_INTEL if AGP + select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular # the shmem_readpage() which depends upon tmpfs select SHMEM diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 24469168d550..7b2f3bee3518 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -27,6 +27,7 @@ i915-y += i915_cmd_parser.o \ i915_gem.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ + i915_gem_userptr.o \ i915_gpu_error.o \ i915_irq.o \ i915_trace_points.o \ diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 46f1decab76f..dea455bd889a 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1983,6 +1983,7 @@ const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), }; int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9e9e8166a1f3..a270e0773e2d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -178,6 +179,7 @@ enum hpd_pin { if ((intel_connector)->base.encoder == (__encoder)) struct drm_i915_private; +struct i915_mmu_object; enum intel_dpll_id { DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */ @@ -403,6 +405,7 @@ struct drm_i915_error_state { u32 tiling:2; u32 dirty:1; u32 purgeable:1; + u32 userptr:1; s32 ring:4; u32 cache_level:3; } **active_bo, **pinned_bo; @@ -1447,6 +1450,9 @@ struct drm_i915_private { struct i915_gtt gtt; /* VM representing the global address space */ struct i915_gem_mm mm; +#if defined(CONFIG_MMU_NOTIFIER) + DECLARE_HASHTABLE(mmu_notifiers, 7); +#endif /* Kernel Modesetting */ @@ -1580,6 +1586,8 @@ struct drm_i915_gem_object_ops { */ int (*get_pages)(struct drm_i915_gem_object *); void (*put_pages)(struct drm_i915_gem_object *); + int (*dmabuf_export)(struct drm_i915_gem_object *); + void (*release)(struct drm_i915_gem_object *); }; struct drm_i915_gem_object { @@ -1693,8 +1701,20 @@ struct drm_i915_gem_object { /** for phy allocated objects */ struct drm_i915_gem_phys_object *phys_obj; -}; + union { + struct i915_gem_userptr { + uintptr_t ptr; + unsigned read_only :1; + unsigned workers :4; +#define I915_GEM_USERPTR_MAX_WORKERS 15 + + struct mm_struct *mm; + struct i915_mmu_object *mn; + struct work_struct *work; + } userptr; + }; +}; #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base) /** @@ -2119,6 +2139,9 @@ int i915_gem_set_tiling(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_get_tiling(struct drm_device *dev, void *data, struct drm_file *file_priv); +int i915_gem_init_userptr(struct drm_device *dev); +int i915_gem_userptr_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_wait_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f6c03513de52..d5aad0bc71f7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4263,6 +4263,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) if (obj->base.import_attach) drm_prime_gem_destroy(&obj->base, NULL); + if (obj->ops->release) + obj->ops->release(obj); + drm_gem_object_release(&obj->base); i915_gem_info_remove_obj(dev_priv, obj->base.size); @@ -4542,6 +4545,7 @@ int i915_gem_init(struct drm_device *dev) DRM_DEBUG_DRIVER("allow wake ack timed out\n"); } + i915_gem_init_userptr(dev); i915_gem_init_global_gtt(dev); ret = i915_gem_context_init(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 321102a8374b..580aa42443ed 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -229,6 +229,14 @@ static const struct dma_buf_ops i915_dmabuf_ops = { struct dma_buf *i915_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gem_obj, int flags) { + struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); + + if (obj->ops->dmabuf_export) { + int ret = obj->ops->dmabuf_export(obj); + if (ret) + return ERR_PTR(ret); + } + return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags); } diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c new file mode 100644 index 000000000000..21ea92886a56 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -0,0 +1,711 @@ +/* + * Copyright © 2012-2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "drmP.h" +#include "i915_drm.h" +#include "i915_drv.h" +#include "i915_trace.h" +#include "intel_drv.h" +#include +#include +#include +#include + +#if defined(CONFIG_MMU_NOTIFIER) +#include + +struct i915_mmu_notifier { + spinlock_t lock; + struct hlist_node node; + struct mmu_notifier mn; + struct rb_root objects; + struct drm_device *dev; + struct mm_struct *mm; + struct work_struct work; + unsigned long count; + unsigned long serial; +}; + +struct i915_mmu_object { + struct i915_mmu_notifier *mmu; + struct interval_tree_node it; + struct drm_i915_gem_object *obj; +}; + +static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn); + struct interval_tree_node *it = NULL; + unsigned long serial = 0; + + end--; /* interval ranges are inclusive, but invalidate range is exclusive */ + while (start < end) { + struct drm_i915_gem_object *obj; + + obj = NULL; + spin_lock(&mn->lock); + if (serial == mn->serial) + it = interval_tree_iter_next(it, start, end); + else + it = interval_tree_iter_first(&mn->objects, start, end); + if (it != NULL) { + obj = container_of(it, struct i915_mmu_object, it)->obj; + drm_gem_object_reference(&obj->base); + serial = mn->serial; + } + spin_unlock(&mn->lock); + if (obj == NULL) + return; + + mutex_lock(&mn->dev->struct_mutex); + /* Cancel any active worker and force us to re-evaluate gup */ + obj->userptr.work = NULL; + + if (obj->pages != NULL) { + struct drm_i915_private *dev_priv = to_i915(mn->dev); + struct i915_vma *vma, *tmp; + bool was_interruptible; + + was_interruptible = dev_priv->mm.interruptible; + dev_priv->mm.interruptible = false; + + list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) { + int ret = i915_vma_unbind(vma); + WARN_ON(ret && ret != -EIO); + } + WARN_ON(i915_gem_object_put_pages(obj)); + + dev_priv->mm.interruptible = was_interruptible; + } + + start = obj->userptr.ptr + obj->base.size; + + drm_gem_object_unreference(&obj->base); + mutex_unlock(&mn->dev->struct_mutex); + } +} + +static const struct mmu_notifier_ops i915_gem_userptr_notifier = { + .invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start, +}; + +static struct i915_mmu_notifier * +__i915_mmu_notifier_lookup(struct drm_device *dev, struct mm_struct *mm) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct i915_mmu_notifier *mmu; + + /* Protected by dev->struct_mutex */ + hash_for_each_possible(dev_priv->mmu_notifiers, mmu, node, (unsigned long)mm) + if (mmu->mm == mm) + return mmu; + + return NULL; +} + +static struct i915_mmu_notifier * +i915_mmu_notifier_get(struct drm_device *dev, struct mm_struct *mm) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct i915_mmu_notifier *mmu; + int ret; + + lockdep_assert_held(&dev->struct_mutex); + + mmu = __i915_mmu_notifier_lookup(dev, mm); + if (mmu) + return mmu; + + mmu = kmalloc(sizeof(*mmu), GFP_KERNEL); + if (mmu == NULL) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&mmu->lock); + mmu->dev = dev; + mmu->mn.ops = &i915_gem_userptr_notifier; + mmu->mm = mm; + mmu->objects = RB_ROOT; + mmu->count = 0; + mmu->serial = 0; + + /* Protected by mmap_sem (write-lock) */ + ret = __mmu_notifier_register(&mmu->mn, mm); + if (ret) { + kfree(mmu); + return ERR_PTR(ret); + } + + /* Protected by dev->struct_mutex */ + hash_add(dev_priv->mmu_notifiers, &mmu->node, (unsigned long)mm); + return mmu; +} + +static void +__i915_mmu_notifier_destroy_worker(struct work_struct *work) +{ + struct i915_mmu_notifier *mmu = container_of(work, typeof(*mmu), work); + mmu_notifier_unregister(&mmu->mn, mmu->mm); + kfree(mmu); +} + +static void +__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu) +{ + lockdep_assert_held(&mmu->dev->struct_mutex); + + /* Protected by dev->struct_mutex */ + hash_del(&mmu->node); + + /* Our lock ordering is: mmap_sem, mmu_notifier_scru, struct_mutex. + * We enter the function holding struct_mutex, therefore we need + * to drop our mutex prior to calling mmu_notifier_unregister in + * order to prevent lock inversion (and system-wide deadlock) + * between the mmap_sem and struct-mutex. Hence we defer the + * unregistration to a workqueue where we hold no locks. + */ + INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker); + schedule_work(&mmu->work); +} + +static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu) +{ + if (++mmu->serial == 0) + mmu->serial = 1; +} + +static void +i915_mmu_notifier_del(struct i915_mmu_notifier *mmu, + struct i915_mmu_object *mn) +{ + lockdep_assert_held(&mmu->dev->struct_mutex); + + spin_lock(&mmu->lock); + interval_tree_remove(&mn->it, &mmu->objects); + __i915_mmu_notifier_update_serial(mmu); + spin_unlock(&mmu->lock); + + /* Protected against _add() by dev->struct_mutex */ + if (--mmu->count == 0) + __i915_mmu_notifier_destroy(mmu); +} + +static int +i915_mmu_notifier_add(struct i915_mmu_notifier *mmu, + struct i915_mmu_object *mn) +{ + struct interval_tree_node *it; + int ret; + + ret = i915_mutex_lock_interruptible(mmu->dev); + if (ret) + return ret; + + /* Make sure we drop the final active reference (and thereby + * remove the objects from the interval tree) before we do + * the check for overlapping objects. + */ + i915_gem_retire_requests(mmu->dev); + + /* Disallow overlapping userptr objects */ + spin_lock(&mmu->lock); + it = interval_tree_iter_first(&mmu->objects, + mn->it.start, mn->it.last); + if (it) { + struct drm_i915_gem_object *obj; + + /* We only need to check the first object in the range as it + * either has cancelled gup work queued and we need to + * return back to the user to give time for the gup-workers + * to flush their object references upon which the object will + * be removed from the interval-tree, or the the range is + * still in use by another client and the overlap is invalid. + */ + + obj = container_of(it, struct i915_mmu_object, it)->obj; + ret = obj->userptr.workers ? -EAGAIN : -EINVAL; + } else { + interval_tree_insert(&mn->it, &mmu->objects); + __i915_mmu_notifier_update_serial(mmu); + ret = 0; + } + spin_unlock(&mmu->lock); + mutex_unlock(&mmu->dev->struct_mutex); + + return ret; +} + +static void +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) +{ + struct i915_mmu_object *mn; + + mn = obj->userptr.mn; + if (mn == NULL) + return; + + i915_mmu_notifier_del(mn->mmu, mn); + obj->userptr.mn = NULL; +} + +static int +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, + unsigned flags) +{ + struct i915_mmu_notifier *mmu; + struct i915_mmu_object *mn; + int ret; + + if (flags & I915_USERPTR_UNSYNCHRONIZED) + return capable(CAP_SYS_ADMIN) ? 0 : -EPERM; + + down_write(&obj->userptr.mm->mmap_sem); + ret = i915_mutex_lock_interruptible(obj->base.dev); + if (ret == 0) { + mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm); + if (!IS_ERR(mmu)) + mmu->count++; /* preemptive add to act as a refcount */ + else + ret = PTR_ERR(mmu); + mutex_unlock(&obj->base.dev->struct_mutex); + } + up_write(&obj->userptr.mm->mmap_sem); + if (ret) + return ret; + + mn = kzalloc(sizeof(*mn), GFP_KERNEL); + if (mn == NULL) { + ret = -ENOMEM; + goto destroy_mmu; + } + + mn->mmu = mmu; + mn->it.start = obj->userptr.ptr; + mn->it.last = mn->it.start + obj->base.size - 1; + mn->obj = obj; + + ret = i915_mmu_notifier_add(mmu, mn); + if (ret) + goto free_mn; + + obj->userptr.mn = mn; + return 0; + +free_mn: + kfree(mn); +destroy_mmu: + mutex_lock(&obj->base.dev->struct_mutex); + if (--mmu->count == 0) + __i915_mmu_notifier_destroy(mmu); + mutex_unlock(&obj->base.dev->struct_mutex); + return ret; +} + +#else + +static void +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) +{ +} + +static int +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, + unsigned flags) +{ + if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0) + return -ENODEV; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return 0; +} +#endif + +struct get_pages_work { + struct work_struct work; + struct drm_i915_gem_object *obj; + struct task_struct *task; +}; + + +#if IS_ENABLED(CONFIG_SWIOTLB) +#define swiotlb_active() swiotlb_nr_tbl() +#else +#define swiotlb_active() 0 +#endif + +static int +st_set_pages(struct sg_table **st, struct page **pvec, int num_pages) +{ + struct scatterlist *sg; + int ret, n; + + *st = kmalloc(sizeof(**st), GFP_KERNEL); + if (*st == NULL) + return -ENOMEM; + + if (swiotlb_active()) { + ret = sg_alloc_table(*st, num_pages, GFP_KERNEL); + if (ret) + goto err; + + for_each_sg((*st)->sgl, sg, num_pages, n) + sg_set_page(sg, pvec[n], PAGE_SIZE, 0); + } else { + ret = sg_alloc_table_from_pages(*st, pvec, num_pages, + 0, num_pages << PAGE_SHIFT, + GFP_KERNEL); + if (ret) + goto err; + } + + return 0; + +err: + kfree(*st); + *st = NULL; + return ret; +} + +static void +__i915_gem_userptr_get_pages_worker(struct work_struct *_work) +{ + struct get_pages_work *work = container_of(_work, typeof(*work), work); + struct drm_i915_gem_object *obj = work->obj; + struct drm_device *dev = obj->base.dev; + const int num_pages = obj->base.size >> PAGE_SHIFT; + struct page **pvec; + int pinned, ret; + + ret = -ENOMEM; + pinned = 0; + + pvec = kmalloc(num_pages*sizeof(struct page *), + GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); + if (pvec == NULL) + pvec = drm_malloc_ab(num_pages, sizeof(struct page *)); + if (pvec != NULL) { + struct mm_struct *mm = obj->userptr.mm; + + down_read(&mm->mmap_sem); + while (pinned < num_pages) { + ret = get_user_pages(work->task, mm, + obj->userptr.ptr + pinned * PAGE_SIZE, + num_pages - pinned, + !obj->userptr.read_only, 0, + pvec + pinned, NULL); + if (ret < 0) + break; + + pinned += ret; + } + up_read(&mm->mmap_sem); + } + + mutex_lock(&dev->struct_mutex); + if (obj->userptr.work != &work->work) { + ret = 0; + } else if (pinned == num_pages) { + ret = st_set_pages(&obj->pages, pvec, num_pages); + if (ret == 0) { + list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list); + pinned = 0; + } + } + + obj->userptr.work = ERR_PTR(ret); + obj->userptr.workers--; + drm_gem_object_unreference(&obj->base); + mutex_unlock(&dev->struct_mutex); + + release_pages(pvec, pinned, 0); + drm_free_large(pvec); + + put_task_struct(work->task); + kfree(work); +} + +static int +i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) +{ + const int num_pages = obj->base.size >> PAGE_SHIFT; + struct page **pvec; + int pinned, ret; + + /* If userspace should engineer that these pages are replaced in + * the vma between us binding this page into the GTT and completion + * of rendering... Their loss. If they change the mapping of their + * pages they need to create a new bo to point to the new vma. + * + * However, that still leaves open the possibility of the vma + * being copied upon fork. Which falls under the same userspace + * synchronisation issue as a regular bo, except that this time + * the process may not be expecting that a particular piece of + * memory is tied to the GPU. + * + * Fortunately, we can hook into the mmu_notifier in order to + * discard the page references prior to anything nasty happening + * to the vma (discard or cloning) which should prevent the more + * egregious cases from causing harm. + */ + + pvec = NULL; + pinned = 0; + if (obj->userptr.mm == current->mm) { + pvec = kmalloc(num_pages*sizeof(struct page *), + GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); + if (pvec == NULL) { + pvec = drm_malloc_ab(num_pages, sizeof(struct page *)); + if (pvec == NULL) + return -ENOMEM; + } + + pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages, + !obj->userptr.read_only, pvec); + } + if (pinned < num_pages) { + if (pinned < 0) { + ret = pinned; + pinned = 0; + } else { + /* Spawn a worker so that we can acquire the + * user pages without holding our mutex. Access + * to the user pages requires mmap_sem, and we have + * a strict lock ordering of mmap_sem, struct_mutex - + * we already hold struct_mutex here and so cannot + * call gup without encountering a lock inversion. + * + * Userspace will keep on repeating the operation + * (thanks to EAGAIN) until either we hit the fast + * path or the worker completes. If the worker is + * cancelled or superseded, the task is still run + * but the results ignored. (This leads to + * complications that we may have a stray object + * refcount that we need to be wary of when + * checking for existing objects during creation.) + * If the worker encounters an error, it reports + * that error back to this function through + * obj->userptr.work = ERR_PTR. + */ + ret = -EAGAIN; + if (obj->userptr.work == NULL && + obj->userptr.workers < I915_GEM_USERPTR_MAX_WORKERS) { + struct get_pages_work *work; + + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (work != NULL) { + obj->userptr.work = &work->work; + obj->userptr.workers++; + + work->obj = obj; + drm_gem_object_reference(&obj->base); + + work->task = current; + get_task_struct(work->task); + + INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker); + schedule_work(&work->work); + } else + ret = -ENOMEM; + } else { + if (IS_ERR(obj->userptr.work)) { + ret = PTR_ERR(obj->userptr.work); + obj->userptr.work = NULL; + } + } + } + } else { + ret = st_set_pages(&obj->pages, pvec, num_pages); + if (ret == 0) { + obj->userptr.work = NULL; + pinned = 0; + } + } + + release_pages(pvec, pinned, 0); + drm_free_large(pvec); + return ret; +} + +static void +i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj) +{ + struct scatterlist *sg; + int i; + + BUG_ON(obj->userptr.work != NULL); + + if (obj->madv != I915_MADV_WILLNEED) + obj->dirty = 0; + + for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) { + struct page *page = sg_page(sg); + + if (obj->dirty) + set_page_dirty(page); + + mark_page_accessed(page); + page_cache_release(page); + } + obj->dirty = 0; + + sg_free_table(obj->pages); + kfree(obj->pages); +} + +static void +i915_gem_userptr_release(struct drm_i915_gem_object *obj) +{ + i915_gem_userptr_release__mmu_notifier(obj); + + if (obj->userptr.mm) { + mmput(obj->userptr.mm); + obj->userptr.mm = NULL; + } +} + +static int +i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) +{ + if (obj->userptr.mn) + return 0; + + return i915_gem_userptr_init__mmu_notifier(obj, 0); +} + +static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { + .dmabuf_export = i915_gem_userptr_dmabuf_export, + .get_pages = i915_gem_userptr_get_pages, + .put_pages = i915_gem_userptr_put_pages, + .release = i915_gem_userptr_release, +}; + +/** + * Creates a new mm object that wraps some normal memory from the process + * context - user memory. + * + * We impose several restrictions upon the memory being mapped + * into the GPU. + * 1. It must be page aligned (both start/end addresses, i.e ptr and size). + * 2. It cannot overlap any other userptr object in the same address space. + * 3. It must be normal system memory, not a pointer into another map of IO + * space (e.g. it must not be a GTT mmapping of another object). + * 4. We only allow a bo as large as we could in theory map into the GTT, + * that is we limit the size to the total size of the GTT. + * 5. The bo is marked as being snoopable. The backing pages are left + * accessible directly by the CPU, but reads and writes by the GPU may + * incur the cost of a snoop (unless you have an LLC architecture). + * + * Synchronisation between multiple users and the GPU is left to userspace + * through the normal set-domain-ioctl. The kernel will enforce that the + * GPU relinquishes the VMA before it is returned back to the system + * i.e. upon free(), munmap() or process termination. However, the userspace + * malloc() library may not immediately relinquish the VMA after free() and + * instead reuse it whilst the GPU is still reading and writing to the VMA. + * Caveat emptor. + * + * Also note, that the object created here is not currently a "first class" + * object, in that several ioctls are banned. These are the CPU access + * ioctls: mmap(), pwrite and pread. In practice, you are expected to use + * direct access via your pointer rather than use those ioctls. + * + * If you think this is a good interface to use to pass GPU memory between + * drivers, please use dma-buf instead. In fact, wherever possible use + * dma-buf instead. + */ +int +i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_userptr *args = data; + struct drm_i915_gem_object *obj; + int ret; + u32 handle; + + if (args->flags & ~(I915_USERPTR_READ_ONLY | + I915_USERPTR_UNSYNCHRONIZED)) + return -EINVAL; + + if (offset_in_page(args->user_ptr | args->user_size)) + return -EINVAL; + + if (args->user_size > dev_priv->gtt.base.total) + return -E2BIG; + + if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE, + (char __user *)(unsigned long)args->user_ptr, args->user_size)) + return -EFAULT; + + if (args->flags & I915_USERPTR_READ_ONLY) { + /* On almost all of the current hw, we cannot tell the GPU that a + * page is readonly, so this is just a placeholder in the uAPI. + */ + return -ENODEV; + } + + /* Allocate the new object */ + obj = i915_gem_object_alloc(dev); + if (obj == NULL) + return -ENOMEM; + + drm_gem_private_object_init(dev, &obj->base, args->user_size); + i915_gem_object_init(obj, &i915_gem_userptr_ops); + obj->cache_level = I915_CACHE_LLC; + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + + obj->userptr.ptr = args->user_ptr; + obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY); + + /* And keep a pointer to the current->mm for resolving the user pages + * at binding. This means that we need to hook into the mmu_notifier + * in order to detect if the mmu is destroyed. + */ + ret = -ENOMEM; + if ((obj->userptr.mm = get_task_mm(current))) + ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags); + if (ret == 0) + ret = drm_gem_handle_create(file, &obj->base, &handle); + + /* drop reference from allocate - handle holds it now */ + drm_gem_object_unreference_unlocked(&obj->base); + if (ret) + return ret; + + args->handle = handle; + return 0; +} + +int +i915_gem_init_userptr(struct drm_device *dev) +{ +#if defined(CONFIG_MMU_NOTIFIER) + struct drm_i915_private *dev_priv = to_i915(dev); + hash_init(dev_priv->mmu_notifiers); +#endif + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 2d819858c19b..dcbec692f60f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -205,6 +205,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, err_puts(m, tiling_flag(err->tiling)); err_puts(m, dirty_flag(err->dirty)); err_puts(m, purgeable_flag(err->purgeable)); + err_puts(m, err->userptr ? " userptr" : ""); err_puts(m, err->ring != -1 ? " " : ""); err_puts(m, ring_str(err->ring)); err_puts(m, i915_cache_level_str(err->cache_level)); @@ -641,6 +642,7 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->tiling = obj->tiling_mode; err->dirty = obj->dirty; err->purgeable = obj->madv != I915_MADV_WILLNEED; + err->userptr = obj->userptr.mm != NULL; err->ring = obj->ring ? obj->ring->id : -1; err->cache_level = obj->cache_level; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 8a3e4ef00c3d..ff57f07c3249 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -223,6 +223,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_GEM_GET_CACHING 0x30 #define DRM_I915_REG_READ 0x31 #define DRM_I915_GET_RESET_STATS 0x32 +#define DRM_I915_GEM_USERPTR 0x33 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -273,6 +274,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy) #define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read) #define DRM_IOCTL_I915_GET_RESET_STATS DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats) +#define DRM_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -1050,4 +1052,18 @@ struct drm_i915_reset_stats { __u32 pad; }; +struct drm_i915_gem_userptr { + __u64 user_ptr; + __u64 user_size; + __u32 flags; +#define I915_USERPTR_READ_ONLY 0x1 +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000 + /** + * Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; +}; + #endif /* _UAPI_I915_DRM_H_ */ -- cgit v1.2.3-71-gd317 From 1836eea209546b870dd83f3f4ef234d6598a560d Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Sat, 10 May 2014 10:32:57 -0400 Subject: lib/crc7: Shift crc7() output left 1 bit This eliminates a 1-bit left shift in every single caller, and makes the inner loop of the CRC computation more efficient. Renamed crc7 to crc7_be (big-endian) since the interface changed. Also purged #include from files that don't use it at all. Signed-off-by: George Spelvin Reviewed-by: Pavel Machek Acked-by: Ulf Hansson Signed-off-by: John W. Linville --- drivers/mmc/host/mmc_spi.c | 2 +- drivers/net/wireless/ti/wl1251/acx.c | 1 - drivers/net/wireless/ti/wl1251/cmd.c | 1 - drivers/net/wireless/ti/wl1251/spi.c | 3 +- drivers/net/wireless/ti/wlcore/spi.c | 3 +- include/linux/crc7.h | 8 ++-- lib/crc7.c | 84 ++++++++++++++++++++---------------- 7 files changed, 53 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 0a87e5691341..338e2202eaaa 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -472,7 +472,7 @@ mmc_spi_command_send(struct mmc_spi_host *host, *cp++ = (u8)(arg >> 16); *cp++ = (u8)(arg >> 8); *cp++ = (u8)arg; - *cp++ = (crc7(0, &data->status[1], 5) << 1) | 0x01; + *cp++ = crc7_be(0, &data->status[1], 5) | 0x01; /* Then, read up to 13 bytes (while writing all-ones): * - N(CR) (== 1..8) bytes of all-ones diff --git a/drivers/net/wireless/ti/wl1251/acx.c b/drivers/net/wireless/ti/wl1251/acx.c index 5a4ec56c83d0..5695628757ee 100644 --- a/drivers/net/wireless/ti/wl1251/acx.c +++ b/drivers/net/wireless/ti/wl1251/acx.c @@ -2,7 +2,6 @@ #include #include -#include #include "wl1251.h" #include "reg.h" diff --git a/drivers/net/wireless/ti/wl1251/cmd.c b/drivers/net/wireless/ti/wl1251/cmd.c index bf1fa18b9786..ede31f048ef9 100644 --- a/drivers/net/wireless/ti/wl1251/cmd.c +++ b/drivers/net/wireless/ti/wl1251/cmd.c @@ -2,7 +2,6 @@ #include #include -#include #include #include "wl1251.h" diff --git a/drivers/net/wireless/ti/wl1251/spi.c b/drivers/net/wireless/ti/wl1251/spi.c index b06d36d99362..e94b57cd5a22 100644 --- a/drivers/net/wireless/ti/wl1251/spi.c +++ b/drivers/net/wireless/ti/wl1251/spi.c @@ -122,8 +122,7 @@ static void wl1251_spi_wake(struct wl1251 *wl) crc[3] = cmd[6]; crc[4] = cmd[5]; - cmd[4] |= crc7(0, crc, WSPI_INIT_CMD_CRC_LEN) << 1; - cmd[4] |= WSPI_INIT_CMD_END; + cmd[4] = crc7_be(0, crc, WSPI_INIT_CMD_CRC_LEN) | WSPI_INIT_CMD_END; t.tx_buf = cmd; t.len = WSPI_INIT_CMD_LEN; diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index 5f3a389dd74c..1d4ddabe6063 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -151,8 +151,7 @@ static void wl12xx_spi_init(struct device *child) crc[3] = cmd[6]; crc[4] = cmd[5]; - cmd[4] |= crc7(0, crc, WSPI_INIT_CMD_CRC_LEN) << 1; - cmd[4] |= WSPI_INIT_CMD_END; + cmd[4] = crc7_be(0, crc, WSPI_INIT_CMD_CRC_LEN) | WSPI_INIT_CMD_END; t.tx_buf = cmd; t.len = WSPI_INIT_CMD_LEN; diff --git a/include/linux/crc7.h b/include/linux/crc7.h index 1786e772d5c6..d590765106f3 100644 --- a/include/linux/crc7.h +++ b/include/linux/crc7.h @@ -2,13 +2,13 @@ #define _LINUX_CRC7_H #include -extern const u8 crc7_syndrome_table[256]; +extern const u8 crc7_be_syndrome_table[256]; -static inline u8 crc7_byte(u8 crc, u8 data) +static inline u8 crc7_be_byte(u8 crc, u8 data) { - return crc7_syndrome_table[(crc << 1) ^ data]; + return crc7_be_syndrome_table[crc ^ data]; } -extern u8 crc7(u8 crc, const u8 *buffer, size_t len); +extern u8 crc7_be(u8 crc, const u8 *buffer, size_t len); #endif diff --git a/lib/crc7.c b/lib/crc7.c index f1c3a144cec1..bf6255e23919 100644 --- a/lib/crc7.c +++ b/lib/crc7.c @@ -10,42 +10,47 @@ #include -/* Table for CRC-7 (polynomial x^7 + x^3 + 1) */ -const u8 crc7_syndrome_table[256] = { - 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, - 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, - 0x19, 0x10, 0x0b, 0x02, 0x3d, 0x34, 0x2f, 0x26, - 0x51, 0x58, 0x43, 0x4a, 0x75, 0x7c, 0x67, 0x6e, - 0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x04, 0x0d, - 0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45, - 0x2b, 0x22, 0x39, 0x30, 0x0f, 0x06, 0x1d, 0x14, - 0x63, 0x6a, 0x71, 0x78, 0x47, 0x4e, 0x55, 0x5c, - 0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b, - 0x2c, 0x25, 0x3e, 0x37, 0x08, 0x01, 0x1a, 0x13, - 0x7d, 0x74, 0x6f, 0x66, 0x59, 0x50, 0x4b, 0x42, - 0x35, 0x3c, 0x27, 0x2e, 0x11, 0x18, 0x03, 0x0a, - 0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69, - 0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21, - 0x4f, 0x46, 0x5d, 0x54, 0x6b, 0x62, 0x79, 0x70, - 0x07, 0x0e, 0x15, 0x1c, 0x23, 0x2a, 0x31, 0x38, - 0x41, 0x48, 0x53, 0x5a, 0x65, 0x6c, 0x77, 0x7e, - 0x09, 0x00, 0x1b, 0x12, 0x2d, 0x24, 0x3f, 0x36, - 0x58, 0x51, 0x4a, 0x43, 0x7c, 0x75, 0x6e, 0x67, - 0x10, 0x19, 0x02, 0x0b, 0x34, 0x3d, 0x26, 0x2f, - 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, - 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, - 0x6a, 0x63, 0x78, 0x71, 0x4e, 0x47, 0x5c, 0x55, - 0x22, 0x2b, 0x30, 0x39, 0x06, 0x0f, 0x14, 0x1d, - 0x25, 0x2c, 0x37, 0x3e, 0x01, 0x08, 0x13, 0x1a, - 0x6d, 0x64, 0x7f, 0x76, 0x49, 0x40, 0x5b, 0x52, - 0x3c, 0x35, 0x2e, 0x27, 0x18, 0x11, 0x0a, 0x03, - 0x74, 0x7d, 0x66, 0x6f, 0x50, 0x59, 0x42, 0x4b, - 0x17, 0x1e, 0x05, 0x0c, 0x33, 0x3a, 0x21, 0x28, - 0x5f, 0x56, 0x4d, 0x44, 0x7b, 0x72, 0x69, 0x60, - 0x0e, 0x07, 0x1c, 0x15, 0x2a, 0x23, 0x38, 0x31, - 0x46, 0x4f, 0x54, 0x5d, 0x62, 0x6b, 0x70, 0x79 +/* + * Table for CRC-7 (polynomial x^7 + x^3 + 1). + * This is a big-endian CRC (msbit is highest power of x), + * aligned so the msbit of the byte is the x^6 coefficient + * and the lsbit is not used. + */ +const u8 crc7_be_syndrome_table[256] = { + 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, + 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee, + 0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c, + 0xa2, 0xb0, 0x86, 0x94, 0xea, 0xf8, 0xce, 0xdc, + 0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a, + 0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a, + 0x56, 0x44, 0x72, 0x60, 0x1e, 0x0c, 0x3a, 0x28, + 0xc6, 0xd4, 0xe2, 0xf0, 0x8e, 0x9c, 0xaa, 0xb8, + 0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6, + 0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26, + 0xfa, 0xe8, 0xde, 0xcc, 0xb2, 0xa0, 0x96, 0x84, + 0x6a, 0x78, 0x4e, 0x5c, 0x22, 0x30, 0x06, 0x14, + 0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2, + 0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42, + 0x9e, 0x8c, 0xba, 0xa8, 0xd6, 0xc4, 0xf2, 0xe0, + 0x0e, 0x1c, 0x2a, 0x38, 0x46, 0x54, 0x62, 0x70, + 0x82, 0x90, 0xa6, 0xb4, 0xca, 0xd8, 0xee, 0xfc, + 0x12, 0x00, 0x36, 0x24, 0x5a, 0x48, 0x7e, 0x6c, + 0xb0, 0xa2, 0x94, 0x86, 0xf8, 0xea, 0xdc, 0xce, + 0x20, 0x32, 0x04, 0x16, 0x68, 0x7a, 0x4c, 0x5e, + 0xe6, 0xf4, 0xc2, 0xd0, 0xae, 0xbc, 0x8a, 0x98, + 0x76, 0x64, 0x52, 0x40, 0x3e, 0x2c, 0x1a, 0x08, + 0xd4, 0xc6, 0xf0, 0xe2, 0x9c, 0x8e, 0xb8, 0xaa, + 0x44, 0x56, 0x60, 0x72, 0x0c, 0x1e, 0x28, 0x3a, + 0x4a, 0x58, 0x6e, 0x7c, 0x02, 0x10, 0x26, 0x34, + 0xda, 0xc8, 0xfe, 0xec, 0x92, 0x80, 0xb6, 0xa4, + 0x78, 0x6a, 0x5c, 0x4e, 0x30, 0x22, 0x14, 0x06, + 0xe8, 0xfa, 0xcc, 0xde, 0xa0, 0xb2, 0x84, 0x96, + 0x2e, 0x3c, 0x0a, 0x18, 0x66, 0x74, 0x42, 0x50, + 0xbe, 0xac, 0x9a, 0x88, 0xf6, 0xe4, 0xd2, 0xc0, + 0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62, + 0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2 }; -EXPORT_SYMBOL(crc7_syndrome_table); +EXPORT_SYMBOL(crc7_be_syndrome_table); /** * crc7 - update the CRC7 for the data buffer @@ -55,14 +60,17 @@ EXPORT_SYMBOL(crc7_syndrome_table); * Context: any * * Returns the updated CRC7 value. + * The CRC7 is left-aligned in the byte (the lsbit is always 0), as that + * makes the computation easier, and all callers want it in that form. + * */ -u8 crc7(u8 crc, const u8 *buffer, size_t len) +u8 crc7_be(u8 crc, const u8 *buffer, size_t len) { while (len--) - crc = crc7_byte(crc, *buffer++); + crc = crc7_be_byte(crc, *buffer++); return crc; } -EXPORT_SYMBOL(crc7); +EXPORT_SYMBOL(crc7_be); MODULE_DESCRIPTION("CRC7 calculations"); MODULE_LICENSE("GPL"); -- cgit v1.2.3-71-gd317 From a75951217472c522c324adb0a4de3ba69d656ef5 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 16 May 2014 16:14:04 +0200 Subject: net: phy: extend fixed driver with fixed_phy_register() The existing fixed_phy_add() function has several drawbacks that prevents it from being used as is for OF-based declaration of fixed PHYs: * The address of the PHY on the fake bus needs to be passed, while a dynamic allocation is desired. * Since the phy_device instantiation is post-poned until the next mdiobus scan, there is no way to associate the fixed PHY with its OF node, which later prevents of_phy_connect() from finding this fixed PHY from a given OF node. To solve this, this commit introduces fixed_phy_register(), which will allocate an available PHY address, add the PHY using fixed_phy_add() and instantiate the phy_device structure associated with the provided OF node. Signed-off-by: Thomas Petazzoni Acked-by: Florian Fainelli Acked-by: Grant Likely Tested-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/fixed.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/phy_fixed.h | 11 +++++++++ 2 files changed, 72 insertions(+) (limited to 'include') diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed.c index e41546da105e..d60d875cb445 100644 --- a/drivers/net/phy/fixed.c +++ b/drivers/net/phy/fixed.c @@ -21,6 +21,7 @@ #include #include #include +#include #define MII_REGS_NUM 29 @@ -203,6 +204,66 @@ err_regs: } EXPORT_SYMBOL_GPL(fixed_phy_add); +void fixed_phy_del(int phy_addr) +{ + struct fixed_mdio_bus *fmb = &platform_fmb; + struct fixed_phy *fp, *tmp; + + list_for_each_entry_safe(fp, tmp, &fmb->phys, node) { + if (fp->addr == phy_addr) { + list_del(&fp->node); + kfree(fp); + return; + } + } +} +EXPORT_SYMBOL_GPL(fixed_phy_del); + +static int phy_fixed_addr; +static DEFINE_SPINLOCK(phy_fixed_addr_lock); + +int fixed_phy_register(unsigned int irq, + struct fixed_phy_status *status, + struct device_node *np) +{ + struct fixed_mdio_bus *fmb = &platform_fmb; + struct phy_device *phy; + int phy_addr; + int ret; + + /* Get the next available PHY address, up to PHY_MAX_ADDR */ + spin_lock(&phy_fixed_addr_lock); + if (phy_fixed_addr == PHY_MAX_ADDR) { + spin_unlock(&phy_fixed_addr_lock); + return -ENOSPC; + } + phy_addr = phy_fixed_addr++; + spin_unlock(&phy_fixed_addr_lock); + + ret = fixed_phy_add(PHY_POLL, phy_addr, status); + if (ret < 0) + return ret; + + phy = get_phy_device(fmb->mii_bus, phy_addr, false); + if (!phy || IS_ERR(phy)) { + fixed_phy_del(phy_addr); + return -EINVAL; + } + + of_node_get(np); + phy->dev.of_node = np; + + ret = phy_device_register(phy); + if (ret) { + phy_device_free(phy); + of_node_put(np); + fixed_phy_del(phy_addr); + return ret; + } + + return 0; +} + static int __init fixed_mdio_bus_init(void) { struct fixed_mdio_bus *fmb = &platform_fmb; diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 509d8f5f984e..4f2478b47136 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -9,15 +9,26 @@ struct fixed_phy_status { int asym_pause; }; +struct device_node; + #ifdef CONFIG_FIXED_PHY extern int fixed_phy_add(unsigned int irq, int phy_id, struct fixed_phy_status *status); +extern int fixed_phy_register(unsigned int irq, + struct fixed_phy_status *status, + struct device_node *np); #else static inline int fixed_phy_add(unsigned int irq, int phy_id, struct fixed_phy_status *status) { return -ENODEV; } +static inline int fixed_phy_register(unsigned int irq, + struct fixed_phy_status *status, + struct device_node *np) +{ + return -ENODEV; +} #endif /* CONFIG_FIXED_PHY */ /* -- cgit v1.2.3-71-gd317 From 3be2a49e5c08d268f8af0dd4fe89a24ea8cdc339 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 16 May 2014 16:14:05 +0200 Subject: of: provide a binding for fixed link PHYs Some Ethernet MACs have a "fixed link", and are not connected to a normal MDIO-managed PHY device. For those situations, a Device Tree binding allows to describe a "fixed link" using a special PHY node. This patch adds: * A documentation for the fixed PHY Device Tree binding. * An of_phy_is_fixed_link() function that an Ethernet driver can call on its PHY phandle to find out whether it's a fixed link PHY or not. It should typically be used to know if of_phy_register_fixed_link() should be called. * An of_phy_register_fixed_link() function that instantiates the fixed PHY into the PHY subsystem, so that when the driver calls of_phy_connect(), the PHY device associated to the OF node will be found. These two additional functions also support the old fixed-link Device Tree binding used on PowerPC platforms, so that ultimately, the network device drivers for those platforms could be converted to use of_phy_is_fixed_link() and of_phy_register_fixed_link() instead of of_phy_connect_fixed_link(), while keeping compatibility with their respective Device Tree bindings. Signed-off-by: Thomas Petazzoni Reviewed-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: David S. Miller --- .../devicetree/bindings/net/fixed-link.txt | 30 ++++++++++ drivers/of/of_mdio.c | 67 ++++++++++++++++++++++ include/linux/of_mdio.h | 15 +++++ 3 files changed, 112 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/fixed-link.txt (limited to 'include') diff --git a/Documentation/devicetree/bindings/net/fixed-link.txt b/Documentation/devicetree/bindings/net/fixed-link.txt new file mode 100644 index 000000000000..e956de1be935 --- /dev/null +++ b/Documentation/devicetree/bindings/net/fixed-link.txt @@ -0,0 +1,30 @@ +Fixed link Device Tree binding +------------------------------ + +Some Ethernet MACs have a "fixed link", and are not connected to a +normal MDIO-managed PHY device. For those situations, a Device Tree +binding allows to describe a "fixed link". + +Such a fixed link situation is described by creating a 'fixed-link' +sub-node of the Ethernet MAC device node, with the following +properties: + +* 'speed' (integer, mandatory), to indicate the link speed. Accepted + values are 10, 100 and 1000 +* 'full-duplex' (boolean, optional), to indicate that full duplex is + used. When absent, half duplex is assumed. +* 'pause' (boolean, optional), to indicate that pause should be + enabled. +* 'asym-pause' (boolean, optional), to indicate that asym_pause should + be enabled. + +Example: + +ethernet@0 { + ... + fixed-link { + speed = <1000>; + full-duplex; + }; + ... +}; diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 9a95831bd065..1def0bb5cb37 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -301,3 +302,69 @@ struct phy_device *of_phy_attach(struct net_device *dev, return phy_attach_direct(dev, phy, flags, iface) ? NULL : phy; } EXPORT_SYMBOL(of_phy_attach); + +#if defined(CONFIG_FIXED_PHY) +/* + * of_phy_is_fixed_link() and of_phy_register_fixed_link() must + * support two DT bindings: + * - the old DT binding, where 'fixed-link' was a property with 5 + * cells encoding various informations about the fixed PHY + * - the new DT binding, where 'fixed-link' is a sub-node of the + * Ethernet device. + */ +bool of_phy_is_fixed_link(struct device_node *np) +{ + struct device_node *dn; + int len; + + /* New binding */ + dn = of_get_child_by_name(np, "fixed-link"); + if (dn) { + of_node_put(dn); + return true; + } + + /* Old binding */ + if (of_get_property(np, "fixed-link", &len) && + len == (5 * sizeof(__be32))) + return true; + + return false; +} +EXPORT_SYMBOL(of_phy_is_fixed_link); + +int of_phy_register_fixed_link(struct device_node *np) +{ + struct fixed_phy_status status = {}; + struct device_node *fixed_link_node; + const __be32 *fixed_link_prop; + int len; + + /* New binding */ + fixed_link_node = of_get_child_by_name(np, "fixed-link"); + if (fixed_link_node) { + status.link = 1; + status.duplex = of_property_read_bool(np, "full-duplex"); + if (of_property_read_u32(fixed_link_node, "speed", &status.speed)) + return -EINVAL; + status.pause = of_property_read_bool(np, "pause"); + status.asym_pause = of_property_read_bool(np, "asym-pause"); + of_node_put(fixed_link_node); + return fixed_phy_register(PHY_POLL, &status, np); + } + + /* Old binding */ + fixed_link_prop = of_get_property(np, "fixed-link", &len); + if (fixed_link_prop && len == (5 * sizeof(__be32))) { + status.link = 1; + status.duplex = be32_to_cpu(fixed_link_prop[1]); + status.speed = be32_to_cpu(fixed_link_prop[2]); + status.pause = be32_to_cpu(fixed_link_prop[3]); + status.asym_pause = be32_to_cpu(fixed_link_prop[4]); + return fixed_phy_register(PHY_POLL, &status, np); + } + + return -ENODEV; +} +EXPORT_SYMBOL(of_phy_register_fixed_link); +#endif diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 881a7c3571f4..0aa367e316cb 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -72,4 +72,19 @@ static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np) } #endif /* CONFIG_OF */ +#if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY) +extern int of_phy_register_fixed_link(struct device_node *np); +extern bool of_phy_is_fixed_link(struct device_node *np); +#else +static inline int of_phy_register_fixed_link(struct device_node *np) +{ + return -ENOSYS; +} +static inline bool of_phy_is_fixed_link(struct device_node *np) +{ + return false; +} +#endif + + #endif /* __LINUX_OF_MDIO_H */ -- cgit v1.2.3-71-gd317 From dc20759f281c0f9e6c4fca8be251deca2954862a Mon Sep 17 00:00:00 2001 From: Phoebe Buckheister Date: Fri, 16 May 2014 17:46:35 +0200 Subject: ieee802154: add types for link-layer security The added structures match 802.15.4-2011 link-layer security PIBs as closely as is reasonable. Some lists required by the standard were modeled as bitmaps (frame_types and command_frame_ids in *llsec_key, 802.15.4-2011 7.5/Table 61), since using lists for those seems a bit excessive and not particularly useful. The DeviceDescriptorHandleList was inverted and is here a per-device list, since operations on this list are likely to have both a key and a device at hand, and per-device lists of keys are shorter than per-key lists of devices. Signed-off-by: Phoebe Buckheister Signed-off-by: David S. Miller --- include/net/ieee802154_netdev.h | 95 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) (limited to 'include') diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index bc9a7475e57e..6f8f9c2f6037 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -242,6 +242,88 @@ static inline struct ieee802154_mac_cb *mac_cb_init(struct sk_buff *skb) return mac_cb(skb); } +#define IEEE802154_LLSEC_KEY_SIZE 16 + +struct ieee802154_llsec_key_id { + u8 mode; + u8 id; + union { + struct ieee802154_addr device_addr; + __le32 short_source; + __le64 extended_source; + }; +}; + +struct ieee802154_llsec_key { + u8 frame_types; + u32 cmd_frame_ids; + u8 key[IEEE802154_LLSEC_KEY_SIZE]; +}; + +struct ieee802154_llsec_key_entry { + struct list_head list; + + struct ieee802154_llsec_key_id id; + struct ieee802154_llsec_key *key; +}; + +struct ieee802154_llsec_device_key { + struct list_head list; + + struct ieee802154_llsec_key_id key_id; + u32 frame_counter; +}; + +enum { + IEEE802154_LLSEC_DEVKEY_IGNORE, + IEEE802154_LLSEC_DEVKEY_RESTRICT, + + __IEEE802154_LLSEC_DEVKEY_MAX, +}; + +struct ieee802154_llsec_device { + struct list_head list; + + __le16 pan_id; + __le16 short_addr; + __le64 hwaddr; + u32 frame_counter; + bool seclevel_exempt; + + u8 key_mode; + struct list_head keys; +}; + +struct ieee802154_llsec_seclevel { + struct list_head list; + + u8 frame_type; + u8 cmd_frame_id; + bool device_override; + u32 sec_levels; +}; + +struct ieee802154_llsec_params { + bool enabled; + + __be32 frame_counter; + u8 out_level; + struct ieee802154_llsec_key_id out_key; + + __le64 default_key_source; + + __le16 pan_id; + __le64 hwaddr; + __le64 coord_hwaddr; + __le16 coord_shortaddr; +}; + +struct ieee802154_llsec_table { + struct list_head keys; + struct list_head devices; + struct list_head security_levels; +}; + #define IEEE802154_MAC_SCAN_ED 0 #define IEEE802154_MAC_SCAN_ACTIVE 1 #define IEEE802154_MAC_SCAN_PASSIVE 2 @@ -260,6 +342,19 @@ struct ieee802154_mac_params { }; struct wpan_phy; + +enum { + IEEE802154_LLSEC_PARAM_ENABLED = 1 << 0, + IEEE802154_LLSEC_PARAM_FRAME_COUNTER = 1 << 1, + IEEE802154_LLSEC_PARAM_OUT_LEVEL = 1 << 2, + IEEE802154_LLSEC_PARAM_OUT_KEY = 1 << 3, + IEEE802154_LLSEC_PARAM_KEY_SOURCE = 1 << 4, + IEEE802154_LLSEC_PARAM_PAN_ID = 1 << 5, + IEEE802154_LLSEC_PARAM_HWADDR = 1 << 6, + IEEE802154_LLSEC_PARAM_COORD_HWADDR = 1 << 7, + IEEE802154_LLSEC_PARAM_COORD_SHORTADDR = 1 << 8, +}; + /* * This should be located at net_device->ml_priv * -- cgit v1.2.3-71-gd317 From f30be4d53cada48598dab0983866ae4b16af46dc Mon Sep 17 00:00:00 2001 From: Phoebe Buckheister Date: Fri, 16 May 2014 17:46:40 +0200 Subject: mac802154: integrate llsec with wpan devices Signed-off-by: Phoebe Buckheister Signed-off-by: David S. Miller --- include/net/ieee802154_netdev.h | 3 + net/mac802154/mac802154.h | 10 ++++ net/mac802154/wpan.c | 118 ++++++++++++++++++++++++++++++---------- 3 files changed, 103 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 6f8f9c2f6037..000c8552d5de 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -225,6 +225,9 @@ struct ieee802154_mac_cb { u8 type; bool ackreq; bool secen; + bool secen_override; + u8 seclevel; + bool seclevel_override; struct ieee802154_addr source; struct ieee802154_addr dest; }; diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h index e05f66e2eda3..a8d7cbc701a0 100644 --- a/net/mac802154/mac802154.h +++ b/net/mac802154/mac802154.h @@ -23,9 +23,12 @@ #ifndef MAC802154_H #define MAC802154_H +#include #include #include +#include "llsec.h" + /* mac802154 device private data */ struct mac802154_priv { struct ieee802154_dev hw; @@ -91,6 +94,13 @@ struct mac802154_sub_if_data { u8 bsn; /* MAC DSN field */ u8 dsn; + + /* protects sec from concurrent access by netlink. access by + * encrypt/decrypt/header_create safe without additional protection. + */ + struct mutex sec_mtx; + + struct mac802154_llsec sec; }; #define mac802154_to_priv(_hw) container_of(_hw, struct mac802154_priv, hw) diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index ed105774c15d..00729ca1e30a 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -183,6 +183,38 @@ out: return rc; } +static int mac802154_set_header_security(struct mac802154_sub_if_data *priv, + struct ieee802154_hdr *hdr, + const struct ieee802154_mac_cb *cb) +{ + struct ieee802154_llsec_params params; + u8 level; + + mac802154_llsec_get_params(&priv->sec, ¶ms); + + if (!params.enabled && cb->secen_override && cb->secen) + return -EINVAL; + if (!params.enabled || + (cb->secen_override && !cb->secen) || + !params.out_level) + return 0; + if (cb->seclevel_override && !cb->seclevel) + return -EINVAL; + + level = cb->seclevel_override ? cb->seclevel : params.out_level; + + hdr->fc.security_enabled = 1; + hdr->sec.level = level; + hdr->sec.key_id_mode = params.out_key.mode; + if (params.out_key.mode == IEEE802154_SCF_KEY_SHORT_INDEX) + hdr->sec.short_src = params.out_key.short_source; + else if (params.out_key.mode == IEEE802154_SCF_KEY_HW_INDEX) + hdr->sec.extended_src = params.out_key.extended_source; + hdr->sec.key_id = params.out_key.id; + + return 0; +} + static int mac802154_header_create(struct sk_buff *skb, struct net_device *dev, unsigned short type, @@ -204,6 +236,9 @@ static int mac802154_header_create(struct sk_buff *skb, hdr.fc.ack_request = cb->ackreq; hdr.seq = ieee802154_mlme_ops(dev)->get_dsn(dev); + if (mac802154_set_header_security(priv, &hdr, cb) < 0) + return -EINVAL; + if (!saddr) { spin_lock_bh(&priv->mib_lock); @@ -259,6 +294,7 @@ mac802154_wpan_xmit(struct sk_buff *skb, struct net_device *dev) { struct mac802154_sub_if_data *priv; u8 chan, page; + int rc; priv = netdev_priv(dev); @@ -274,6 +310,13 @@ mac802154_wpan_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } + rc = mac802154_llsec_encrypt(&priv->sec, skb); + if (rc) { + pr_warn("encryption failed: %i\n", rc); + kfree_skb(skb); + return NETDEV_TX_OK; + } + skb->skb_iif = dev->ifindex; dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; @@ -294,6 +337,15 @@ static const struct net_device_ops mac802154_wpan_ops = { .ndo_set_mac_address = mac802154_wpan_mac_addr, }; +static void mac802154_wpan_free(struct net_device *dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + mac802154_llsec_destroy(&priv->sec); + + free_netdev(dev); +} + void mac802154_wpan_setup(struct net_device *dev) { struct mac802154_sub_if_data *priv; @@ -303,14 +355,14 @@ void mac802154_wpan_setup(struct net_device *dev) dev->hard_header_len = MAC802154_FRAME_HARD_HEADER_LEN; dev->header_ops = &mac802154_header_ops; - dev->needed_tailroom = 2; /* FCS */ + dev->needed_tailroom = 2 + 16; /* FCS + MIC */ dev->mtu = IEEE802154_MTU; dev->tx_queue_len = 300; dev->type = ARPHRD_IEEE802154; dev->flags = IFF_NOARP | IFF_BROADCAST; dev->watchdog_timeo = 0; - dev->destructor = free_netdev; + dev->destructor = mac802154_wpan_free; dev->netdev_ops = &mac802154_wpan_ops; dev->ml_priv = &mac802154_mlme_wpan; @@ -321,6 +373,7 @@ void mac802154_wpan_setup(struct net_device *dev) priv->page = 0; spin_lock_init(&priv->mib_lock); + mutex_init(&priv->sec_mtx); get_random_bytes(&priv->bsn, 1); get_random_bytes(&priv->dsn, 1); @@ -333,6 +386,8 @@ void mac802154_wpan_setup(struct net_device *dev) priv->pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); priv->short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); + + mac802154_llsec_init(&priv->sec); } static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb) @@ -341,9 +396,11 @@ static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb) } static int -mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb) +mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb, + const struct ieee802154_hdr *hdr) { __le16 span, sshort; + int rc; pr_debug("getting packet via slave interface %s\n", sdata->dev->name); @@ -390,6 +447,12 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb) skb->dev = sdata->dev; + rc = mac802154_llsec_decrypt(&sdata->sec, skb); + if (rc) { + pr_debug("decryption failed: %i\n", rc); + return NET_RX_DROP; + } + sdata->dev->stats.rx_packets++; sdata->dev->stats.rx_bytes += skb->len; @@ -421,60 +484,58 @@ static void mac802154_print_addr(const char *name, } } -static int mac802154_parse_frame_start(struct sk_buff *skb) +static int mac802154_parse_frame_start(struct sk_buff *skb, + struct ieee802154_hdr *hdr) { int hlen; - struct ieee802154_hdr hdr; struct ieee802154_mac_cb *cb = mac_cb_init(skb); - hlen = ieee802154_hdr_pull(skb, &hdr); + hlen = ieee802154_hdr_pull(skb, hdr); if (hlen < 0) return -EINVAL; skb->mac_len = hlen; - pr_debug("fc: %04x dsn: %02x\n", le16_to_cpup((__le16 *)&hdr.fc), - hdr.seq); + pr_debug("fc: %04x dsn: %02x\n", le16_to_cpup((__le16 *)&hdr->fc), + hdr->seq); - cb->type = hdr.fc.type; - cb->ackreq = hdr.fc.ack_request; - cb->secen = hdr.fc.security_enabled; + cb->type = hdr->fc.type; + cb->ackreq = hdr->fc.ack_request; + cb->secen = hdr->fc.security_enabled; - mac802154_print_addr("destination", &hdr.dest); - mac802154_print_addr("source", &hdr.source); + mac802154_print_addr("destination", &hdr->dest); + mac802154_print_addr("source", &hdr->source); - cb->source = hdr.source; - cb->dest = hdr.dest; + cb->source = hdr->source; + cb->dest = hdr->dest; - if (hdr.fc.security_enabled) { + if (hdr->fc.security_enabled) { u64 key; - pr_debug("seclevel %i\n", hdr.sec.level); + pr_debug("seclevel %i\n", hdr->sec.level); - switch (hdr.sec.key_id_mode) { + switch (hdr->sec.key_id_mode) { case IEEE802154_SCF_KEY_IMPLICIT: pr_debug("implicit key\n"); break; case IEEE802154_SCF_KEY_INDEX: - pr_debug("key %02x\n", hdr.sec.key_id); + pr_debug("key %02x\n", hdr->sec.key_id); break; case IEEE802154_SCF_KEY_SHORT_INDEX: pr_debug("key %04x:%04x %02x\n", - le32_to_cpu(hdr.sec.short_src) >> 16, - le32_to_cpu(hdr.sec.short_src) & 0xffff, - hdr.sec.key_id); + le32_to_cpu(hdr->sec.short_src) >> 16, + le32_to_cpu(hdr->sec.short_src) & 0xffff, + hdr->sec.key_id); break; case IEEE802154_SCF_KEY_HW_INDEX: - key = swab64((__force u64) hdr.sec.extended_src); + key = swab64((__force u64) hdr->sec.extended_src); pr_debug("key source %8phC %02x\n", &key, - hdr.sec.key_id); + hdr->sec.key_id); break; } - - return -EINVAL; } return 0; @@ -485,8 +546,9 @@ void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb) int ret; struct sk_buff *sskb; struct mac802154_sub_if_data *sdata; + struct ieee802154_hdr hdr; - ret = mac802154_parse_frame_start(skb); + ret = mac802154_parse_frame_start(skb, &hdr); if (ret) { pr_debug("got invalid frame\n"); return; @@ -499,7 +561,7 @@ void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb) sskb = skb_clone(skb, GFP_ATOMIC); if (sskb) - mac802154_subif_frame(sdata, sskb); + mac802154_subif_frame(sdata, sskb, &hdr); } rcu_read_unlock(); } -- cgit v1.2.3-71-gd317 From af9eed5bbf0fb4e398081e79a707545dcca5ebda Mon Sep 17 00:00:00 2001 From: Phoebe Buckheister Date: Fri, 16 May 2014 17:46:41 +0200 Subject: ieee802154: add dgram sockopts for security control Allow datagram sockets to override the security settings of the device they send from on a per-socket basis. Requires CAP_NET_ADMIN or CAP_NET_RAW, since raw sockets can send arbitrary packets anyway. Signed-off-by: Phoebe Buckheister Signed-off-by: David S. Miller --- include/net/af_ieee802154.h | 10 ++++++- net/ieee802154/dgram.c | 66 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/af_ieee802154.h b/include/net/af_ieee802154.h index f79ae2aa76d6..085940f7eeec 100644 --- a/include/net/af_ieee802154.h +++ b/include/net/af_ieee802154.h @@ -57,6 +57,14 @@ struct sockaddr_ieee802154 { /* get/setsockopt */ #define SOL_IEEE802154 0 -#define WPAN_WANTACK 0 +#define WPAN_WANTACK 0 +#define WPAN_SECURITY 1 +#define WPAN_SECURITY_LEVEL 2 + +#define WPAN_SECURITY_DEFAULT 0 +#define WPAN_SECURITY_OFF 1 +#define WPAN_SECURITY_ON 2 + +#define WPAN_SECURITY_LEVEL_DEFAULT (-1) #endif diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 76c77256a56e..4f0ed8780194 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -21,6 +21,7 @@ * Dmitry Eremin-Solenikov */ +#include #include #include #include @@ -47,6 +48,10 @@ struct dgram_sock { unsigned int bound:1; unsigned int connected:1; unsigned int want_ack:1; + unsigned int secen:1; + unsigned int secen_override:1; + unsigned int seclevel:3; + unsigned int seclevel_override:1; }; static inline struct dgram_sock *dgram_sk(const struct sock *sk) @@ -264,6 +269,11 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, dst_addr = ro->dst_addr; } + cb->secen = ro->secen; + cb->secen_override = ro->secen_override; + cb->seclevel = ro->seclevel; + cb->seclevel_override = ro->seclevel_override; + err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr, ro->bound ? &ro->src_addr : NULL, size); if (err < 0) @@ -427,6 +437,20 @@ static int dgram_getsockopt(struct sock *sk, int level, int optname, case WPAN_WANTACK: val = ro->want_ack; break; + case WPAN_SECURITY: + if (!ro->secen_override) + val = WPAN_SECURITY_DEFAULT; + else if (ro->secen) + val = WPAN_SECURITY_ON; + else + val = WPAN_SECURITY_OFF; + break; + case WPAN_SECURITY_LEVEL: + if (!ro->seclevel_override) + val = WPAN_SECURITY_LEVEL_DEFAULT; + else + val = ro->seclevel; + break; default: return -ENOPROTOOPT; } @@ -442,6 +466,7 @@ static int dgram_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { struct dgram_sock *ro = dgram_sk(sk); + struct net *net = sock_net(sk); int val; int err = 0; @@ -457,6 +482,47 @@ static int dgram_setsockopt(struct sock *sk, int level, int optname, case WPAN_WANTACK: ro->want_ack = !!val; break; + case WPAN_SECURITY: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { + err = -EPERM; + break; + } + + switch (val) { + case WPAN_SECURITY_DEFAULT: + ro->secen_override = 0; + break; + case WPAN_SECURITY_ON: + ro->secen_override = 1; + ro->secen = 1; + break; + case WPAN_SECURITY_OFF: + ro->secen_override = 1; + ro->secen = 0; + break; + default: + err = -EINVAL; + break; + } + break; + case WPAN_SECURITY_LEVEL: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { + err = -EPERM; + break; + } + + if (val < WPAN_SECURITY_LEVEL_DEFAULT || + val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) { + err = -EINVAL; + } else if (val == WPAN_SECURITY_LEVEL_DEFAULT) { + ro->seclevel_override = 0; + } else { + ro->seclevel_override = 1; + ro->seclevel = val; + } + break; default: err = -ENOPROTOOPT; break; -- cgit v1.2.3-71-gd317 From 29e023746a672e4ff702ca9dc63a06145fd8f4b0 Mon Sep 17 00:00:00 2001 From: Phoebe Buckheister Date: Fri, 16 May 2014 17:46:42 +0200 Subject: mac802154: add llsec configuration functions Signed-off-by: Phoebe Buckheister Signed-off-by: David S. Miller --- include/net/ieee802154_netdev.h | 36 ++++++++ net/mac802154/mac802154.h | 33 +++++++ net/mac802154/mac_cmd.c | 18 ++++ net/mac802154/mib.c | 187 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 274 insertions(+) (limited to 'include') diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 000c8552d5de..eb9f850a51b6 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -358,6 +358,40 @@ enum { IEEE802154_LLSEC_PARAM_COORD_SHORTADDR = 1 << 8, }; +struct ieee802154_llsec_ops { + int (*get_params)(struct net_device *dev, + struct ieee802154_llsec_params *params); + int (*set_params)(struct net_device *dev, + const struct ieee802154_llsec_params *params, + int changed); + + int (*add_key)(struct net_device *dev, + const struct ieee802154_llsec_key_id *id, + const struct ieee802154_llsec_key *key); + int (*del_key)(struct net_device *dev, + const struct ieee802154_llsec_key_id *id); + + int (*add_dev)(struct net_device *dev, + const struct ieee802154_llsec_device *llsec_dev); + int (*del_dev)(struct net_device *dev, __le64 dev_addr); + + int (*add_devkey)(struct net_device *dev, + __le64 device_addr, + const struct ieee802154_llsec_device_key *key); + int (*del_devkey)(struct net_device *dev, + __le64 device_addr, + const struct ieee802154_llsec_device_key *key); + + int (*add_seclevel)(struct net_device *dev, + const struct ieee802154_llsec_seclevel *sl); + int (*del_seclevel)(struct net_device *dev, + const struct ieee802154_llsec_seclevel *sl); + + void (*lock_table)(struct net_device *dev); + void (*get_table)(struct net_device *dev, + struct ieee802154_llsec_table **t); + void (*unlock_table)(struct net_device *dev); +}; /* * This should be located at net_device->ml_priv * @@ -388,6 +422,8 @@ struct ieee802154_mlme_ops { void (*get_mac_params)(struct net_device *dev, struct ieee802154_mac_params *params); + struct ieee802154_llsec_ops *llsec; + /* The fields below are required. */ struct wpan_phy *(*get_phy)(const struct net_device *dev); diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h index a8d7cbc701a0..762a6f849c6b 100644 --- a/net/mac802154/mac802154.h +++ b/net/mac802154/mac802154.h @@ -136,4 +136,37 @@ int mac802154_set_mac_params(struct net_device *dev, void mac802154_get_mac_params(struct net_device *dev, struct ieee802154_mac_params *params); +int mac802154_get_params(struct net_device *dev, + struct ieee802154_llsec_params *params); +int mac802154_set_params(struct net_device *dev, + const struct ieee802154_llsec_params *params, + int changed); + +int mac802154_add_key(struct net_device *dev, + const struct ieee802154_llsec_key_id *id, + const struct ieee802154_llsec_key *key); +int mac802154_del_key(struct net_device *dev, + const struct ieee802154_llsec_key_id *id); + +int mac802154_add_dev(struct net_device *dev, + const struct ieee802154_llsec_device *llsec_dev); +int mac802154_del_dev(struct net_device *dev, __le64 dev_addr); + +int mac802154_add_devkey(struct net_device *dev, + __le64 device_addr, + const struct ieee802154_llsec_device_key *key); +int mac802154_del_devkey(struct net_device *dev, + __le64 device_addr, + const struct ieee802154_llsec_device_key *key); + +int mac802154_add_seclevel(struct net_device *dev, + const struct ieee802154_llsec_seclevel *sl); +int mac802154_del_seclevel(struct net_device *dev, + const struct ieee802154_llsec_seclevel *sl); + +void mac802154_lock_table(struct net_device *dev); +void mac802154_get_table(struct net_device *dev, + struct ieee802154_llsec_table **t); +void mac802154_unlock_table(struct net_device *dev); + #endif /* MAC802154_H */ diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index d40c0928bc62..afb4e2cbc00a 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -64,6 +64,22 @@ static struct wpan_phy *mac802154_get_phy(const struct net_device *dev) return to_phy(get_device(&priv->hw->phy->dev)); } +static struct ieee802154_llsec_ops mac802154_llsec_ops = { + .get_params = mac802154_get_params, + .set_params = mac802154_set_params, + .add_key = mac802154_add_key, + .del_key = mac802154_del_key, + .add_dev = mac802154_add_dev, + .del_dev = mac802154_del_dev, + .add_devkey = mac802154_add_devkey, + .del_devkey = mac802154_del_devkey, + .add_seclevel = mac802154_add_seclevel, + .del_seclevel = mac802154_del_seclevel, + .lock_table = mac802154_lock_table, + .get_table = mac802154_get_table, + .unlock_table = mac802154_unlock_table, +}; + struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced = { .get_phy = mac802154_get_phy, }; @@ -75,6 +91,8 @@ struct ieee802154_mlme_ops mac802154_mlme_wpan = { .get_short_addr = mac802154_dev_get_short_addr, .get_dsn = mac802154_dev_get_dsn, + .llsec = &mac802154_llsec_ops, + .set_mac_params = mac802154_set_mac_params, .get_mac_params = mac802154_get_mac_params, }; diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c index f0991f2344d4..15aa2f2b03a7 100644 --- a/net/mac802154/mib.c +++ b/net/mac802154/mib.c @@ -213,3 +213,190 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan) } else mutex_unlock(&priv->hw->phy->pib_lock); } + + +int mac802154_get_params(struct net_device *dev, + struct ieee802154_llsec_params *params) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_get_params(&priv->sec, params); + mutex_unlock(&priv->sec_mtx); + + return res; +} + +int mac802154_set_params(struct net_device *dev, + const struct ieee802154_llsec_params *params, + int changed) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_set_params(&priv->sec, params, changed); + mutex_unlock(&priv->sec_mtx); + + return res; +} + + +int mac802154_add_key(struct net_device *dev, + const struct ieee802154_llsec_key_id *id, + const struct ieee802154_llsec_key *key) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_key_add(&priv->sec, id, key); + mutex_unlock(&priv->sec_mtx); + + return res; +} + +int mac802154_del_key(struct net_device *dev, + const struct ieee802154_llsec_key_id *id) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_key_del(&priv->sec, id); + mutex_unlock(&priv->sec_mtx); + + return res; +} + + +int mac802154_add_dev(struct net_device *dev, + const struct ieee802154_llsec_device *llsec_dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_dev_add(&priv->sec, llsec_dev); + mutex_unlock(&priv->sec_mtx); + + return res; +} + +int mac802154_del_dev(struct net_device *dev, __le64 dev_addr) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_dev_del(&priv->sec, dev_addr); + mutex_unlock(&priv->sec_mtx); + + return res; +} + + +int mac802154_add_devkey(struct net_device *dev, + __le64 device_addr, + const struct ieee802154_llsec_device_key *key) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_devkey_add(&priv->sec, device_addr, key); + mutex_unlock(&priv->sec_mtx); + + return res; +} + +int mac802154_del_devkey(struct net_device *dev, + __le64 device_addr, + const struct ieee802154_llsec_device_key *key) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_devkey_del(&priv->sec, device_addr, key); + mutex_unlock(&priv->sec_mtx); + + return res; +} + + +int mac802154_add_seclevel(struct net_device *dev, + const struct ieee802154_llsec_seclevel *sl) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_seclevel_add(&priv->sec, sl); + mutex_unlock(&priv->sec_mtx); + + return res; +} + +int mac802154_del_seclevel(struct net_device *dev, + const struct ieee802154_llsec_seclevel *sl) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_seclevel_del(&priv->sec, sl); + mutex_unlock(&priv->sec_mtx); + + return res; +} + + +void mac802154_lock_table(struct net_device *dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); +} + +void mac802154_get_table(struct net_device *dev, + struct ieee802154_llsec_table **t) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + *t = &priv->sec.table; +} + +void mac802154_unlock_table(struct net_device *dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_unlock(&priv->sec_mtx); +} -- cgit v1.2.3-71-gd317 From 3e9c156e2c210ab67b12b1b692983a6b97c19d3f Mon Sep 17 00:00:00 2001 From: Phoebe Buckheister Date: Fri, 16 May 2014 17:46:44 +0200 Subject: ieee802154: add netlink interfaces for llsec This patch adds user-visible interfaces for the llsec infrastructure. For the added methods, the only major difference between all add/remove implementation lies in how the specific object is parsed, and for dump requests, how objects are written into netlink messages. To save on boilerplate code, table dumps are routed through a helper function that handles netlink dump state, leaving the actual dumping code to care only about iterating over the table to be dumped and filling netlink messages. For add/remove methods, the boilerplate required to work is not quite as large, but still enough to also move into a local helper. Signed-off-by: Phoebe Buckheister Signed-off-by: David S. Miller --- include/linux/nl802154.h | 31 ++ net/ieee802154/ieee802154.h | 19 ++ net/ieee802154/netlink.c | 20 ++ net/ieee802154/nl-mac.c | 807 ++++++++++++++++++++++++++++++++++++++++++++ net/ieee802154/nl_policy.c | 16 + 5 files changed, 893 insertions(+) (limited to 'include') diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index c8d7f3965fff..20163b9a0eae 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -80,6 +80,22 @@ enum { IEEE802154_ATTR_FRAME_RETRIES, + IEEE802154_ATTR_LLSEC_ENABLED, + IEEE802154_ATTR_LLSEC_SECLEVEL, + IEEE802154_ATTR_LLSEC_KEY_MODE, + IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT, + IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED, + IEEE802154_ATTR_LLSEC_KEY_ID, + IEEE802154_ATTR_LLSEC_FRAME_COUNTER, + IEEE802154_ATTR_LLSEC_KEY_BYTES, + IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES, + IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS, + IEEE802154_ATTR_LLSEC_FRAME_TYPE, + IEEE802154_ATTR_LLSEC_CMD_FRAME_ID, + IEEE802154_ATTR_LLSEC_SECLEVELS, + IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, + IEEE802154_ATTR_LLSEC_DEV_KEY_MODE, + __IEEE802154_ATTR_MAX, }; @@ -134,6 +150,21 @@ enum { IEEE802154_SET_MACPARAMS, + IEEE802154_LLSEC_GETPARAMS, + IEEE802154_LLSEC_SETPARAMS, + IEEE802154_LLSEC_LIST_KEY, + IEEE802154_LLSEC_ADD_KEY, + IEEE802154_LLSEC_DEL_KEY, + IEEE802154_LLSEC_LIST_DEV, + IEEE802154_LLSEC_ADD_DEV, + IEEE802154_LLSEC_DEL_DEV, + IEEE802154_LLSEC_LIST_DEVKEY, + IEEE802154_LLSEC_ADD_DEVKEY, + IEEE802154_LLSEC_DEL_DEVKEY, + IEEE802154_LLSEC_LIST_SECLEVEL, + IEEE802154_LLSEC_ADD_SECLEVEL, + IEEE802154_LLSEC_DEL_SECLEVEL, + __IEEE802154_CMD_MAX, }; diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h index 6693a5cf01ce..8b83a231299e 100644 --- a/net/ieee802154/ieee802154.h +++ b/net/ieee802154/ieee802154.h @@ -68,4 +68,23 @@ int ieee802154_list_iface(struct sk_buff *skb, struct genl_info *info); int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb); int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_dump_keys(struct sk_buff *skb, + struct netlink_callback *cb); +int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_dump_devs(struct sk_buff *skb, + struct netlink_callback *cb); +int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_dump_devkeys(struct sk_buff *skb, + struct netlink_callback *cb); +int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_dump_seclevels(struct sk_buff *skb, + struct netlink_callback *cb); + #endif diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 04b20589d97a..26efcf4fd2ff 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -124,6 +124,26 @@ static const struct genl_ops ieee8021154_ops[] = { IEEE802154_DUMP(IEEE802154_LIST_IFACE, ieee802154_list_iface, ieee802154_dump_iface), IEEE802154_OP(IEEE802154_SET_MACPARAMS, ieee802154_set_macparams), + IEEE802154_OP(IEEE802154_LLSEC_GETPARAMS, ieee802154_llsec_getparams), + IEEE802154_OP(IEEE802154_LLSEC_SETPARAMS, ieee802154_llsec_setparams), + IEEE802154_DUMP(IEEE802154_LLSEC_LIST_KEY, NULL, + ieee802154_llsec_dump_keys), + IEEE802154_OP(IEEE802154_LLSEC_ADD_KEY, ieee802154_llsec_add_key), + IEEE802154_OP(IEEE802154_LLSEC_DEL_KEY, ieee802154_llsec_del_key), + IEEE802154_DUMP(IEEE802154_LLSEC_LIST_DEV, NULL, + ieee802154_llsec_dump_devs), + IEEE802154_OP(IEEE802154_LLSEC_ADD_DEV, ieee802154_llsec_add_dev), + IEEE802154_OP(IEEE802154_LLSEC_DEL_DEV, ieee802154_llsec_del_dev), + IEEE802154_DUMP(IEEE802154_LLSEC_LIST_DEVKEY, NULL, + ieee802154_llsec_dump_devkeys), + IEEE802154_OP(IEEE802154_LLSEC_ADD_DEVKEY, ieee802154_llsec_add_devkey), + IEEE802154_OP(IEEE802154_LLSEC_DEL_DEVKEY, ieee802154_llsec_del_devkey), + IEEE802154_DUMP(IEEE802154_LLSEC_LIST_SECLEVEL, NULL, + ieee802154_llsec_dump_seclevels), + IEEE802154_OP(IEEE802154_LLSEC_ADD_SECLEVEL, + ieee802154_llsec_add_seclevel), + IEEE802154_OP(IEEE802154_LLSEC_DEL_SECLEVEL, + ieee802154_llsec_del_seclevel), }; static const struct genl_multicast_group ieee802154_mcgrps[] = { diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 5d285498c0f6..5617b4c6d6d5 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -715,3 +715,810 @@ out: dev_put(dev); return rc; } + + + +static int +ieee802154_llsec_parse_key_id(struct genl_info *info, + struct ieee802154_llsec_key_id *desc) +{ + memset(desc, 0, sizeof(*desc)); + + if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]) + return -EINVAL; + + desc->mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]); + + if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) { + if (!info->attrs[IEEE802154_ATTR_PAN_ID] && + !(info->attrs[IEEE802154_ATTR_SHORT_ADDR] || + info->attrs[IEEE802154_ATTR_HW_ADDR])) + return -EINVAL; + + desc->device_addr.pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]); + + if (info->attrs[IEEE802154_ATTR_SHORT_ADDR]) { + desc->device_addr.mode = IEEE802154_ADDR_SHORT; + desc->device_addr.short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]); + } else { + desc->device_addr.mode = IEEE802154_ADDR_LONG; + desc->device_addr.extended_addr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + } + } + + if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID]) + return -EINVAL; + + if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT]) + return -EINVAL; + + if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED]) + return -EINVAL; + + if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT) + desc->id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID]); + + switch (desc->mode) { + case IEEE802154_SCF_KEY_SHORT_INDEX: + { + u32 source = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT]); + desc->short_source = cpu_to_le32(source); + break; + } + case IEEE802154_SCF_KEY_HW_INDEX: + desc->extended_source = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED]); + break; + } + + return 0; +} + +static int +ieee802154_llsec_fill_key_id(struct sk_buff *msg, + const struct ieee802154_llsec_key_id *desc) +{ + if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_MODE, desc->mode)) + return -EMSGSIZE; + + if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) { + if (nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, + desc->device_addr.pan_id)) + return -EMSGSIZE; + + if (desc->device_addr.mode == IEEE802154_ADDR_SHORT && + nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, + desc->device_addr.short_addr)) + return -EMSGSIZE; + + if (desc->device_addr.mode == IEEE802154_ADDR_LONG && + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, + desc->device_addr.extended_addr)) + return -EMSGSIZE; + } + + if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT && + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_ID, desc->id)) + return -EMSGSIZE; + + if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX && + nla_put_u32(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT, + le32_to_cpu(desc->short_source))) + return -EMSGSIZE; + + if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX && + nla_put_hwaddr(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED, + desc->extended_source)) + return -EMSGSIZE; + + return 0; +} + +int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + struct net_device *dev = NULL; + int rc = -ENOBUFS; + struct ieee802154_mlme_ops *ops; + void *hdr; + struct ieee802154_llsec_params params; + + pr_debug("%s\n", __func__); + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + ops = ieee802154_mlme_ops(dev); + if (!ops->llsec) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + goto out_dev; + + hdr = genlmsg_put(msg, 0, info->snd_seq, &nl802154_family, 0, + IEEE802154_LLSEC_GETPARAMS); + if (!hdr) + goto out_free; + + rc = ops->llsec->get_params(dev, ¶ms); + if (rc < 0) + goto out_free; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_ENABLED, params.enabled) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVEL, params.out_level) || + nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, + be32_to_cpu(params.frame_counter)) || + ieee802154_llsec_fill_key_id(msg, ¶ms.out_key)) + goto out_free; + + dev_put(dev); + + return ieee802154_nl_reply(msg, info); +out_free: + nlmsg_free(msg); +out_dev: + dev_put(dev); + return rc; +} + +int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device *dev = NULL; + int rc = -EINVAL; + struct ieee802154_mlme_ops *ops; + struct ieee802154_llsec_params params; + int changed = 0; + + pr_debug("%s\n", __func__); + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + if (!info->attrs[IEEE802154_ATTR_LLSEC_ENABLED] && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE] && + !info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) + goto out; + + ops = ieee802154_mlme_ops(dev); + if (!ops->llsec) { + rc = -EOPNOTSUPP; + goto out; + } + + if (info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL] && + nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) > 7) + goto out; + + if (info->attrs[IEEE802154_ATTR_LLSEC_ENABLED]) { + params.enabled = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_ENABLED]); + changed |= IEEE802154_LLSEC_PARAM_ENABLED; + } + + if (info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]) { + if (ieee802154_llsec_parse_key_id(info, ¶ms.out_key)) + goto out; + + changed |= IEEE802154_LLSEC_PARAM_OUT_KEY; + } + + if (info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) { + params.out_level = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]); + changed |= IEEE802154_LLSEC_PARAM_OUT_LEVEL; + } + + if (info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]) { + u32 fc = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]); + + params.frame_counter = cpu_to_be32(fc); + changed |= IEEE802154_LLSEC_PARAM_FRAME_COUNTER; + } + + rc = ops->llsec->set_params(dev, ¶ms, changed); + + dev_put(dev); + + return rc; +out: + dev_put(dev); + return rc; +} + + + +struct llsec_dump_data { + struct sk_buff *skb; + int s_idx, s_idx2; + int portid; + int nlmsg_seq; + struct net_device *dev; + struct ieee802154_mlme_ops *ops; + struct ieee802154_llsec_table *table; +}; + +static int +ieee802154_llsec_dump_table(struct sk_buff *skb, struct netlink_callback *cb, + int (*step)(struct llsec_dump_data*)) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev; + struct llsec_dump_data data; + int idx = 0; + int first_dev = cb->args[0]; + int rc; + + for_each_netdev(net, dev) { + if (idx < first_dev || dev->type != ARPHRD_IEEE802154) + goto skip; + + data.ops = ieee802154_mlme_ops(dev); + if (!data.ops->llsec) + goto skip; + + data.skb = skb; + data.s_idx = cb->args[1]; + data.s_idx2 = cb->args[2]; + data.dev = dev; + data.portid = NETLINK_CB(cb->skb).portid; + data.nlmsg_seq = cb->nlh->nlmsg_seq; + + data.ops->llsec->lock_table(dev); + data.ops->llsec->get_table(data.dev, &data.table); + rc = step(&data); + data.ops->llsec->unlock_table(dev); + + if (rc < 0) + break; + +skip: + idx++; + } + cb->args[0] = idx; + + return skb->len; +} + +static int +ieee802154_nl_llsec_change(struct sk_buff *skb, struct genl_info *info, + int (*fn)(struct net_device*, struct genl_info*)) +{ + struct net_device *dev = NULL; + int rc = -EINVAL; + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + if (!ieee802154_mlme_ops(dev)->llsec) + rc = -EOPNOTSUPP; + else + rc = fn(dev, info); + + dev_put(dev); + return rc; +} + + + +static int +ieee802154_llsec_parse_key(struct genl_info *info, + struct ieee802154_llsec_key *key) +{ + u8 frames; + u32 commands[256 / 32]; + + memset(key, 0, sizeof(*key)); + + if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] || + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES]) + return -EINVAL; + + frames = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES]); + if ((frames & BIT(IEEE802154_FC_TYPE_MAC_CMD)) && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS]) + return -EINVAL; + + if (info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS]) { + nla_memcpy(commands, + info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS], + 256 / 8); + + if (commands[0] || commands[1] || commands[2] || commands[3] || + commands[4] || commands[5] || commands[6] || + commands[7] >= BIT(IEEE802154_CMD_GTS_REQ + 1)) + return -EINVAL; + + key->cmd_frame_ids = commands[7]; + } + + key->frame_types = frames; + + nla_memcpy(key->key, info->attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES], + IEEE802154_LLSEC_KEY_SIZE); + + return 0; +} + +static int llsec_add_key(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_key key; + struct ieee802154_llsec_key_id id; + + if (ieee802154_llsec_parse_key(info, &key) || + ieee802154_llsec_parse_key_id(info, &id)) + return -EINVAL; + + return ops->llsec->add_key(dev, &id, &key); +} + +int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info) +{ + if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != + (NLM_F_CREATE | NLM_F_EXCL)) + return -EINVAL; + + return ieee802154_nl_llsec_change(skb, info, llsec_add_key); +} + +static int llsec_remove_key(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_key_id id; + + if (ieee802154_llsec_parse_key_id(info, &id)) + return -EINVAL; + + return ops->llsec->del_key(dev, &id); +} + +int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info) +{ + return ieee802154_nl_llsec_change(skb, info, llsec_remove_key); +} + +static int +ieee802154_nl_fill_key(struct sk_buff *msg, u32 portid, u32 seq, + const struct ieee802154_llsec_key_entry *key, + const struct net_device *dev) +{ + void *hdr; + u32 commands[256 / 32]; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, + IEEE802154_LLSEC_LIST_KEY); + if (!hdr) + goto out; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + ieee802154_llsec_fill_key_id(msg, &key->id) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES, + key->key->frame_types)) + goto nla_put_failure; + + if (key->key->frame_types & BIT(IEEE802154_FC_TYPE_MAC_CMD)) { + memset(commands, 0, sizeof(commands)); + commands[7] = key->key->cmd_frame_ids; + if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS, + sizeof(commands), commands)) + goto nla_put_failure; + } + + if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_BYTES, + IEEE802154_LLSEC_KEY_SIZE, key->key->key)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +static int llsec_iter_keys(struct llsec_dump_data *data) +{ + struct ieee802154_llsec_key_entry *pos; + int rc = 0, idx = 0; + + list_for_each_entry(pos, &data->table->keys, list) { + if (idx++ < data->s_idx) + continue; + + if (ieee802154_nl_fill_key(data->skb, data->portid, + data->nlmsg_seq, pos, data->dev)) { + rc = -EMSGSIZE; + break; + } + + data->s_idx++; + } + + return rc; +} + +int ieee802154_llsec_dump_keys(struct sk_buff *skb, struct netlink_callback *cb) +{ + return ieee802154_llsec_dump_table(skb, cb, llsec_iter_keys); +} + + + +static int +llsec_parse_dev(struct genl_info *info, + struct ieee802154_llsec_device *dev) +{ + memset(dev, 0, sizeof(*dev)); + + if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] || + !info->attrs[IEEE802154_ATTR_HW_ADDR] || + !info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] || + !info->attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] || + (!!info->attrs[IEEE802154_ATTR_PAN_ID] != + !!info->attrs[IEEE802154_ATTR_SHORT_ADDR])) + return -EINVAL; + + if (info->attrs[IEEE802154_ATTR_PAN_ID]) { + dev->pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]); + dev->short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]); + } else { + dev->short_addr = cpu_to_le16(IEEE802154_ADDR_UNDEF); + } + + dev->hwaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + dev->frame_counter = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]); + dev->seclevel_exempt = !!nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]); + dev->key_mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE]); + + if (dev->key_mode >= __IEEE802154_LLSEC_DEVKEY_MAX) + return -EINVAL; + + return 0; +} + +static int llsec_add_dev(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_device desc; + + if (llsec_parse_dev(info, &desc)) + return -EINVAL; + + return ops->llsec->add_dev(dev, &desc); +} + +int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info) +{ + if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != + (NLM_F_CREATE | NLM_F_EXCL)) + return -EINVAL; + + return ieee802154_nl_llsec_change(skb, info, llsec_add_dev); +} + +static int llsec_del_dev(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + __le64 devaddr; + + if (!info->attrs[IEEE802154_ATTR_HW_ADDR]) + return -EINVAL; + + devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + + return ops->llsec->del_dev(dev, devaddr); +} + +int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info) +{ + return ieee802154_nl_llsec_change(skb, info, llsec_del_dev); +} + +static int +ieee802154_nl_fill_dev(struct sk_buff *msg, u32 portid, u32 seq, + const struct ieee802154_llsec_device *desc, + const struct net_device *dev) +{ + void *hdr; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, + IEEE802154_LLSEC_LIST_DEV); + if (!hdr) + goto out; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, desc->pan_id) || + nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, + desc->short_addr) || + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, desc->hwaddr) || + nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, + desc->frame_counter) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, + desc->seclevel_exempt) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_KEY_MODE, desc->key_mode)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +static int llsec_iter_devs(struct llsec_dump_data *data) +{ + struct ieee802154_llsec_device *pos; + int rc = 0, idx = 0; + + list_for_each_entry(pos, &data->table->devices, list) { + if (idx++ < data->s_idx) + continue; + + if (ieee802154_nl_fill_dev(data->skb, data->portid, + data->nlmsg_seq, pos, data->dev)) { + rc = -EMSGSIZE; + break; + } + + data->s_idx++; + } + + return rc; +} + +int ieee802154_llsec_dump_devs(struct sk_buff *skb, struct netlink_callback *cb) +{ + return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devs); +} + + + +static int llsec_add_devkey(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_device_key key; + __le64 devaddr; + + if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] || + !info->attrs[IEEE802154_ATTR_HW_ADDR] || + ieee802154_llsec_parse_key_id(info, &key.key_id)) + return -EINVAL; + + devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + key.frame_counter = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]); + + return ops->llsec->add_devkey(dev, devaddr, &key); +} + +int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info) +{ + if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != + (NLM_F_CREATE | NLM_F_EXCL)) + return -EINVAL; + + return ieee802154_nl_llsec_change(skb, info, llsec_add_devkey); +} + +static int llsec_del_devkey(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_device_key key; + __le64 devaddr; + + if (!info->attrs[IEEE802154_ATTR_HW_ADDR] || + ieee802154_llsec_parse_key_id(info, &key.key_id)) + return -EINVAL; + + devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + + return ops->llsec->del_devkey(dev, devaddr, &key); +} + +int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info) +{ + return ieee802154_nl_llsec_change(skb, info, llsec_del_devkey); +} + +static int +ieee802154_nl_fill_devkey(struct sk_buff *msg, u32 portid, u32 seq, + __le64 devaddr, + const struct ieee802154_llsec_device_key *devkey, + const struct net_device *dev) +{ + void *hdr; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, + IEEE802154_LLSEC_LIST_DEVKEY); + if (!hdr) + goto out; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, devaddr) || + nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, + devkey->frame_counter) || + ieee802154_llsec_fill_key_id(msg, &devkey->key_id)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +static int llsec_iter_devkeys(struct llsec_dump_data *data) +{ + struct ieee802154_llsec_device *dpos; + struct ieee802154_llsec_device_key *kpos; + int rc = 0, idx = 0, idx2; + + list_for_each_entry(dpos, &data->table->devices, list) { + if (idx++ < data->s_idx) + continue; + + idx2 = 0; + + list_for_each_entry(kpos, &dpos->keys, list) { + if (idx2++ < data->s_idx2) + continue; + + if (ieee802154_nl_fill_devkey(data->skb, data->portid, + data->nlmsg_seq, + dpos->hwaddr, kpos, + data->dev)) { + return rc = -EMSGSIZE; + } + + data->s_idx2++; + } + + data->s_idx++; + } + + return rc; +} + +int ieee802154_llsec_dump_devkeys(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devkeys); +} + + + +static int +llsec_parse_seclevel(struct genl_info *info, + struct ieee802154_llsec_seclevel *sl) +{ + memset(sl, 0, sizeof(*sl)); + + if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE] || + !info->attrs[IEEE802154_ATTR_LLSEC_SECLEVELS] || + !info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]) + return -EINVAL; + + sl->frame_type = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE]); + if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD) { + if (!info->attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID]) + return -EINVAL; + + sl->cmd_frame_id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID]); + } + + sl->sec_levels = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVELS]); + sl->device_override = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]); + + return 0; +} + +static int llsec_add_seclevel(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_seclevel sl; + + if (llsec_parse_seclevel(info, &sl)) + return -EINVAL; + + return ops->llsec->add_seclevel(dev, &sl); +} + +int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info) +{ + if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != + (NLM_F_CREATE | NLM_F_EXCL)) + return -EINVAL; + + return ieee802154_nl_llsec_change(skb, info, llsec_add_seclevel); +} + +static int llsec_del_seclevel(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_seclevel sl; + + if (llsec_parse_seclevel(info, &sl)) + return -EINVAL; + + return ops->llsec->del_seclevel(dev, &sl); +} + +int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info) +{ + return ieee802154_nl_llsec_change(skb, info, llsec_del_seclevel); +} + +static int +ieee802154_nl_fill_seclevel(struct sk_buff *msg, u32 portid, u32 seq, + const struct ieee802154_llsec_seclevel *sl, + const struct net_device *dev) +{ + void *hdr; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, + IEEE802154_LLSEC_LIST_SECLEVEL); + if (!hdr) + goto out; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_FRAME_TYPE, sl->frame_type) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVELS, sl->sec_levels) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, + sl->device_override)) + goto nla_put_failure; + + if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD && + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_CMD_FRAME_ID, + sl->cmd_frame_id)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +static int llsec_iter_seclevels(struct llsec_dump_data *data) +{ + struct ieee802154_llsec_seclevel *pos; + int rc = 0, idx = 0; + + list_for_each_entry(pos, &data->table->security_levels, list) { + if (idx++ < data->s_idx) + continue; + + if (ieee802154_nl_fill_seclevel(data->skb, data->portid, + data->nlmsg_seq, pos, + data->dev)) { + rc = -EMSGSIZE; + break; + } + + data->s_idx++; + } + + return rc; +} + +int ieee802154_llsec_dump_seclevels(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return ieee802154_llsec_dump_table(skb, cb, llsec_iter_seclevels); +} diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index fd7be5e45cef..3a703ab88348 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -62,5 +62,21 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_CSMA_MAX_BE] = { .type = NLA_U8, }, [IEEE802154_ATTR_FRAME_RETRIES] = { .type = NLA_S8, }, + + [IEEE802154_ATTR_LLSEC_ENABLED] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_SECLEVEL] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_KEY_MODE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT] = { .type = NLA_U32, }, + [IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED] = { .type = NLA_HW_ADDR, }, + [IEEE802154_ATTR_LLSEC_KEY_ID] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_FRAME_COUNTER] = { .type = NLA_U32 }, + [IEEE802154_ATTR_LLSEC_KEY_BYTES] = { .len = 16, }, + [IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS] = { .len = 258 / 8 }, + [IEEE802154_ATTR_LLSEC_FRAME_TYPE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_CMD_FRAME_ID] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_SECLEVELS] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] = { .type = NLA_U8, }, }; -- cgit v1.2.3-71-gd317 From f0f77dc6be76ed1854b08688390e156e4b351ab5 Mon Sep 17 00:00:00 2001 From: Phoebe Buckheister Date: Fri, 16 May 2014 17:46:45 +0200 Subject: ieee802154, mac802154: implement devkey record option The 802.15.4-2011 standard states that for each key, a list of devices that use this key shall be kept. Previous patches have only considered two options: * a device "uses" (or may use) all keys, rendering the list useless * a device is restricted to a certain set of keys Another option would be that a device *may* use all keys, but need not do so, and we are interested in the actual set of keys the device uses. Recording keys used by any given device may have a noticable performance impact and might not be needed as often. The common case, in which a device will not switch keys too often, should still perform well. Signed-off-by: Phoebe Buckheister Signed-off-by: David S. Miller --- include/net/ieee802154_netdev.h | 1 + net/mac802154/llsec.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'include') diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index eb9f850a51b6..3b53c8e405e4 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -280,6 +280,7 @@ struct ieee802154_llsec_device_key { enum { IEEE802154_LLSEC_DEVKEY_IGNORE, IEEE802154_LLSEC_DEVKEY_RESTRICT, + IEEE802154_LLSEC_DEVKEY_RECORD, __IEEE802154_LLSEC_DEVKEY_MAX, }; diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index 392653b1b5a3..a83674edaafd 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -920,6 +920,37 @@ llsec_do_decrypt(struct sk_buff *skb, const struct mac802154_llsec *sec, return llsec_do_decrypt_auth(skb, sec, hdr, key, dev_addr); } +static int +llsec_update_devkey_record(struct mac802154_llsec_device *dev, + const struct ieee802154_llsec_key_id *in_key) +{ + struct mac802154_llsec_device_key *devkey; + + devkey = llsec_devkey_find(dev, in_key); + + if (!devkey) { + struct mac802154_llsec_device_key *next; + + next = kzalloc(sizeof(*devkey), GFP_ATOMIC); + if (!next) + return -ENOMEM; + + next->devkey.key_id = *in_key; + + spin_lock_bh(&dev->lock); + + devkey = llsec_devkey_find(dev, in_key); + if (!devkey) + list_add_rcu(&next->devkey.list, &dev->dev.keys); + else + kfree(next); + + spin_unlock_bh(&dev->lock); + } + + return 0; +} + static int llsec_update_devkey_info(struct mac802154_llsec_device *dev, const struct ieee802154_llsec_key_id *in_key, @@ -933,6 +964,13 @@ llsec_update_devkey_info(struct mac802154_llsec_device *dev, return -ENOENT; } + if (dev->dev.key_mode == IEEE802154_LLSEC_DEVKEY_RECORD) { + int rc = llsec_update_devkey_record(dev, in_key); + + if (rc < 0) + return rc; + } + spin_lock_bh(&dev->lock); if ((!devkey && frame_counter < dev->dev.frame_counter) || -- cgit v1.2.3-71-gd317 From 6c4e548ff36672eeb78f8288a2920d66fa4a6a66 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 16 May 2014 21:48:22 +0200 Subject: net: cdc_ncm: use ethtool to tune coalescing settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Datagram coalescing is an integral part of the NCM and MBIM protocols, intended to reduce the interrupt load primarily on the device end of the USB link. As with all coalescing solutions, there is a trade-off between buffering and interrupts. The current defaults are based on the assumption that device side buffers should be the limiting factor. However, many modern high speed LTE modems suffers from buffer-bloat, making this assumption fail. This results in sub-optimal performance due to excessive coalescing. And in cases where such modems are connected to cheap embedded hosts there is often severe buffer allocation issues, giving very noticeable performance degradation . A start on improving this is going from build time hard coded limits to per device user configurable limits. The ethtool coalescing API was selected as user interface because, although the tuned values are buffer sizes, these settings directly control datagram coalescing. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 71 +++++++++++++++++++++++++++++++++++++++++++-- include/linux/usb/cdc_ncm.h | 6 +++- 2 files changed, 74 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 2ec3790a4db8..141dbec912be 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -65,6 +65,67 @@ static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx); static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer); static struct usb_driver cdc_ncm_driver; +static int cdc_ncm_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct usbnet *dev = netdev_priv(netdev); + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + + /* assuming maximum sized dgrams and ignoring NDPs */ + ec->rx_max_coalesced_frames = ctx->rx_max / ctx->max_datagram_size; + ec->tx_max_coalesced_frames = ctx->tx_max / ctx->max_datagram_size; + + /* the timer will fire CDC_NCM_TIMER_PENDING_CNT times in a row */ + ec->tx_coalesce_usecs = (ctx->timer_interval * CDC_NCM_TIMER_PENDING_CNT) / NSEC_PER_USEC; + return 0; +} + +static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx); + +static int cdc_ncm_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct usbnet *dev = netdev_priv(netdev); + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + u32 new_rx_max = ctx->rx_max; + u32 new_tx_max = ctx->tx_max; + + /* assuming maximum sized dgrams and a single NDP */ + if (ec->rx_max_coalesced_frames) + new_rx_max = ec->rx_max_coalesced_frames * ctx->max_datagram_size; + if (ec->tx_max_coalesced_frames) + new_tx_max = ec->tx_max_coalesced_frames * ctx->max_datagram_size; + + if (ec->tx_coalesce_usecs && + (ec->tx_coalesce_usecs < CDC_NCM_TIMER_INTERVAL_MIN * CDC_NCM_TIMER_PENDING_CNT || + ec->tx_coalesce_usecs > CDC_NCM_TIMER_INTERVAL_MAX * CDC_NCM_TIMER_PENDING_CNT)) + return -EINVAL; + + spin_lock_bh(&ctx->mtx); + ctx->timer_interval = ec->tx_coalesce_usecs * NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT; + if (!ctx->timer_interval) + ctx->tx_timer_pending = 0; + spin_unlock_bh(&ctx->mtx); + + /* inform device of new values */ + if (new_rx_max != ctx->rx_max || new_tx_max != ctx->tx_max) + cdc_ncm_update_rxtx_max(dev, new_rx_max, new_tx_max); + return 0; +} + +static const struct ethtool_ops cdc_ncm_ethtool_ops = { + .get_settings = usbnet_get_settings, + .set_settings = usbnet_set_settings, + .get_link = usbnet_get_link, + .nway_reset = usbnet_nway_reset, + .get_drvinfo = usbnet_get_drvinfo, + .get_msglevel = usbnet_get_msglevel, + .set_msglevel = usbnet_set_msglevel, + .get_ts_info = ethtool_op_get_ts_info, + .get_coalesce = cdc_ncm_get_coalesce, + .set_coalesce = cdc_ncm_set_coalesce, +}; + /* handle rx_max and tx_max changes */ static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx) { @@ -257,6 +318,9 @@ static int cdc_ncm_init(struct usbnet *dev) (ctx->tx_max_datagrams > CDC_NCM_DPT_DATAGRAMS_MAX)) ctx->tx_max_datagrams = CDC_NCM_DPT_DATAGRAMS_MAX; + /* initial coalescing timer interval */ + ctx->timer_interval = CDC_NCM_TIMER_INTERVAL_USEC * NSEC_PER_USEC; + return 0; } @@ -596,6 +660,9 @@ advance: /* finish setting up the device specific data */ cdc_ncm_setup(dev); + /* override ethtool_ops */ + dev->net->ethtool_ops = &cdc_ncm_ethtool_ops; + return 0; error2: @@ -863,7 +930,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) ctx->tx_curr_skb = skb_out; goto exit_no_skb; - } else if ((n < ctx->tx_max_datagrams) && (ready2send == 0)) { + } else if ((n < ctx->tx_max_datagrams) && (ready2send == 0) && (ctx->timer_interval > 0)) { /* wait for more frames */ /* push variables */ ctx->tx_curr_skb = skb_out; @@ -915,7 +982,7 @@ static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx) /* start timer, if not already started */ if (!(hrtimer_active(&ctx->tx_timer) || atomic_read(&ctx->stop))) hrtimer_start(&ctx->tx_timer, - ktime_set(0, CDC_NCM_TIMER_INTERVAL), + ktime_set(0, ctx->timer_interval), HRTIMER_MODE_REL); } diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 55b6feead93b..5c1066b4dc41 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -72,7 +72,9 @@ /* Restart the timer, if amount of datagrams is less than given value */ #define CDC_NCM_RESTART_TIMER_DATAGRAM_CNT 3 #define CDC_NCM_TIMER_PENDING_CNT 2 -#define CDC_NCM_TIMER_INTERVAL (400UL * NSEC_PER_USEC) +#define CDC_NCM_TIMER_INTERVAL_USEC 400UL +#define CDC_NCM_TIMER_INTERVAL_MIN 5UL +#define CDC_NCM_TIMER_INTERVAL_MAX (15UL * USEC_PER_SEC) /* The following macro defines the minimum header space */ #define CDC_NCM_MIN_HDR_SIZE \ @@ -107,6 +109,8 @@ struct cdc_ncm_ctx { spinlock_t mtx; atomic_t stop; + u64 timer_interval; + u32 tx_timer_pending; u32 tx_curr_frame_num; u32 rx_max; -- cgit v1.2.3-71-gd317 From 70559b8970e52aa9962dc823fd4498af06809544 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 16 May 2014 21:48:23 +0200 Subject: net: cdc_ncm: use true max dgram count for header estimates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many newer NCM and MBIM devices will request a maximum tx datagram count which is much smaller than our hard-coded absolute max. We can reduce the overhead without sacrificing any of the simplicity for these devices, by simply using the true negotiated count in when calculated the maximum NTH and NDP header sizes. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 9 ++++++--- include/linux/usb/cdc_ncm.h | 10 +--------- 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 141dbec912be..b9b562b9128a 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -174,7 +174,7 @@ static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx) } /* clamp new_tx to sane values */ - min = CDC_NCM_MIN_HDR_SIZE + ctx->max_datagram_size; + min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth16); max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_TX, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize)); /* some devices set dwNtbOutMaxSize too low for the above default */ @@ -318,6 +318,9 @@ static int cdc_ncm_init(struct usbnet *dev) (ctx->tx_max_datagrams > CDC_NCM_DPT_DATAGRAMS_MAX)) ctx->tx_max_datagrams = CDC_NCM_DPT_DATAGRAMS_MAX; + /* set up maximum NDP size */ + ctx->max_ndp_size = sizeof(struct usb_cdc_ncm_ndp16) + (ctx->tx_max_datagrams + 1) * sizeof(struct usb_cdc_ncm_dpe16); + /* initial coalescing timer interval */ ctx->timer_interval = CDC_NCM_TIMER_INTERVAL_USEC * NSEC_PER_USEC; @@ -800,7 +803,7 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max); /* verify that there is room for the NDP and the datagram (reserve) */ - if ((ctx->tx_max - skb->len - reserve) < CDC_NCM_NDP_SIZE) + if ((ctx->tx_max - skb->len - reserve) < ctx->max_ndp_size) return NULL; /* link to it */ @@ -810,7 +813,7 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ nth16->wNdpIndex = cpu_to_le16(skb->len); /* push a new empty NDP */ - ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, CDC_NCM_NDP_SIZE), 0, CDC_NCM_NDP_SIZE); + ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, ctx->max_ndp_size), 0, ctx->max_ndp_size); ndp16->dwSignature = sign; ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) + sizeof(struct usb_cdc_ncm_dpe16)); return ndp16; diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 5c1066b4dc41..60a44b8a464e 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -76,15 +76,6 @@ #define CDC_NCM_TIMER_INTERVAL_MIN 5UL #define CDC_NCM_TIMER_INTERVAL_MAX (15UL * USEC_PER_SEC) -/* The following macro defines the minimum header space */ -#define CDC_NCM_MIN_HDR_SIZE \ - (sizeof(struct usb_cdc_ncm_nth16) + sizeof(struct usb_cdc_ncm_ndp16) + \ - (CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16)) - -#define CDC_NCM_NDP_SIZE \ - (sizeof(struct usb_cdc_ncm_ndp16) + \ - (CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16)) - #define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) #define cdc_ncm_data_intf_is_mbim(x) ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB) @@ -110,6 +101,7 @@ struct cdc_ncm_ctx { atomic_t stop; u64 timer_interval; + u32 max_ndp_size; u32 tx_timer_pending; u32 tx_curr_frame_num; -- cgit v1.2.3-71-gd317 From 43e4c6dfc0fd781e68f20caf563a06f5c6ece995 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 16 May 2014 21:48:24 +0200 Subject: net: cdc_ncm: set reasonable padding limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We pad frames larger than X to maximum size for devices which don't need a ZLP after maximum sized frames. This allows the device to optimize its transfers for one fixed buffer size. X was arbitrarily set at 512 bytes regardless of real buffer maximum, causing extreme overheads due to excessive padding of larger tx buffers. Limit the padding to at most 3 full USB packets, still allowing the overhead to payload ratio of 3/1. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 8 ++++++-- include/linux/usb/cdc_ncm.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index b9b562b9128a..9592d4669435 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -213,6 +213,10 @@ static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx) /* max qlen depend on hard_mtu and rx_urb_size */ usbnet_update_max_qlen(dev); + + /* never pad more than 3 full USB packets per transfer */ + ctx->min_tx_pkt = clamp_t(u16, ctx->tx_max - 3 * usb_maxpacket(dev->udev, dev->out, 1), + CDC_NCM_MIN_TX_PKT, ctx->tx_max); } /* helpers for NCM and MBIM differences */ @@ -947,7 +951,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) /* variables will be reset at next call */ } - /* If collected data size is less or equal CDC_NCM_MIN_TX_PKT + /* If collected data size is less or equal ctx->min_tx_pkt * bytes, we send buffers as it is. If we get more data, it * would be more efficient for USB HS mobile device with DMA * engine to receive a full size NTB, than canceling DMA @@ -957,7 +961,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) * a ZLP after full sized NTBs. */ if (!(dev->driver_info->flags & FLAG_SEND_ZLP) && - skb_out->len > CDC_NCM_MIN_TX_PKT) + skb_out->len > ctx->min_tx_pkt) memset(skb_put(skb_out, ctx->tx_max - skb_out->len), 0, ctx->tx_max - skb_out->len); else if (skb_out->len < ctx->tx_max && (skb_out->len % dev->maxpacket) == 0) diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 60a44b8a464e..79de6724d398 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -115,6 +115,7 @@ struct cdc_ncm_ctx { u16 tx_seq; u16 rx_seq; u16 connected; + u16 min_tx_pkt; }; u8 cdc_ncm_select_altsetting(struct usb_interface *intf); -- cgit v1.2.3-71-gd317 From beeecd42c3b41d17d0bf1d839db99274c287f514 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 16 May 2014 21:48:25 +0200 Subject: net: cdc_ncm/cdc_mbim: adding NCM protocol statistics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To have an idea of the effects of the protocol coalescing it's useful to have some counters showing the different aspects. Due to the asymmetrical usbnet interface the netdev rx_bytes counter has been counting real received payload, while the tx_bytes counter has included the NCM/MBIM framing overhead. This overhead can be many times the payload because of the aggressive padding strategy of this driver, and will vary a lot depending on device and traffic. With very few exceptions, users are only interested in the payload size. Having an somewhat accurate payload byte counter is particularly important for mobile broadband devices, which many NCM devices and of course all MBIM devices are. Users and userspace applications will use this counter to monitor account quotas. Having protocol specific counters for the overhead, we are now able to correct the tx_bytes netdev counter so that it shows the real payload Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 6 +++ drivers/net/usb/cdc_ncm.c | 91 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb/cdc_ncm.h | 11 ++++++ 3 files changed, 108 insertions(+) (limited to 'include') diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index bc23273d0455..5ee7a1dbc023 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -420,6 +420,7 @@ static int cdc_mbim_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in) struct usb_cdc_ncm_dpe16 *dpe16; int ndpoffset; int loopcount = 50; /* arbitrary max preventing infinite loop */ + u32 payload = 0; u8 *c; u16 tci; @@ -482,6 +483,7 @@ next_ndp: if (!skb) goto error; usbnet_skb_return(dev, skb); + payload += len; /* count payload bytes in this NTB */ } } err_ndp: @@ -490,6 +492,10 @@ err_ndp: if (ndpoffset && loopcount--) goto next_ndp; + /* update stats */ + ctx->rx_overhead += skb_in->len - payload; + ctx->rx_ntbs++; + return 1; error: return 0; diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 9592d4669435..f4b439847d04 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -65,6 +65,68 @@ static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx); static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer); static struct usb_driver cdc_ncm_driver; +struct cdc_ncm_stats { + char stat_string[ETH_GSTRING_LEN]; + int sizeof_stat; + int stat_offset; +}; + +#define CDC_NCM_STAT(str, m) { \ + .stat_string = str, \ + .sizeof_stat = sizeof(((struct cdc_ncm_ctx *)0)->m), \ + .stat_offset = offsetof(struct cdc_ncm_ctx, m) } +#define CDC_NCM_SIMPLE_STAT(m) CDC_NCM_STAT(__stringify(m), m) + +static const struct cdc_ncm_stats cdc_ncm_gstrings_stats[] = { + CDC_NCM_SIMPLE_STAT(tx_reason_ntb_full), + CDC_NCM_SIMPLE_STAT(tx_reason_ndp_full), + CDC_NCM_SIMPLE_STAT(tx_reason_timeout), + CDC_NCM_SIMPLE_STAT(tx_reason_max_datagram), + CDC_NCM_SIMPLE_STAT(tx_overhead), + CDC_NCM_SIMPLE_STAT(tx_ntbs), + CDC_NCM_SIMPLE_STAT(rx_overhead), + CDC_NCM_SIMPLE_STAT(rx_ntbs), +}; + +static int cdc_ncm_get_sset_count(struct net_device __always_unused *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(cdc_ncm_gstrings_stats); + default: + return -EOPNOTSUPP; + } +} + +static void cdc_ncm_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, + u64 *data) +{ + struct usbnet *dev = netdev_priv(netdev); + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + int i; + char *p = NULL; + + for (i = 0; i < ARRAY_SIZE(cdc_ncm_gstrings_stats); i++) { + p = (char *)ctx + cdc_ncm_gstrings_stats[i].stat_offset; + data[i] = (cdc_ncm_gstrings_stats[i].sizeof_stat == sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } +} + +static void cdc_ncm_get_strings(struct net_device __always_unused *netdev, u32 stringset, u8 *data) +{ + u8 *p = data; + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < ARRAY_SIZE(cdc_ncm_gstrings_stats); i++) { + memcpy(p, cdc_ncm_gstrings_stats[i].stat_string, ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + } +} + static int cdc_ncm_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { @@ -122,6 +184,9 @@ static const struct ethtool_ops cdc_ncm_ethtool_ops = { .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, .get_ts_info = ethtool_op_get_ts_info, + .get_sset_count = cdc_ncm_get_sset_count, + .get_strings = cdc_ncm_get_strings, + .get_ethtool_stats = cdc_ncm_get_ethtool_stats, .get_coalesce = cdc_ncm_get_coalesce, .set_coalesce = cdc_ncm_set_coalesce, }; @@ -862,6 +927,9 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) /* count total number of frames in this NTB */ ctx->tx_curr_frame_num = 0; + + /* recent payload counter for this skb_out */ + ctx->tx_curr_frame_payload = 0; } for (n = ctx->tx_curr_frame_num; n < ctx->tx_max_datagrams; n++) { @@ -899,6 +967,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) ctx->tx_rem_sign = sign; skb = NULL; ready2send = 1; + ctx->tx_reason_ntb_full++; /* count reason for transmitting */ } break; } @@ -912,12 +981,14 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) ndp16->dpe16[index].wDatagramIndex = cpu_to_le16(skb_out->len); ndp16->wLength = cpu_to_le16(ndplen + sizeof(struct usb_cdc_ncm_dpe16)); memcpy(skb_put(skb_out, skb->len), skb->data, skb->len); + ctx->tx_curr_frame_payload += skb->len; /* count real tx payload data */ dev_kfree_skb_any(skb); skb = NULL; /* send now if this NDP is full */ if (index >= CDC_NCM_DPT_DATAGRAMS_MAX) { ready2send = 1; + ctx->tx_reason_ndp_full++; /* count reason for transmitting */ break; } } @@ -947,6 +1018,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) goto exit_no_skb; } else { + if (n == ctx->tx_max_datagrams) + ctx->tx_reason_max_datagram++; /* count reason for transmitting */ /* frame goes out */ /* variables will be reset at next call */ } @@ -974,6 +1047,17 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) /* return skb */ ctx->tx_curr_skb = NULL; dev->net->stats.tx_packets += ctx->tx_curr_frame_num; + + /* keep private stats: framing overhead and number of NTBs */ + ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload; + ctx->tx_ntbs++; + + /* usbnet has already counted all the framing overhead. + * Adjust the stats so that the tx_bytes counter show real + * payload data instead. + */ + dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload; + return skb_out; exit_no_skb: @@ -1014,6 +1098,7 @@ static void cdc_ncm_txpath_bh(unsigned long param) cdc_ncm_tx_timeout_start(ctx); spin_unlock_bh(&ctx->mtx); } else if (dev->net != NULL) { + ctx->tx_reason_timeout++; /* count reason for transmitting */ spin_unlock_bh(&ctx->mtx); netif_tx_lock_bh(dev->net); usbnet_start_xmit(NULL, dev->net); @@ -1149,6 +1234,7 @@ int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in) struct usb_cdc_ncm_dpe16 *dpe16; int ndpoffset; int loopcount = 50; /* arbitrary max preventing infinite loop */ + u32 payload = 0; ndpoffset = cdc_ncm_rx_verify_nth16(ctx, skb_in); if (ndpoffset < 0) @@ -1201,6 +1287,7 @@ next_ndp: skb->data = ((u8 *)skb_in->data) + offset; skb_set_tail_pointer(skb, len); usbnet_skb_return(dev, skb); + payload += len; /* count payload bytes in this NTB */ } } err_ndp: @@ -1209,6 +1296,10 @@ err_ndp: if (ndpoffset && loopcount--) goto next_ndp; + /* update stats */ + ctx->rx_overhead += skb_in->len - payload; + ctx->rx_ntbs++; + return 1; error: return 0; diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 79de6724d398..88d2d7f1820f 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -116,6 +116,17 @@ struct cdc_ncm_ctx { u16 rx_seq; u16 connected; u16 min_tx_pkt; + + /* statistics */ + u32 tx_curr_frame_payload; + u32 tx_reason_ntb_full; + u32 tx_reason_ndp_full; + u32 tx_reason_timeout; + u32 tx_reason_max_datagram; + u64 tx_overhead; + u64 tx_ntbs; + u64 rx_overhead; + u64 rx_ntbs; }; u8 cdc_ncm_select_altsetting(struct usb_interface *intf); -- cgit v1.2.3-71-gd317 From 50f1cb1cc8f50fa88dbeaf990ae2bae91b9ff306 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 16 May 2014 21:48:26 +0200 Subject: net: cdc_ncm: use sane defaults for rx/tx buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lots of devices request much larger buffers than reasonable. This cause real problems for users of hosts with limited resources. Reducing the default buffer size to 16kB for such devices is a reasonable trade-off between allowing them to aggregate traffic and avoiding memory exhaustion on resource restrained hosts. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 12 ++++++++++-- include/linux/usb/cdc_ncm.h | 4 ++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index f4b439847d04..bb53abe1f3a1 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -492,10 +492,18 @@ static void cdc_ncm_fix_modulus(struct usbnet *dev) static int cdc_ncm_setup(struct usbnet *dev) { struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + u32 def_rx, def_tx; + + /* be conservative when selecting intial buffer size to + * increase the number of hosts this will work for + */ + def_rx = min_t(u32, CDC_NCM_NTB_DEF_SIZE_RX, + le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)); + def_tx = min_t(u32, CDC_NCM_NTB_DEF_SIZE_TX, + le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize)); /* clamp rx_max and tx_max and inform device */ - cdc_ncm_update_rxtx_max(dev, le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize), - le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize)); + cdc_ncm_update_rxtx_max(dev, def_rx, def_tx); /* sanitize the modulus and remainder values */ cdc_ncm_fix_modulus(dev); diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 88d2d7f1820f..cde506731c48 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -52,6 +52,10 @@ #define CDC_NCM_NTB_MAX_SIZE_TX 32768 /* bytes */ #define CDC_NCM_NTB_MAX_SIZE_RX 32768 /* bytes */ +/* Initial NTB length */ +#define CDC_NCM_NTB_DEF_SIZE_TX 16384 /* bytes */ +#define CDC_NCM_NTB_DEF_SIZE_RX 16384 /* bytes */ + /* Minimum value for MaxDatagramSize, ch. 6.2.9 */ #define CDC_NCM_MIN_DATAGRAM_SIZE 1514 /* bytes */ -- cgit v1.2.3-71-gd317 From fa83dbeee55865678025b6c1637ca08860209f87 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 16 May 2014 21:48:28 +0200 Subject: net: cdc_ncm: remove redundant "disconnected" flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling netif_carrier_{on,off} is sufficient. There is no need to duplicate the carrier state in a driver specific flag. Acked-by: Enrico Mioso Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 19 ++----------------- drivers/net/usb/huawei_cdc_ncm.c | 13 ------------- include/linux/usb/cdc_ncm.h | 1 - 3 files changed, 2 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 1d1ff2fa8ae1..783c4ed96395 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1364,11 +1364,10 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) * USB_CDC_NOTIFY_NETWORK_CONNECTION notification shall be * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE. */ - ctx->connected = le16_to_cpu(event->wValue); netif_info(dev, link, dev->net, "network connection: %sconnected\n", - ctx->connected ? "" : "dis"); - usbnet_link_change(dev, ctx->connected, 0); + !!event->wValue ? "" : "dis"); + usbnet_link_change(dev, !!event->wValue, 0); break; case USB_CDC_NOTIFY_SPEED_CHANGE: @@ -1388,23 +1387,11 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) } } -static int cdc_ncm_check_connect(struct usbnet *dev) -{ - struct cdc_ncm_ctx *ctx; - - ctx = (struct cdc_ncm_ctx *)dev->data[0]; - if (ctx == NULL) - return 1; /* disconnected */ - - return !ctx->connected; -} - static const struct driver_info cdc_ncm_info = { .description = "CDC NCM", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, - .check_connect = cdc_ncm_check_connect, .manage_power = usbnet_manage_power, .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, @@ -1418,7 +1405,6 @@ static const struct driver_info wwan_info = { | FLAG_WWAN, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, - .check_connect = cdc_ncm_check_connect, .manage_power = usbnet_manage_power, .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, @@ -1432,7 +1418,6 @@ static const struct driver_info wwan_noarp_info = { | FLAG_WWAN | FLAG_NOARP, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, - .check_connect = cdc_ncm_check_connect, .manage_power = usbnet_manage_power, .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c index 312178d7b698..f9822bc75425 100644 --- a/drivers/net/usb/huawei_cdc_ncm.c +++ b/drivers/net/usb/huawei_cdc_ncm.c @@ -172,24 +172,11 @@ err: return ret; } -static int huawei_cdc_ncm_check_connect(struct usbnet *usbnet_dev) -{ - struct cdc_ncm_ctx *ctx; - - ctx = (struct cdc_ncm_ctx *)usbnet_dev->data[0]; - - if (ctx == NULL) - return 1; /* disconnected */ - - return !ctx->connected; -} - static const struct driver_info huawei_cdc_ncm_info = { .description = "Huawei CDC NCM device", .flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN, .bind = huawei_cdc_ncm_bind, .unbind = huawei_cdc_ncm_unbind, - .check_connect = huawei_cdc_ncm_check_connect, .manage_power = huawei_cdc_ncm_manage_power, .rx_fixup = cdc_ncm_rx_fixup, .tx_fixup = cdc_ncm_tx_fixup, diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index cde506731c48..8c5e38819828 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -118,7 +118,6 @@ struct cdc_ncm_ctx { u16 tx_ndp_modulus; u16 tx_seq; u16 rx_seq; - u16 connected; u16 min_tx_pkt; /* statistics */ -- cgit v1.2.3-71-gd317 From a17597d3b418ca5a394d14724ccfc295cb3186c8 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 16 May 2014 11:42:43 +0930 Subject: virtio-rng: fixes for device registration/unregistration There are several fixes in this patch (mostly because it's hard splitting them up): - Revert the name field in struct hwrng back to 'const'. Also, don't do an extra kmalloc for the name - just wasteful. - Deal with allocation failures properly. - Use IDA to allocate device number instead of brute forcing one. Signed-off-by: Sasha Levin Signed-off-by: Rusty Russell --- drivers/char/hw_random/virtio-rng.c | 41 +++++++++++++++++++++---------------- include/linux/hw_random.h | 2 +- 2 files changed, 24 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 5b25daa7f798..f3e71501de54 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -25,6 +25,7 @@ #include #include +static DEFINE_IDA(rng_index_ida); struct virtrng_info { struct virtio_device *vdev; @@ -33,6 +34,8 @@ struct virtrng_info { unsigned int data_avail; struct completion have_data; bool busy; + char name[25]; + int index; }; static void random_recv_done(struct virtqueue *vq) @@ -92,41 +95,45 @@ static void virtio_cleanup(struct hwrng *rng) static int probe_common(struct virtio_device *vdev) { - int err, i; + int err, index; struct virtrng_info *vi = NULL; vi = kzalloc(sizeof(struct virtrng_info), GFP_KERNEL); - vi->hwrng.name = kmalloc(40, GFP_KERNEL); + if (!vi) + return -ENOMEM; + + vi->index = index = ida_simple_get(&rng_index_ida, 0, 0, GFP_KERNEL); + if (index < 0) { + kfree(vi); + return index; + } + sprintf(vi->name, "virtio_rng.%d", index); init_completion(&vi->have_data); - vi->hwrng.read = virtio_read; - vi->hwrng.cleanup = virtio_cleanup; - vi->hwrng.priv = (unsigned long)vi; + vi->hwrng = (struct hwrng) { + .read = virtio_read, + .cleanup = virtio_cleanup, + .priv = (unsigned long)vi, + .name = vi->name, + }; vdev->priv = vi; /* We expect a single virtqueue. */ vi->vq = virtio_find_single_vq(vdev, random_recv_done, "input"); if (IS_ERR(vi->vq)) { err = PTR_ERR(vi->vq); - kfree(vi->hwrng.name); vi->vq = NULL; kfree(vi); - vi = NULL; + ida_simple_remove(&rng_index_ida, index); return err; } - i = 0; - do { - sprintf(vi->hwrng.name, "virtio_rng.%d", i++); - err = hwrng_register(&vi->hwrng); - } while (err == -EEXIST); - + err = hwrng_register(&vi->hwrng); if (err) { vdev->config->del_vqs(vdev); - kfree(vi->hwrng.name); vi->vq = NULL; kfree(vi); - vi = NULL; + ida_simple_remove(&rng_index_ida, index); return err; } @@ -140,10 +147,8 @@ static void remove_common(struct virtio_device *vdev) vi->busy = false; hwrng_unregister(&vi->hwrng); vdev->config->del_vqs(vdev); - kfree(vi->hwrng.name); - vi->vq = NULL; + ida_simple_remove(&rng_index_ida, vi->index); kfree(vi); - vi = NULL; } static int virtrng_probe(struct virtio_device *vdev) diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 02d9c87be54c..b4b0eef5fddf 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -31,7 +31,7 @@ * @priv: Private data, for use by the RNG driver. */ struct hwrng { - char *name; + const char *name; int (*init)(struct hwrng *rng); void (*cleanup)(struct hwrng *rng); int (*data_present)(struct hwrng *rng, int wait); -- cgit v1.2.3-71-gd317 From 7455fa2422898eee3464032351d20695930d9542 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 15 May 2014 01:41:23 +0100 Subject: ethtool: Name the 'no change' value for setting RSS hash key but not indir table We usually allocate special values of u32 fields starting from the top down, so also change the value to 0xffffffff. As these operations haven't been included in a stable release yet, it's not too late to change. Signed-off-by: Ben Hutchings --- include/uapi/linux/ethtool.h | 12 +++++++----- net/core/ethtool.c | 12 +++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index d47d31d6fa0e..cba18e3f8fab 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -850,7 +850,8 @@ struct ethtool_rxfh_indir { * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key. * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH * @rss_context: RSS context identifier. - * @indir_size: On entry, the array size of the user buffer, which may be zero. + * @indir_size: On entry, the array size of the user buffer, which may be zero, + * or (for %ETHTOOL_SRSSH), %ETH_RXFH_INDIR_NO_CHANGE. * On return from %ETHTOOL_GRSSH, the array size of the hardware * indirection table. * @key_size: On entry, the array size of the user buffer in bytes, @@ -861,10 +862,10 @@ struct ethtool_rxfh_indir { * of size @indir_size followed by hash key of size @key_size. * * For %ETHTOOL_GRSSH, a @indir_size and key_size of zero means that only the - * size should be returned. For %ETHTOOL_SRSSH, a @indir_size of 0xDEADBEEF - * means that indir table setting is not requested and a @indir_size of zero - * means the indir table should be reset to default values. This last feature - * is not supported by the original implementations. + * size should be returned. For %ETHTOOL_SRSSH, an @indir_size of + * %ETH_RXFH_INDIR_NO_CHANGE means that indir table setting is not requested + * and a @indir_size of zero means the indir table should be reset to default + * values. */ struct ethtool_rxfh { __u32 cmd; @@ -874,6 +875,7 @@ struct ethtool_rxfh { __u32 rsvd[2]; __u32 rss_config[0]; }; +#define ETH_RXFH_INDIR_NO_CHANGE 0xffffffff /** * struct ethtool_rx_ntuple_flow_spec - specification for RX flow filter diff --git a/net/core/ethtool.c b/net/core/ethtool.c index c834cb29f682..7156fe5ca876 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -803,12 +803,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, /* If either indir or hash key is valid, proceed further. */ - if ((user_indir_size && ((user_indir_size != 0xDEADBEEF) && - user_indir_size != dev_indir_size)) || + if ((user_indir_size && + user_indir_size != ETH_RXFH_INDIR_NO_CHANGE && + user_indir_size != dev_indir_size) || (user_key_size && (user_key_size != dev_key_size))) return -EINVAL; - if (user_indir_size != 0xDEADBEEF) + if (user_indir_size != ETH_RXFH_INDIR_NO_CHANGE) indir_bytes = dev_indir_size * sizeof(indir[0]); rss_config = kzalloc(indir_bytes + user_key_size, GFP_USER); @@ -821,9 +822,10 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, goto out; /* user_indir_size == 0 means reset the indir table to default. - * user_indir_size == 0xDEADBEEF means indir setting is not requested. + * user_indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged. */ - if (user_indir_size && user_indir_size != 0xDEADBEEF) { + if (user_indir_size && + user_indir_size != ETH_RXFH_INDIR_NO_CHANGE) { indir = (u32 *)rss_config; ret = ethtool_copy_validate_indir(indir, useraddr + rss_cfg_offset, -- cgit v1.2.3-71-gd317 From 38c891a49dec43dbb1575cc40d10dbd49c4961ab Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 15 May 2014 01:07:16 +0100 Subject: ethtool: Improve explanation of the two arrays following struct ethtool_rxfh The use of two variable-length arrays is unusual so deserves a bit more explanation. Signed-off-by: Ben Hutchings --- include/uapi/linux/ethtool.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index cba18e3f8fab..e3c7a719c76b 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -850,16 +850,17 @@ struct ethtool_rxfh_indir { * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key. * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH * @rss_context: RSS context identifier. - * @indir_size: On entry, the array size of the user buffer, which may be zero, - * or (for %ETHTOOL_SRSSH), %ETH_RXFH_INDIR_NO_CHANGE. - * On return from %ETHTOOL_GRSSH, the array size of the hardware - * indirection table. - * @key_size: On entry, the array size of the user buffer in bytes, - * which may be zero. - * On return from %ETHTOOL_GRSSH, the size of the RSS hash key. + * @indir_size: On entry, the array size of the user buffer for the + * indirection table, which may be zero, or (for %ETHTOOL_SRSSH), + * %ETH_RXFH_INDIR_NO_CHANGE. On return from %ETHTOOL_GRSSH, + * the array size of the hardware indirection table. + * @key_size: On entry, the array size of the user buffer for the hash key, + * which may be zero. On return from %ETHTOOL_GRSSH, the size of the + * hardware hash key. * @rsvd: Reserved for future extensions. * @rss_config: RX ring/queue index for each hash value i.e., indirection table - * of size @indir_size followed by hash key of size @key_size. + * of @indir_size __u32 elements, followed by hash key of @key_size + * bytes. * * For %ETHTOOL_GRSSH, a @indir_size and key_size of zero means that only the * size should be returned. For %ETHTOOL_SRSSH, an @indir_size of -- cgit v1.2.3-71-gd317 From 678e30df2e5664619e06fcfea5490a476826d8fe Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 19 May 2014 01:25:59 +0100 Subject: ethtool: Expand documentation of ethtool_ops::{get,set}_rxfh() Some corner-cases are not explained properly. Signed-off-by: Ben Hutchings --- include/linux/ethtool.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 212f537fc686..886e127d51a6 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -162,15 +162,16 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * Will not be called if @get_rxfh_indir_size returns zero. * @get_rxfh: Get the contents of the RX flow hash indirection table and hash * key. - * Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size - * returns zero. + * Will only be called if one or both of @get_rxfh_indir_size and + * @get_rxfh_key_size are implemented and return non-zero. * Returns a negative error code or zero. * @set_rxfh_indir: Set the contents of the RX flow hash indirection table. * Will not be called if @get_rxfh_indir_size returns zero. - * @set_rxfh: Set the contents of the RX flow hash indirection table and - * hash key. - * Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size - * returns zero. + * @set_rxfh: Set the contents of the RX flow hash indirection table and/or + * hash key. Either or both arguments may be %NULL if that attribute + * is not to be changed. + * Will only be called if one or both of @get_rxfh_indir_size and + * @get_rxfh_key_size are implemented and return non-zero. * Returns a negative error code or zero. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or @@ -244,8 +245,8 @@ struct ethtool_ops { int (*reset)(struct net_device *, u32 *); u32 (*get_rxfh_key_size)(struct net_device *); u32 (*get_rxfh_indir_size)(struct net_device *); - int (*get_rxfh)(struct net_device *, u32 *, u8 *); - int (*set_rxfh)(struct net_device *, u32 *, u8 *); + int (*get_rxfh)(struct net_device *, u32 *indir, u8 *key); + int (*set_rxfh)(struct net_device *, u32 *indir, u8 *key); int (*get_rxfh_indir)(struct net_device *, u32 *); int (*set_rxfh_indir)(struct net_device *, const u32 *); void (*get_channels)(struct net_device *, struct ethtool_channels *); -- cgit v1.2.3-71-gd317 From 61d88c6811f216de4ec26aafe24e650dc1aeb00e Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 19 May 2014 01:29:42 +0100 Subject: ethtool: Disallow ETHTOOL_SRSSH with both indir table and hash key unchanged This would be a no-op, so there is no reason to request it. This also allows conversion of the current implementations of ethtool_ops::{get,set}_rxfh_indir to ethtool_ops::{get,set}_rxfh with no change other than their parameters. Signed-off-by: Ben Hutchings --- include/linux/ethtool.h | 4 ++-- net/core/ethtool.c | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 886e127d51a6..de687a97c6e7 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -168,8 +168,8 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * @set_rxfh_indir: Set the contents of the RX flow hash indirection table. * Will not be called if @get_rxfh_indir_size returns zero. * @set_rxfh: Set the contents of the RX flow hash indirection table and/or - * hash key. Either or both arguments may be %NULL if that attribute - * is not to be changed. + * hash key. In case only the indirection table or hash key is to be + * changed, the other argument will be %NULL. * Will only be called if one or both of @get_rxfh_indir_size and * @get_rxfh_key_size are implemented and return non-zero. * Returns a negative error code or zero. diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 7156fe5ca876..b8857348bdf3 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -802,11 +802,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, return -EFAULT; /* If either indir or hash key is valid, proceed further. + * It is not valid to request that both be unchanged. */ if ((user_indir_size && user_indir_size != ETH_RXFH_INDIR_NO_CHANGE && user_indir_size != dev_indir_size) || - (user_key_size && (user_key_size != dev_key_size))) + (user_key_size && (user_key_size != dev_key_size)) || + (user_indir_size == ETH_RXFH_INDIR_NO_CHANGE && + user_key_size == 0)) return -EINVAL; if (user_indir_size != ETH_RXFH_INDIR_NO_CHANGE) -- cgit v1.2.3-71-gd317 From 33cb0fa7888510b5bd2096352b200cfe29db10fe Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 15 May 2014 02:01:23 +0100 Subject: ethtool, be2net: constify array pointer parameters to ethtool_ops::set_rxfh Signed-off-by: Ben Hutchings --- drivers/net/ethernet/emulex/benet/be_cmds.c | 2 +- drivers/net/ethernet/emulex/benet/be_cmds.h | 2 +- drivers/net/ethernet/emulex/benet/be_ethtool.c | 3 ++- include/linux/ethtool.h | 3 ++- 4 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 476752d0a6a4..7b59da241ccb 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -2033,7 +2033,7 @@ int be_cmd_reset_function(struct be_adapter *adapter) } int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, - u32 rss_hash_opts, u16 table_size, u8 *rss_hkey) + u32 rss_hash_opts, u16 table_size, const u8 *rss_hkey) { struct be_mcc_wrb *wrb; struct be_cmd_req_rss_config *req; diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 228d4b611084..451f3138b8fb 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -2068,7 +2068,7 @@ int be_cmd_query_fw_cfg(struct be_adapter *adapter, u32 *port_num, u32 *function_mode, u32 *function_caps, u16 *asic_rev); int be_cmd_reset_function(struct be_adapter *adapter); int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, - u32 rss_hash_opts, u16 table_size, u8 *rss_hkey); + u32 rss_hash_opts, u16 table_size, const u8 *rss_hkey); int be_process_mcc(struct be_adapter *adapter); int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon, u8 status, u8 state); diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 970ae337daac..e2da4d20dd3d 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -1117,7 +1117,8 @@ static int be_get_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey) return 0; } -static int be_set_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey) +static int be_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *hkey) { int rc = 0, i, j; struct be_adapter *adapter = netdev_priv(netdev); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index de687a97c6e7..874fde01d398 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -246,7 +246,8 @@ struct ethtool_ops { u32 (*get_rxfh_key_size)(struct net_device *); u32 (*get_rxfh_indir_size)(struct net_device *); int (*get_rxfh)(struct net_device *, u32 *indir, u8 *key); - int (*set_rxfh)(struct net_device *, u32 *indir, u8 *key); + int (*set_rxfh)(struct net_device *, const u32 *indir, + const u8 *key); int (*get_rxfh_indir)(struct net_device *, u32 *); int (*set_rxfh_indir)(struct net_device *, const u32 *); void (*get_channels)(struct net_device *, struct ethtool_channels *); -- cgit v1.2.3-71-gd317 From d3091298570006fa538ec9beacbfb1098964962e Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Fri, 16 May 2014 23:26:05 +0200 Subject: sparc: fix sparse warnings in smp_32.c + smp_64.c Fix following warnings: smp_32.c:177:5: warning: symbol 'setup_profiling_timer' was not declared. Should it be static? smp_64.c:1202:5: warning: symbol 'setup_profiling_timer' was not declared. Should it be static? smp_64.c:989:6: warning: symbol 'kgdb_roundup_cpus' was not declared. Should it be static? Add prototype to include/linux/profile.h of setup_profiling_timer Add missing include to smp_64.c Signed-off-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/kernel/smp_32.c | 1 + arch/sparc/kernel/smp_64.c | 1 + include/linux/profile.h | 1 + 3 files changed, 3 insertions(+) (limited to 'include') diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 9d3297d8d730..7958242d63c5 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index df91e78dbd95..afc71bf719b1 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/profile.h b/include/linux/profile.h index aaad3861beb8..b537a25ffa17 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -44,6 +44,7 @@ extern int prof_on __read_mostly; int profile_init(void); int profile_setup(char *str); void profile_tick(int type); +int setup_profiling_timer(unsigned int multiplier); /* * Add multiple profiler hits to a given address: -- cgit v1.2.3-71-gd317 From 3b36fbb01dc50f58e7803006f5a99683daf26c8c Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Sun, 18 May 2014 22:44:35 -0700 Subject: Input: pixcir_i2c_ts - initialize interrupt mode and power mode Introduce helper functions to configure power and interrupt registers. Default to IDLE mode on probe as device supports auto wakeup to ACVIE mode on detecting finger touch. Configure interrupt mode and polarity on start up. Power down on device closure or module removal. Signed-off-by: Roger Quadros Acked-by: Mugunthan V N Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/pixcir_i2c_ts.c | 182 ++++++++++++++++++++++++++++-- include/linux/input/pixcir_ts.h | 42 +++++++ 2 files changed, 216 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c index 5d36243bdc79..6c6f6dacb858 100644 --- a/drivers/input/touchscreen/pixcir_i2c_ts.c +++ b/drivers/input/touchscreen/pixcir_i2c_ts.c @@ -29,7 +29,7 @@ struct pixcir_i2c_ts_data { struct i2c_client *client; struct input_dev *input; const struct pixcir_ts_platform_data *chip; - bool exiting; + bool running; }; static void pixcir_ts_poscheck(struct pixcir_i2c_ts_data *data) @@ -88,7 +88,7 @@ static irqreturn_t pixcir_ts_isr(int irq, void *dev_id) { struct pixcir_i2c_ts_data *tsdata = dev_id; - while (!tsdata->exiting) { + while (tsdata->running) { pixcir_ts_poscheck(tsdata); if (tsdata->chip->attb_read_val()) @@ -100,6 +100,164 @@ static irqreturn_t pixcir_ts_isr(int irq, void *dev_id) return IRQ_HANDLED; } +static int pixcir_set_power_mode(struct pixcir_i2c_ts_data *ts, + enum pixcir_power_mode mode) +{ + struct device *dev = &ts->client->dev; + int ret; + + ret = i2c_smbus_read_byte_data(ts->client, PIXCIR_REG_POWER_MODE); + if (ret < 0) { + dev_err(dev, "%s: can't read reg 0x%x : %d\n", + __func__, PIXCIR_REG_POWER_MODE, ret); + return ret; + } + + ret &= ~PIXCIR_POWER_MODE_MASK; + ret |= mode; + + /* Always AUTO_IDLE */ + ret |= PIXCIR_POWER_ALLOW_IDLE; + + ret = i2c_smbus_write_byte_data(ts->client, PIXCIR_REG_POWER_MODE, ret); + if (ret < 0) { + dev_err(dev, "%s: can't write reg 0x%x : %d\n", + __func__, PIXCIR_REG_POWER_MODE, ret); + return ret; + } + + return 0; +} + +/* + * Set the interrupt mode for the device i.e. ATTB line behaviour + * + * @polarity : 1 for active high, 0 for active low. + */ +static int pixcir_set_int_mode(struct pixcir_i2c_ts_data *ts, + enum pixcir_int_mode mode, bool polarity) +{ + struct device *dev = &ts->client->dev; + int ret; + + ret = i2c_smbus_read_byte_data(ts->client, PIXCIR_REG_INT_MODE); + if (ret < 0) { + dev_err(dev, "%s: can't read reg 0x%x : %d\n", + __func__, PIXCIR_REG_INT_MODE, ret); + return ret; + } + + ret &= ~PIXCIR_INT_MODE_MASK; + ret |= mode; + + if (polarity) + ret |= PIXCIR_INT_POL_HIGH; + else + ret &= ~PIXCIR_INT_POL_HIGH; + + ret = i2c_smbus_write_byte_data(ts->client, PIXCIR_REG_INT_MODE, ret); + if (ret < 0) { + dev_err(dev, "%s: can't write reg 0x%x : %d\n", + __func__, PIXCIR_REG_INT_MODE, ret); + return ret; + } + + return 0; +} + +/* + * Enable/disable interrupt generation + */ +static int pixcir_int_enable(struct pixcir_i2c_ts_data *ts, bool enable) +{ + struct device *dev = &ts->client->dev; + int ret; + + ret = i2c_smbus_read_byte_data(ts->client, PIXCIR_REG_INT_MODE); + if (ret < 0) { + dev_err(dev, "%s: can't read reg 0x%x : %d\n", + __func__, PIXCIR_REG_INT_MODE, ret); + return ret; + } + + if (enable) + ret |= PIXCIR_INT_ENABLE; + else + ret &= ~PIXCIR_INT_ENABLE; + + ret = i2c_smbus_write_byte_data(ts->client, PIXCIR_REG_INT_MODE, ret); + if (ret < 0) { + dev_err(dev, "%s: can't write reg 0x%x : %d\n", + __func__, PIXCIR_REG_INT_MODE, ret); + return ret; + } + + return 0; +} + +static int pixcir_start(struct pixcir_i2c_ts_data *ts) +{ + struct device *dev = &ts->client->dev; + int error; + + /* LEVEL_TOUCH interrupt with active low polarity */ + error = pixcir_set_int_mode(ts, PIXCIR_INT_LEVEL_TOUCH, 0); + if (error) { + dev_err(dev, "Failed to set interrupt mode: %d\n", error); + return error; + } + + ts->running = true; + mb(); /* Update status before IRQ can fire */ + + /* enable interrupt generation */ + error = pixcir_int_enable(ts, true); + if (error) { + dev_err(dev, "Failed to enable interrupt generation: %d\n", + error); + return error; + } + + return 0; +} + +static int pixcir_stop(struct pixcir_i2c_ts_data *ts) +{ + int error; + + /* Disable interrupt generation */ + error = pixcir_int_enable(ts, false); + if (error) { + dev_err(&ts->client->dev, + "Failed to disable interrupt generation: %d\n", + error); + return error; + } + + /* Exit ISR if running, no more report parsing */ + ts->running = false; + mb(); /* update status before we synchronize irq */ + + /* Wait till running ISR is complete */ + synchronize_irq(ts->client->irq); + + return 0; +} + +static int pixcir_input_open(struct input_dev *dev) +{ + struct pixcir_i2c_ts_data *ts = input_get_drvdata(dev); + + return pixcir_start(ts); +} + +static void pixcir_input_close(struct input_dev *dev) +{ + struct pixcir_i2c_ts_data *ts = input_get_drvdata(dev); + + pixcir_stop(ts); +} + #ifdef CONFIG_PM_SLEEP static int pixcir_i2c_ts_suspend(struct device *dev) { @@ -156,6 +314,8 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client, input->name = client->name; input->id.bustype = BUS_I2C; + input->open = pixcir_input_open; + input->close = pixcir_input_close; input->dev.parent = &client->dev; __set_bit(EV_KEY, input->evbit); @@ -176,11 +336,22 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client, return error; } + /* Always be in IDLE mode to save power, device supports auto wake */ + error = pixcir_set_power_mode(tsdata, PIXCIR_POWER_IDLE); + if (error) { + dev_err(dev, "Failed to set IDLE mode\n"); + return error; + } + + /* Stop device till opened */ + error = pixcir_stop(tsdata); + if (error) + return error; + error = input_register_device(input); if (error) return error; - i2c_set_clientdata(client, tsdata); device_init_wakeup(&client->dev, 1); return 0; @@ -188,13 +359,8 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client, static int pixcir_i2c_ts_remove(struct i2c_client *client) { - struct pixcir_i2c_ts_data *tsdata = i2c_get_clientdata(client); - device_init_wakeup(&client->dev, 0); - tsdata->exiting = true; - mb(); - return 0; } diff --git a/include/linux/input/pixcir_ts.h b/include/linux/input/pixcir_ts.h index 7163d91c0373..7942804464d3 100644 --- a/include/linux/input/pixcir_ts.h +++ b/include/linux/input/pixcir_ts.h @@ -1,6 +1,48 @@ #ifndef _PIXCIR_I2C_TS_H #define _PIXCIR_I2C_TS_H +/* + * Register map + */ +#define PIXCIR_REG_POWER_MODE 51 +#define PIXCIR_REG_INT_MODE 52 + +/* + * Power modes: + * active: max scan speed + * idle: lower scan speed with automatic transition to active on touch + * halt: datasheet says sleep but this is more like halt as the chip + * clocks are cut and it can only be brought out of this mode + * using the RESET pin. + */ +enum pixcir_power_mode { + PIXCIR_POWER_ACTIVE, + PIXCIR_POWER_IDLE, + PIXCIR_POWER_HALT, +}; + +#define PIXCIR_POWER_MODE_MASK 0x03 +#define PIXCIR_POWER_ALLOW_IDLE (1UL << 2) + +/* + * Interrupt modes: + * periodical: interrupt is asserted periodicaly + * diff coordinates: interrupt is asserted when coordinates change + * level on touch: interrupt level asserted during touch + * pulse on touch: interrupt pulse asserted druing touch + * + */ +enum pixcir_int_mode { + PIXCIR_INT_PERIODICAL, + PIXCIR_INT_DIFF_COORD, + PIXCIR_INT_LEVEL_TOUCH, + PIXCIR_INT_PULSE_TOUCH, +}; + +#define PIXCIR_INT_MODE_MASK 0x03 +#define PIXCIR_INT_ENABLE (1UL << 3) +#define PIXCIR_INT_POL_HIGH (1UL << 2) + struct pixcir_ts_platform_data { int (*attb_read_val)(void); int x_max; -- cgit v1.2.3-71-gd317 From 0dfc8d41bfa091a61354eea73199a5af0eaae9c0 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Sun, 18 May 2014 22:46:43 -0700 Subject: Input: pixcir_i2c_ts - get rid of pdata->attb_read_val() Get rid of the attb_read_val() platform hook. Instead, read the ATTB gpio directly from the driver. Fail if valid ATTB gpio is not provided by patform data. Signed-off-by: Roger Quadros Acked-by: Mugunthan V N Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/pixcir_i2c_ts.c | 16 +++++++++++++++- include/linux/input/pixcir_ts.h | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c index 6c6f6dacb858..f2c0ae18e24a 100644 --- a/drivers/input/touchscreen/pixcir_i2c_ts.c +++ b/drivers/input/touchscreen/pixcir_i2c_ts.c @@ -24,6 +24,7 @@ #include #include #include +#include struct pixcir_i2c_ts_data { struct i2c_client *client; @@ -87,11 +88,12 @@ static void pixcir_ts_poscheck(struct pixcir_i2c_ts_data *data) static irqreturn_t pixcir_ts_isr(int irq, void *dev_id) { struct pixcir_i2c_ts_data *tsdata = dev_id; + const struct pixcir_ts_platform_data *pdata = tsdata->chip; while (tsdata->running) { pixcir_ts_poscheck(tsdata); - if (tsdata->chip->attb_read_val()) + if (gpio_get_value(pdata->gpio_attb)) break; msleep(20); @@ -298,6 +300,11 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client, return -EINVAL; } + if (!gpio_is_valid(pdata->gpio_attb)) { + dev_err(dev, "Invalid gpio_attb in pdata\n"); + return -EINVAL; + } + tsdata = devm_kzalloc(dev, sizeof(*tsdata), GFP_KERNEL); if (!tsdata) return -ENOMEM; @@ -328,6 +335,13 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client, input_set_drvdata(input, tsdata); + error = devm_gpio_request_one(dev, pdata->gpio_attb, + GPIOF_DIR_IN, "pixcir_i2c_attb"); + if (error) { + dev_err(dev, "Failed to request ATTB gpio\n"); + return error; + } + error = devm_request_threaded_irq(dev, client->irq, NULL, pixcir_ts_isr, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, client->name, tsdata); diff --git a/include/linux/input/pixcir_ts.h b/include/linux/input/pixcir_ts.h index 7942804464d3..160cf353aa39 100644 --- a/include/linux/input/pixcir_ts.h +++ b/include/linux/input/pixcir_ts.h @@ -44,9 +44,9 @@ enum pixcir_int_mode { #define PIXCIR_INT_POL_HIGH (1UL << 2) struct pixcir_ts_platform_data { - int (*attb_read_val)(void); int x_max; int y_max; + int gpio_attb; /* GPIO connected to ATTB line */ }; #endif -- cgit v1.2.3-71-gd317 From 2cefdb1f0a27150755ef2730bafc58bf2ed16571 Mon Sep 17 00:00:00 2001 From: Nick Dyer Date: Sun, 18 May 2014 22:59:20 -0700 Subject: Input: atmel_mxt_ts - remove unnecessary platform data It is not necessary to download these values to the maXTouch chip on every probe, since they are stored in NVRAM. It makes life difficult when tuning the device to keep them in sync with the config array/file, and requires a new kernel build for minor tweaks. These parameters only represent a tiny subset of the available configuration options, tracking all of these options in platform data would be a endless task. In addition, different versions of maXTouch chips may have these values in different places or may not even have them at all. Having these values also makes life more complex for device tree and other platforms where having to define a static configuration isn't helpful. Signed-off-by: Nick Dyer Acked-by: Benson Leung Acked-by: Yufeng Shen Signed-off-by: Dmitry Torokhov --- arch/arm/mach-s5pv210/mach-goni.c | 5 ---- drivers/input/touchscreen/atmel_mxt_ts.c | 50 ------------------------------- drivers/platform/chrome/chromeos_laptop.c | 10 ------- include/linux/i2c/atmel_mxt_ts.h | 6 +--- 4 files changed, 1 insertion(+), 70 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c index b41a38a75844..e549ecf0e5dc 100644 --- a/arch/arm/mach-s5pv210/mach-goni.c +++ b/arch/arm/mach-s5pv210/mach-goni.c @@ -239,13 +239,8 @@ static void __init goni_radio_init(void) /* TSP */ static struct mxt_platform_data qt602240_platform_data = { - .x_line = 17, - .y_line = 11, .x_size = 800, .y_size = 480, - .blen = 0x21, - .threshold = 0x28, - .voltage = 2800000, /* 2.8V */ .orient = MXT_DIAGONAL, .irqflags = IRQF_TRIGGER_FALLING, }; diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index a70400754e92..7eb515caf215 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -685,54 +685,6 @@ static int mxt_make_highchg(struct mxt_data *data) return 0; } -static void mxt_handle_pdata(struct mxt_data *data) -{ - const struct mxt_platform_data *pdata = data->pdata; - u8 voltage; - - /* Set touchscreen lines */ - mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_XSIZE, - pdata->x_line); - mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_YSIZE, - pdata->y_line); - - /* Set touchscreen orient */ - mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_ORIENT, - pdata->orient); - - /* Set touchscreen burst length */ - mxt_write_object(data, MXT_TOUCH_MULTI_T9, - MXT_TOUCH_BLEN, pdata->blen); - - /* Set touchscreen threshold */ - mxt_write_object(data, MXT_TOUCH_MULTI_T9, - MXT_TOUCH_TCHTHR, pdata->threshold); - - /* Set touchscreen resolution */ - mxt_write_object(data, MXT_TOUCH_MULTI_T9, - MXT_TOUCH_XRANGE_LSB, (pdata->x_size - 1) & 0xff); - mxt_write_object(data, MXT_TOUCH_MULTI_T9, - MXT_TOUCH_XRANGE_MSB, (pdata->x_size - 1) >> 8); - mxt_write_object(data, MXT_TOUCH_MULTI_T9, - MXT_TOUCH_YRANGE_LSB, (pdata->y_size - 1) & 0xff); - mxt_write_object(data, MXT_TOUCH_MULTI_T9, - MXT_TOUCH_YRANGE_MSB, (pdata->y_size - 1) >> 8); - - /* Set touchscreen voltage */ - if (pdata->voltage) { - if (pdata->voltage < MXT_VOLTAGE_DEFAULT) { - voltage = (MXT_VOLTAGE_DEFAULT - pdata->voltage) / - MXT_VOLTAGE_STEP; - voltage = 0xff - voltage + 1; - } else - voltage = (pdata->voltage - MXT_VOLTAGE_DEFAULT) / - MXT_VOLTAGE_STEP; - - mxt_write_object(data, MXT_SPT_CTECONFIG_T28, - MXT_CTE_VOLTAGE, voltage); - } -} - static int mxt_get_info(struct mxt_data *data) { struct i2c_client *client = data->client; @@ -840,8 +792,6 @@ static int mxt_initialize(struct mxt_data *data) if (error) goto err_free_object_table; - mxt_handle_pdata(data); - /* Backup to memory */ mxt_write_object(data, MXT_GEN_COMMAND_T6, MXT_COMMAND_BACKUPNV, diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c index 7f3aad0e115c..2559a0407c58 100644 --- a/drivers/platform/chrome/chromeos_laptop.c +++ b/drivers/platform/chrome/chromeos_laptop.c @@ -85,13 +85,8 @@ static struct i2c_board_info tsl2563_als_device = { }; static struct mxt_platform_data atmel_224s_tp_platform_data = { - .x_line = 18, - .y_line = 12, .x_size = 102*20, .y_size = 68*20, - .blen = 0x80, /* Gain setting is in upper 4 bits */ - .threshold = 0x32, - .voltage = 0, /* 3.3V */ .orient = MXT_VERTICAL_FLIP, .irqflags = IRQF_TRIGGER_FALLING, .is_tp = true, @@ -110,13 +105,8 @@ static struct i2c_board_info atmel_224s_tp_device = { }; static struct mxt_platform_data atmel_1664s_platform_data = { - .x_line = 32, - .y_line = 50, .x_size = 1700, .y_size = 2560, - .blen = 0x89, /* Gain setting is in upper 4 bits */ - .threshold = 0x28, - .voltage = 0, /* 3.3V */ .orient = MXT_ROTATED_90_COUNTER, .irqflags = IRQF_TRIGGER_FALLING, .is_tp = false, diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h index 99e379b74398..eff0cdc08843 100644 --- a/include/linux/i2c/atmel_mxt_ts.h +++ b/include/linux/i2c/atmel_mxt_ts.h @@ -33,14 +33,10 @@ struct mxt_platform_data { const u8 *config; size_t config_length; - unsigned int x_line; - unsigned int y_line; unsigned int x_size; unsigned int y_size; - unsigned int blen; - unsigned int threshold; - unsigned int voltage; unsigned char orient; + unsigned long irqflags; bool is_tp; const unsigned int key_map[MXT_NUM_GPIO]; -- cgit v1.2.3-71-gd317 From fb5e4c3ee140b29e1935b4bbb19c319177bed231 Mon Sep 17 00:00:00 2001 From: Nick Dyer Date: Sun, 18 May 2014 23:00:15 -0700 Subject: Input: atmel_mxt_ts - improve T19 GPIO keys handling * The mapping of the GPIO numbers into the T19 status byte varies between different maXTouch chips. Some have up to 7 GPIOs. Allowing a keycode array of up to 8 items is simpler and more generic. So replace #define with configurable number of keys which also allows the removal of is_tp. * Rename platform data parameters to include "t19" to prevent confusion with T15 key array. * Probe aborts early on when pdata is NULL, so no need to check. * Move "int i" to beginning of function (mixed declarations and code) * Use API calls rather than __set_bit() * Remove unused dev variable. Signed-off-by: Nick Dyer Acked-by: Yufeng Shen Reviewed-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel_mxt_ts.c | 44 ++++++++++++------------------- drivers/platform/chrome/chromeos_laptop.c | 17 +++++++----- include/linux/i2c/atmel_mxt_ts.h | 7 ++--- 3 files changed, 30 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 7eb515caf215..65df362cf327 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -180,12 +180,6 @@ #define MXT_FWRESET_TIME 175 /* msec */ -/* MXT_SPT_GPIOPWM_T19 field */ -#define MXT_GPIO0_MASK 0x04 -#define MXT_GPIO1_MASK 0x08 -#define MXT_GPIO2_MASK 0x10 -#define MXT_GPIO3_MASK 0x20 - /* Command to unlock bootloader */ #define MXT_UNLOCK_CMD_MSB 0xaa #define MXT_UNLOCK_CMD_LSB 0xdc @@ -250,7 +244,6 @@ struct mxt_data { const struct mxt_platform_data *pdata; struct mxt_object *object_table; struct mxt_info info; - bool is_tp; unsigned int irq; unsigned int max_x; @@ -515,15 +508,16 @@ static int mxt_write_object(struct mxt_data *data, static void mxt_input_button(struct mxt_data *data, struct mxt_message *message) { struct input_dev *input = data->input_dev; + const struct mxt_platform_data *pdata = data->pdata; bool button; int i; /* Active-low switch */ - for (i = 0; i < MXT_NUM_GPIO; i++) { - if (data->pdata->key_map[i] == KEY_RESERVED) + for (i = 0; i < pdata->t19_num_keys; i++) { + if (pdata->t19_keymap[i] == KEY_RESERVED) continue; - button = !(message->message[0] & MXT_GPIO0_MASK << i); - input_report_key(input, data->pdata->key_map[i], button); + button = !(message->message[0] & (1 << i)); + input_report_key(input, pdata->t19_keymap[i], button); } } @@ -1084,6 +1078,8 @@ static int mxt_probe(struct i2c_client *client, struct input_dev *input_dev; int error; unsigned int num_mt_slots; + unsigned int mt_flags = 0; + int i; if (!pdata) return -EINVAL; @@ -1096,10 +1092,7 @@ static int mxt_probe(struct i2c_client *client, goto err_free_mem; } - data->is_tp = pdata && pdata->is_tp; - - input_dev->name = (data->is_tp) ? "Atmel maXTouch Touchpad" : - "Atmel maXTouch Touchscreen"; + input_dev->name = "Atmel maXTouch Touchscreen"; snprintf(data->phys, sizeof(data->phys), "i2c-%u-%04x/input0", client->adapter->nr, client->addr); @@ -1125,20 +1118,15 @@ static int mxt_probe(struct i2c_client *client, __set_bit(EV_KEY, input_dev->evbit); __set_bit(BTN_TOUCH, input_dev->keybit); - if (data->is_tp) { - int i; - __set_bit(INPUT_PROP_POINTER, input_dev->propbit); + if (pdata->t19_num_keys) { __set_bit(INPUT_PROP_BUTTONPAD, input_dev->propbit); - for (i = 0; i < MXT_NUM_GPIO; i++) - if (pdata->key_map[i] != KEY_RESERVED) - __set_bit(pdata->key_map[i], input_dev->keybit); + for (i = 0; i < pdata->t19_num_keys; i++) + if (pdata->t19_keymap[i] != KEY_RESERVED) + input_set_capability(input_dev, EV_KEY, + pdata->t19_keymap[i]); - __set_bit(BTN_TOOL_FINGER, input_dev->keybit); - __set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit); - __set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); - __set_bit(BTN_TOOL_QUADTAP, input_dev->keybit); - __set_bit(BTN_TOOL_QUINTTAP, input_dev->keybit); + mt_flags |= INPUT_MT_POINTER; input_abs_set_res(input_dev, ABS_X, MXT_PIXELS_PER_MM); input_abs_set_res(input_dev, ABS_Y, MXT_PIXELS_PER_MM); @@ -1146,6 +1134,8 @@ static int mxt_probe(struct i2c_client *client, MXT_PIXELS_PER_MM); input_abs_set_res(input_dev, ABS_MT_POSITION_Y, MXT_PIXELS_PER_MM); + + input_dev->name = "Atmel maXTouch Touchpad"; } /* For single touch */ @@ -1158,7 +1148,7 @@ static int mxt_probe(struct i2c_client *client, /* For multi touch */ num_mt_slots = data->T9_reportid_max - data->T9_reportid_min + 1; - error = input_mt_init_slots(input_dev, num_mt_slots, 0); + error = input_mt_init_slots(input_dev, num_mt_slots, mt_flags); if (error) goto err_free_object; input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c index 2559a0407c58..8b7523ab62e5 100644 --- a/drivers/platform/chrome/chromeos_laptop.c +++ b/drivers/platform/chrome/chromeos_laptop.c @@ -84,16 +84,22 @@ static struct i2c_board_info tsl2563_als_device = { I2C_BOARD_INFO("tsl2563", TAOS_ALS_I2C_ADDR), }; +static int mxt_t19_keys[] = { + KEY_RESERVED, + KEY_RESERVED, + KEY_RESERVED, + KEY_RESERVED, + KEY_RESERVED, + BTN_LEFT +}; + static struct mxt_platform_data atmel_224s_tp_platform_data = { .x_size = 102*20, .y_size = 68*20, .orient = MXT_VERTICAL_FLIP, .irqflags = IRQF_TRIGGER_FALLING, - .is_tp = true, - .key_map = { KEY_RESERVED, - KEY_RESERVED, - KEY_RESERVED, - BTN_LEFT }, + .t19_num_keys = ARRAY_SIZE(mxt_t19_keys), + .t19_keymap = mxt_t19_keys, .config = NULL, .config_length = 0, }; @@ -109,7 +115,6 @@ static struct mxt_platform_data atmel_1664s_platform_data = { .y_size = 2560, .orient = MXT_ROTATED_90_COUNTER, .irqflags = IRQF_TRIGGER_FALLING, - .is_tp = false, .config = NULL, .config_length = 0, }; diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h index eff0cdc08843..d26080dc606c 100644 --- a/include/linux/i2c/atmel_mxt_ts.h +++ b/include/linux/i2c/atmel_mxt_ts.h @@ -15,9 +15,6 @@ #include -/* For key_map array */ -#define MXT_NUM_GPIO 4 - /* Orient */ #define MXT_NORMAL 0x0 #define MXT_DIAGONAL 0x1 @@ -38,8 +35,8 @@ struct mxt_platform_data { unsigned char orient; unsigned long irqflags; - bool is_tp; - const unsigned int key_map[MXT_NUM_GPIO]; + u8 t19_num_keys; + const unsigned int *t19_keymap; }; #endif /* __LINUX_ATMEL_MXT_TS_H */ -- cgit v1.2.3-71-gd317 From c3f78043d5aea39205a14c580babd87fbdcfa148 Mon Sep 17 00:00:00 2001 From: Nick Dyer Date: Sun, 18 May 2014 23:04:46 -0700 Subject: Input: atmel_mxt_ts - implement CRC check for configuration data The configuration is stored in NVRAM on the maXTouch chip. When the device is reset it reports a CRC of the stored configuration values. Therefore it isn't necessary to send the configuration on each probe - we can check the CRC matches and avoid a timeconsuming backup/reset cycle. Signed-off-by: Nick Dyer Acked-by: Benson Leung Acked-by: Yufeng Shen Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel_mxt_ts.c | 60 +++++++++++++++++++++++++++----- include/linux/i2c/atmel_mxt_ts.h | 1 + 2 files changed, 53 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 278f364ec6e1..61f9ef221d12 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -188,6 +188,7 @@ #define MXT_BACKUP_TIME 50 /* msec */ #define MXT_RESET_TIME 200 /* msec */ #define MXT_RESET_TIMEOUT 3000 /* msec */ +#define MXT_CRC_TIMEOUT 1000 /* msec */ #define MXT_FW_RESET_TIME 3000 /* msec */ #define MXT_FW_CHG_TIMEOUT 300 /* msec */ @@ -259,6 +260,7 @@ struct mxt_data { unsigned int max_x; unsigned int max_y; bool in_bootloader; + u32 config_crc; /* Cached parameters from object table */ u8 T6_reportid; @@ -272,6 +274,9 @@ struct mxt_data { /* for reset handling */ struct completion reset_completion; + + /* for config update handling */ + struct completion crc_completion; }; static size_t mxt_obj_size(const struct mxt_object *obj) @@ -636,7 +641,7 @@ static void mxt_input_touchevent(struct mxt_data *data, } } -static unsigned mxt_extract_T6_csum(const u8 *csum) +static u16 mxt_extract_T6_csum(const u8 *csum) { return csum[0] | (csum[1] << 8) | (csum[2] << 16); } @@ -654,6 +659,7 @@ static irqreturn_t mxt_process_messages_until_invalid(struct mxt_data *data) struct device *dev = &data->client->dev; u8 reportid; bool update_input = false; + u32 crc; do { if (mxt_read_message(data, &message)) { @@ -665,9 +671,15 @@ static irqreturn_t mxt_process_messages_until_invalid(struct mxt_data *data) if (reportid == data->T6_reportid) { u8 status = payload[0]; - unsigned csum = mxt_extract_T6_csum(&payload[1]); + + crc = mxt_extract_T6_csum(&payload[1]); + if (crc != data->config_crc) { + data->config_crc = crc; + complete(&data->crc_completion); + } + dev_dbg(dev, "Status: %02x Config Checksum: %06x\n", - status, csum); + status, data->config_crc); if (status & MXT_T6_STATUS_RESET) complete(&data->reset_completion); @@ -757,6 +769,24 @@ static int mxt_soft_reset(struct mxt_data *data) return 0; } +static void mxt_update_crc(struct mxt_data *data, u8 cmd, u8 value) +{ + /* + * On failure, CRC is set to 0 and config will always be + * downloaded. + */ + data->config_crc = 0; + reinit_completion(&data->crc_completion); + + mxt_t6_command(data, cmd, value, true); + + /* + * Wait for crc message. On failure, CRC is set to 0 and config will + * always be downloaded. + */ + mxt_wait_for_completion(data, &data->crc_completion, MXT_CRC_TIMEOUT); +} + static int mxt_check_reg_init(struct mxt_data *data) { const struct mxt_platform_data *pdata = data->pdata; @@ -771,6 +801,16 @@ static int mxt_check_reg_init(struct mxt_data *data) return 0; } + mxt_update_crc(data, MXT_COMMAND_REPORTALL, 1); + + if (data->config_crc == pdata->config_crc) { + dev_info(dev, "Config CRC 0x%06X: OK\n", data->config_crc); + return 0; + } + + dev_info(dev, "Config CRC 0x%06X: does not match 0x%06X\n", + data->config_crc, pdata->config_crc); + for (i = 0; i < data->info.object_num; i++) { object = data->object_table + i; @@ -790,6 +830,14 @@ static int mxt_check_reg_init(struct mxt_data *data) index += size; } + mxt_update_crc(data, MXT_COMMAND_BACKUPNV, MXT_BACKUP_VALUE); + + ret = mxt_soft_reset(data); + if (ret) + return ret; + + dev_info(dev, "Config successfully updated\n"); + return 0; } @@ -929,11 +977,6 @@ static int mxt_initialize(struct mxt_data *data) goto err_free_object_table; } - error = mxt_t6_command(data, MXT_COMMAND_BACKUPNV, - MXT_BACKUP_VALUE, false); - if (!error) - mxt_soft_reset(data); - /* Update matrix size at info struct */ error = mxt_read_reg(client, MXT_MATRIX_X_SIZE, &val); if (error) @@ -1263,6 +1306,7 @@ static int mxt_probe(struct i2c_client *client, init_completion(&data->bl_completion); init_completion(&data->reset_completion); + init_completion(&data->crc_completion); mxt_calc_resolution(data); diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h index d26080dc606c..9f92135b6620 100644 --- a/include/linux/i2c/atmel_mxt_ts.h +++ b/include/linux/i2c/atmel_mxt_ts.h @@ -29,6 +29,7 @@ struct mxt_platform_data { const u8 *config; size_t config_length; + u32 config_crc; unsigned int x_size; unsigned int y_size; -- cgit v1.2.3-71-gd317 From fd1159318e55e901cf269de90163b19fd62938cb Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 17 May 2014 00:03:54 +0400 Subject: can: add Renesas R-Car CAN driver Add support for the CAN controller found in Renesas R-Car SoCs. Signed-off-by: Sergei Shtylyov Signed-off-by: Marc Kleine-Budde --- drivers/net/can/Kconfig | 10 + drivers/net/can/Makefile | 1 + drivers/net/can/rcar_can.c | 876 ++++++++++++++++++++++++++++++++++ include/linux/can/platform/rcar_can.h | 17 + 4 files changed, 904 insertions(+) create mode 100644 drivers/net/can/rcar_can.c create mode 100644 include/linux/can/platform/rcar_can.h (limited to 'include') diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index ac67afa7735c..714b18790caf 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -119,6 +119,16 @@ config CAN_GRCAN endian syntheses of the cores would need some modifications on the hardware level to work. +config CAN_RCAR + tristate "Renesas R-Car CAN controller" + depends on ARM + ---help--- + Say Y here if you want to use CAN controller found on Renesas R-Car + SoCs. + + To compile this driver as a module, choose M here: the module will + be called rcar_can. + source "drivers/net/can/mscan/Kconfig" source "drivers/net/can/sja1000/Kconfig" diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile index c42058868b0f..90f538c73f8c 100644 --- a/drivers/net/can/Makefile +++ b/drivers/net/can/Makefile @@ -25,5 +25,6 @@ obj-$(CONFIG_CAN_JANZ_ICAN3) += janz-ican3.o obj-$(CONFIG_CAN_FLEXCAN) += flexcan.o obj-$(CONFIG_PCH_CAN) += pch_can.o obj-$(CONFIG_CAN_GRCAN) += grcan.o +obj-$(CONFIG_CAN_RCAR) += rcar_can.o ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c new file mode 100644 index 000000000000..5268d216ecfa --- /dev/null +++ b/drivers/net/can/rcar_can.c @@ -0,0 +1,876 @@ +/* Renesas R-Car CAN device driver + * + * Copyright (C) 2013 Cogent Embedded, Inc. + * Copyright (C) 2013 Renesas Solutions Corp. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RCAR_CAN_DRV_NAME "rcar_can" + +/* Mailbox configuration: + * mailbox 60 - 63 - Rx FIFO mailboxes + * mailbox 56 - 59 - Tx FIFO mailboxes + * non-FIFO mailboxes are not used + */ +#define RCAR_CAN_N_MBX 64 /* Number of mailboxes in non-FIFO mode */ +#define RCAR_CAN_RX_FIFO_MBX 60 /* Mailbox - window to Rx FIFO */ +#define RCAR_CAN_TX_FIFO_MBX 56 /* Mailbox - window to Tx FIFO */ +#define RCAR_CAN_FIFO_DEPTH 4 + +/* Mailbox registers structure */ +struct rcar_can_mbox_regs { + u32 id; /* IDE and RTR bits, SID and EID */ + u8 stub; /* Not used */ + u8 dlc; /* Data Length Code - bits [0..3] */ + u8 data[8]; /* Data Bytes */ + u8 tsh; /* Time Stamp Higher Byte */ + u8 tsl; /* Time Stamp Lower Byte */ +}; + +struct rcar_can_regs { + struct rcar_can_mbox_regs mb[RCAR_CAN_N_MBX]; /* Mailbox registers */ + u32 mkr_2_9[8]; /* Mask Registers 2-9 */ + u32 fidcr[2]; /* FIFO Received ID Compare Register */ + u32 mkivlr1; /* Mask Invalid Register 1 */ + u32 mier1; /* Mailbox Interrupt Enable Register 1 */ + u32 mkr_0_1[2]; /* Mask Registers 0-1 */ + u32 mkivlr0; /* Mask Invalid Register 0*/ + u32 mier0; /* Mailbox Interrupt Enable Register 0 */ + u8 pad_440[0x3c0]; + u8 mctl[64]; /* Message Control Registers */ + u16 ctlr; /* Control Register */ + u16 str; /* Status register */ + u8 bcr[3]; /* Bit Configuration Register */ + u8 clkr; /* Clock Select Register */ + u8 rfcr; /* Receive FIFO Control Register */ + u8 rfpcr; /* Receive FIFO Pointer Control Register */ + u8 tfcr; /* Transmit FIFO Control Register */ + u8 tfpcr; /* Transmit FIFO Pointer Control Register */ + u8 eier; /* Error Interrupt Enable Register */ + u8 eifr; /* Error Interrupt Factor Judge Register */ + u8 recr; /* Receive Error Count Register */ + u8 tecr; /* Transmit Error Count Register */ + u8 ecsr; /* Error Code Store Register */ + u8 cssr; /* Channel Search Support Register */ + u8 mssr; /* Mailbox Search Status Register */ + u8 msmr; /* Mailbox Search Mode Register */ + u16 tsr; /* Time Stamp Register */ + u8 afsr; /* Acceptance Filter Support Register */ + u8 pad_857; + u8 tcr; /* Test Control Register */ + u8 pad_859[7]; + u8 ier; /* Interrupt Enable Register */ + u8 isr; /* Interrupt Status Register */ + u8 pad_862; + u8 mbsmr; /* Mailbox Search Mask Register */ +}; + +struct rcar_can_priv { + struct can_priv can; /* Must be the first member! */ + struct net_device *ndev; + struct napi_struct napi; + struct rcar_can_regs __iomem *regs; + struct clk *clk; + u8 tx_dlc[RCAR_CAN_FIFO_DEPTH]; + u32 tx_head; + u32 tx_tail; + u8 clock_select; + u8 ier; +}; + +static const struct can_bittiming_const rcar_can_bittiming_const = { + .name = RCAR_CAN_DRV_NAME, + .tseg1_min = 4, + .tseg1_max = 16, + .tseg2_min = 2, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 1024, + .brp_inc = 1, +}; + +/* Control Register bits */ +#define RCAR_CAN_CTLR_BOM (3 << 11) /* Bus-Off Recovery Mode Bits */ +#define RCAR_CAN_CTLR_BOM_ENT (1 << 11) /* Entry to halt mode */ + /* at bus-off entry */ +#define RCAR_CAN_CTLR_SLPM (1 << 10) +#define RCAR_CAN_CTLR_CANM (3 << 8) /* Operating Mode Select Bit */ +#define RCAR_CAN_CTLR_CANM_HALT (1 << 9) +#define RCAR_CAN_CTLR_CANM_RESET (1 << 8) +#define RCAR_CAN_CTLR_CANM_FORCE_RESET (3 << 8) +#define RCAR_CAN_CTLR_MLM (1 << 3) /* Message Lost Mode Select */ +#define RCAR_CAN_CTLR_IDFM (3 << 1) /* ID Format Mode Select Bits */ +#define RCAR_CAN_CTLR_IDFM_MIXED (1 << 2) /* Mixed ID mode */ +#define RCAR_CAN_CTLR_MBM (1 << 0) /* Mailbox Mode select */ + +/* Status Register bits */ +#define RCAR_CAN_STR_RSTST (1 << 8) /* Reset Status Bit */ + +/* FIFO Received ID Compare Registers 0 and 1 bits */ +#define RCAR_CAN_FIDCR_IDE (1 << 31) /* ID Extension Bit */ +#define RCAR_CAN_FIDCR_RTR (1 << 30) /* Remote Transmission Request Bit */ + +/* Receive FIFO Control Register bits */ +#define RCAR_CAN_RFCR_RFEST (1 << 7) /* Receive FIFO Empty Status Flag */ +#define RCAR_CAN_RFCR_RFE (1 << 0) /* Receive FIFO Enable */ + +/* Transmit FIFO Control Register bits */ +#define RCAR_CAN_TFCR_TFUST (7 << 1) /* Transmit FIFO Unsent Message */ + /* Number Status Bits */ +#define RCAR_CAN_TFCR_TFUST_SHIFT 1 /* Offset of Transmit FIFO Unsent */ + /* Message Number Status Bits */ +#define RCAR_CAN_TFCR_TFE (1 << 0) /* Transmit FIFO Enable */ + +#define RCAR_CAN_N_RX_MKREGS1 2 /* Number of mask registers */ + /* for Rx mailboxes 0-31 */ +#define RCAR_CAN_N_RX_MKREGS2 8 + +/* Bit Configuration Register settings */ +#define RCAR_CAN_BCR_TSEG1(x) (((x) & 0x0f) << 20) +#define RCAR_CAN_BCR_BPR(x) (((x) & 0x3ff) << 8) +#define RCAR_CAN_BCR_SJW(x) (((x) & 0x3) << 4) +#define RCAR_CAN_BCR_TSEG2(x) ((x) & 0x07) + +/* Mailbox and Mask Registers bits */ +#define RCAR_CAN_IDE (1 << 31) +#define RCAR_CAN_RTR (1 << 30) +#define RCAR_CAN_SID_SHIFT 18 + +/* Mailbox Interrupt Enable Register 1 bits */ +#define RCAR_CAN_MIER1_RXFIE (1 << 28) /* Receive FIFO Interrupt Enable */ +#define RCAR_CAN_MIER1_TXFIE (1 << 24) /* Transmit FIFO Interrupt Enable */ + +/* Interrupt Enable Register bits */ +#define RCAR_CAN_IER_ERSIE (1 << 5) /* Error (ERS) Interrupt Enable Bit */ +#define RCAR_CAN_IER_RXFIE (1 << 4) /* Reception FIFO Interrupt */ + /* Enable Bit */ +#define RCAR_CAN_IER_TXFIE (1 << 3) /* Transmission FIFO Interrupt */ + /* Enable Bit */ +/* Interrupt Status Register bits */ +#define RCAR_CAN_ISR_ERSF (1 << 5) /* Error (ERS) Interrupt Status Bit */ +#define RCAR_CAN_ISR_RXFF (1 << 4) /* Reception FIFO Interrupt */ + /* Status Bit */ +#define RCAR_CAN_ISR_TXFF (1 << 3) /* Transmission FIFO Interrupt */ + /* Status Bit */ + +/* Error Interrupt Enable Register bits */ +#define RCAR_CAN_EIER_BLIE (1 << 7) /* Bus Lock Interrupt Enable */ +#define RCAR_CAN_EIER_OLIE (1 << 6) /* Overload Frame Transmit */ + /* Interrupt Enable */ +#define RCAR_CAN_EIER_ORIE (1 << 5) /* Receive Overrun Interrupt Enable */ +#define RCAR_CAN_EIER_BORIE (1 << 4) /* Bus-Off Recovery Interrupt Enable */ +#define RCAR_CAN_EIER_BOEIE (1 << 3) /* Bus-Off Entry Interrupt Enable */ +#define RCAR_CAN_EIER_EPIE (1 << 2) /* Error Passive Interrupt Enable */ +#define RCAR_CAN_EIER_EWIE (1 << 1) /* Error Warning Interrupt Enable */ +#define RCAR_CAN_EIER_BEIE (1 << 0) /* Bus Error Interrupt Enable */ + +/* Error Interrupt Factor Judge Register bits */ +#define RCAR_CAN_EIFR_BLIF (1 << 7) /* Bus Lock Detect Flag */ +#define RCAR_CAN_EIFR_OLIF (1 << 6) /* Overload Frame Transmission */ + /* Detect Flag */ +#define RCAR_CAN_EIFR_ORIF (1 << 5) /* Receive Overrun Detect Flag */ +#define RCAR_CAN_EIFR_BORIF (1 << 4) /* Bus-Off Recovery Detect Flag */ +#define RCAR_CAN_EIFR_BOEIF (1 << 3) /* Bus-Off Entry Detect Flag */ +#define RCAR_CAN_EIFR_EPIF (1 << 2) /* Error Passive Detect Flag */ +#define RCAR_CAN_EIFR_EWIF (1 << 1) /* Error Warning Detect Flag */ +#define RCAR_CAN_EIFR_BEIF (1 << 0) /* Bus Error Detect Flag */ + +/* Error Code Store Register bits */ +#define RCAR_CAN_ECSR_EDPM (1 << 7) /* Error Display Mode Select Bit */ +#define RCAR_CAN_ECSR_ADEF (1 << 6) /* ACK Delimiter Error Flag */ +#define RCAR_CAN_ECSR_BE0F (1 << 5) /* Bit Error (dominant) Flag */ +#define RCAR_CAN_ECSR_BE1F (1 << 4) /* Bit Error (recessive) Flag */ +#define RCAR_CAN_ECSR_CEF (1 << 3) /* CRC Error Flag */ +#define RCAR_CAN_ECSR_AEF (1 << 2) /* ACK Error Flag */ +#define RCAR_CAN_ECSR_FEF (1 << 1) /* Form Error Flag */ +#define RCAR_CAN_ECSR_SEF (1 << 0) /* Stuff Error Flag */ + +#define RCAR_CAN_NAPI_WEIGHT 4 +#define MAX_STR_READS 0x100 + +static void tx_failure_cleanup(struct net_device *ndev) +{ + int i; + + for (i = 0; i < RCAR_CAN_FIFO_DEPTH; i++) + can_free_echo_skb(ndev, i); +} + +static void rcar_can_error(struct net_device *ndev) +{ + struct rcar_can_priv *priv = netdev_priv(ndev); + struct net_device_stats *stats = &ndev->stats; + struct can_frame *cf; + struct sk_buff *skb; + u8 eifr, txerr = 0, rxerr = 0; + + /* Propagate the error condition to the CAN stack */ + skb = alloc_can_err_skb(ndev, &cf); + + eifr = readb(&priv->regs->eifr); + if (eifr & (RCAR_CAN_EIFR_EWIF | RCAR_CAN_EIFR_EPIF)) { + txerr = readb(&priv->regs->tecr); + rxerr = readb(&priv->regs->recr); + if (skb) { + cf->can_id |= CAN_ERR_CRTL; + cf->data[6] = txerr; + cf->data[7] = rxerr; + } + } + if (eifr & RCAR_CAN_EIFR_BEIF) { + int rx_errors = 0, tx_errors = 0; + u8 ecsr; + + netdev_dbg(priv->ndev, "Bus error interrupt:\n"); + if (skb) { + cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT; + cf->data[2] = CAN_ERR_PROT_UNSPEC; + } + ecsr = readb(&priv->regs->ecsr); + if (ecsr & RCAR_CAN_ECSR_ADEF) { + netdev_dbg(priv->ndev, "ACK Delimiter Error\n"); + tx_errors++; + writeb(~RCAR_CAN_ECSR_ADEF, &priv->regs->ecsr); + if (skb) + cf->data[3] |= CAN_ERR_PROT_LOC_ACK_DEL; + } + if (ecsr & RCAR_CAN_ECSR_BE0F) { + netdev_dbg(priv->ndev, "Bit Error (dominant)\n"); + tx_errors++; + writeb(~RCAR_CAN_ECSR_BE0F, &priv->regs->ecsr); + if (skb) + cf->data[2] |= CAN_ERR_PROT_BIT0; + } + if (ecsr & RCAR_CAN_ECSR_BE1F) { + netdev_dbg(priv->ndev, "Bit Error (recessive)\n"); + tx_errors++; + writeb(~RCAR_CAN_ECSR_BE1F, &priv->regs->ecsr); + if (skb) + cf->data[2] |= CAN_ERR_PROT_BIT1; + } + if (ecsr & RCAR_CAN_ECSR_CEF) { + netdev_dbg(priv->ndev, "CRC Error\n"); + rx_errors++; + writeb(~RCAR_CAN_ECSR_CEF, &priv->regs->ecsr); + if (skb) + cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ; + } + if (ecsr & RCAR_CAN_ECSR_AEF) { + netdev_dbg(priv->ndev, "ACK Error\n"); + tx_errors++; + writeb(~RCAR_CAN_ECSR_AEF, &priv->regs->ecsr); + if (skb) { + cf->can_id |= CAN_ERR_ACK; + cf->data[3] |= CAN_ERR_PROT_LOC_ACK; + } + } + if (ecsr & RCAR_CAN_ECSR_FEF) { + netdev_dbg(priv->ndev, "Form Error\n"); + rx_errors++; + writeb(~RCAR_CAN_ECSR_FEF, &priv->regs->ecsr); + if (skb) + cf->data[2] |= CAN_ERR_PROT_FORM; + } + if (ecsr & RCAR_CAN_ECSR_SEF) { + netdev_dbg(priv->ndev, "Stuff Error\n"); + rx_errors++; + writeb(~RCAR_CAN_ECSR_SEF, &priv->regs->ecsr); + if (skb) + cf->data[2] |= CAN_ERR_PROT_STUFF; + } + + priv->can.can_stats.bus_error++; + ndev->stats.rx_errors += rx_errors; + ndev->stats.tx_errors += tx_errors; + writeb(~RCAR_CAN_EIFR_BEIF, &priv->regs->eifr); + } + if (eifr & RCAR_CAN_EIFR_EWIF) { + netdev_dbg(priv->ndev, "Error warning interrupt\n"); + priv->can.state = CAN_STATE_ERROR_WARNING; + priv->can.can_stats.error_warning++; + /* Clear interrupt condition */ + writeb(~RCAR_CAN_EIFR_EWIF, &priv->regs->eifr); + if (skb) + cf->data[1] = txerr > rxerr ? CAN_ERR_CRTL_TX_WARNING : + CAN_ERR_CRTL_RX_WARNING; + } + if (eifr & RCAR_CAN_EIFR_EPIF) { + netdev_dbg(priv->ndev, "Error passive interrupt\n"); + priv->can.state = CAN_STATE_ERROR_PASSIVE; + priv->can.can_stats.error_passive++; + /* Clear interrupt condition */ + writeb(~RCAR_CAN_EIFR_EPIF, &priv->regs->eifr); + if (skb) + cf->data[1] = txerr > rxerr ? CAN_ERR_CRTL_TX_PASSIVE : + CAN_ERR_CRTL_RX_PASSIVE; + } + if (eifr & RCAR_CAN_EIFR_BOEIF) { + netdev_dbg(priv->ndev, "Bus-off entry interrupt\n"); + tx_failure_cleanup(ndev); + priv->ier = RCAR_CAN_IER_ERSIE; + writeb(priv->ier, &priv->regs->ier); + priv->can.state = CAN_STATE_BUS_OFF; + /* Clear interrupt condition */ + writeb(~RCAR_CAN_EIFR_BOEIF, &priv->regs->eifr); + can_bus_off(ndev); + if (skb) + cf->can_id |= CAN_ERR_BUSOFF; + } + if (eifr & RCAR_CAN_EIFR_ORIF) { + netdev_dbg(priv->ndev, "Receive overrun error interrupt\n"); + ndev->stats.rx_over_errors++; + ndev->stats.rx_errors++; + writeb(~RCAR_CAN_EIFR_ORIF, &priv->regs->eifr); + if (skb) { + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; + } + } + if (eifr & RCAR_CAN_EIFR_OLIF) { + netdev_dbg(priv->ndev, + "Overload Frame Transmission error interrupt\n"); + ndev->stats.rx_over_errors++; + ndev->stats.rx_errors++; + writeb(~RCAR_CAN_EIFR_OLIF, &priv->regs->eifr); + if (skb) { + cf->can_id |= CAN_ERR_PROT; + cf->data[2] |= CAN_ERR_PROT_OVERLOAD; + } + } + + if (skb) { + stats->rx_packets++; + stats->rx_bytes += cf->can_dlc; + netif_rx(skb); + } +} + +static void rcar_can_tx_done(struct net_device *ndev) +{ + struct rcar_can_priv *priv = netdev_priv(ndev); + struct net_device_stats *stats = &ndev->stats; + u8 isr; + + while (1) { + u8 unsent = readb(&priv->regs->tfcr); + + unsent = (unsent & RCAR_CAN_TFCR_TFUST) >> + RCAR_CAN_TFCR_TFUST_SHIFT; + if (priv->tx_head - priv->tx_tail <= unsent) + break; + stats->tx_packets++; + stats->tx_bytes += priv->tx_dlc[priv->tx_tail % + RCAR_CAN_FIFO_DEPTH]; + priv->tx_dlc[priv->tx_tail % RCAR_CAN_FIFO_DEPTH] = 0; + can_get_echo_skb(ndev, priv->tx_tail % RCAR_CAN_FIFO_DEPTH); + priv->tx_tail++; + netif_wake_queue(ndev); + } + /* Clear interrupt */ + isr = readb(&priv->regs->isr); + writeb(isr & ~RCAR_CAN_ISR_TXFF, &priv->regs->isr); + can_led_event(ndev, CAN_LED_EVENT_TX); +} + +static irqreturn_t rcar_can_interrupt(int irq, void *dev_id) +{ + struct net_device *ndev = dev_id; + struct rcar_can_priv *priv = netdev_priv(ndev); + u8 isr; + + isr = readb(&priv->regs->isr); + if (!(isr & priv->ier)) + return IRQ_NONE; + + if (isr & RCAR_CAN_ISR_ERSF) + rcar_can_error(ndev); + + if (isr & RCAR_CAN_ISR_TXFF) + rcar_can_tx_done(ndev); + + if (isr & RCAR_CAN_ISR_RXFF) { + if (napi_schedule_prep(&priv->napi)) { + /* Disable Rx FIFO interrupts */ + priv->ier &= ~RCAR_CAN_IER_RXFIE; + writeb(priv->ier, &priv->regs->ier); + __napi_schedule(&priv->napi); + } + } + + return IRQ_HANDLED; +} + +static void rcar_can_set_bittiming(struct net_device *dev) +{ + struct rcar_can_priv *priv = netdev_priv(dev); + struct can_bittiming *bt = &priv->can.bittiming; + u32 bcr; + + bcr = RCAR_CAN_BCR_TSEG1(bt->phase_seg1 + bt->prop_seg - 1) | + RCAR_CAN_BCR_BPR(bt->brp - 1) | RCAR_CAN_BCR_SJW(bt->sjw - 1) | + RCAR_CAN_BCR_TSEG2(bt->phase_seg2 - 1); + /* Don't overwrite CLKR with 32-bit BCR access; CLKR has 8-bit access. + * All the registers are big-endian but they get byte-swapped on 32-bit + * read/write (but not on 8-bit, contrary to the manuals)... + */ + writel((bcr << 8) | priv->clock_select, &priv->regs->bcr); +} + +static void rcar_can_start(struct net_device *ndev) +{ + struct rcar_can_priv *priv = netdev_priv(ndev); + u16 ctlr; + int i; + + /* Set controller to known mode: + * - FIFO mailbox mode + * - accept all messages + * - overrun mode + * CAN is in sleep mode after MCU hardware or software reset. + */ + ctlr = readw(&priv->regs->ctlr); + ctlr &= ~RCAR_CAN_CTLR_SLPM; + writew(ctlr, &priv->regs->ctlr); + /* Go to reset mode */ + ctlr |= RCAR_CAN_CTLR_CANM_FORCE_RESET; + writew(ctlr, &priv->regs->ctlr); + for (i = 0; i < MAX_STR_READS; i++) { + if (readw(&priv->regs->str) & RCAR_CAN_STR_RSTST) + break; + } + rcar_can_set_bittiming(ndev); + ctlr |= RCAR_CAN_CTLR_IDFM_MIXED; /* Select mixed ID mode */ + ctlr |= RCAR_CAN_CTLR_BOM_ENT; /* Entry to halt mode automatically */ + /* at bus-off */ + ctlr |= RCAR_CAN_CTLR_MBM; /* Select FIFO mailbox mode */ + ctlr |= RCAR_CAN_CTLR_MLM; /* Overrun mode */ + writew(ctlr, &priv->regs->ctlr); + + /* Accept all SID and EID */ + writel(0, &priv->regs->mkr_2_9[6]); + writel(0, &priv->regs->mkr_2_9[7]); + /* In FIFO mailbox mode, write "0" to bits 24 to 31 */ + writel(0, &priv->regs->mkivlr1); + /* Accept all frames */ + writel(0, &priv->regs->fidcr[0]); + writel(RCAR_CAN_FIDCR_IDE | RCAR_CAN_FIDCR_RTR, &priv->regs->fidcr[1]); + /* Enable and configure FIFO mailbox interrupts */ + writel(RCAR_CAN_MIER1_RXFIE | RCAR_CAN_MIER1_TXFIE, &priv->regs->mier1); + + priv->ier = RCAR_CAN_IER_ERSIE | RCAR_CAN_IER_RXFIE | + RCAR_CAN_IER_TXFIE; + writeb(priv->ier, &priv->regs->ier); + + /* Accumulate error codes */ + writeb(RCAR_CAN_ECSR_EDPM, &priv->regs->ecsr); + /* Enable error interrupts */ + writeb(RCAR_CAN_EIER_EWIE | RCAR_CAN_EIER_EPIE | RCAR_CAN_EIER_BOEIE | + (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING ? + RCAR_CAN_EIER_BEIE : 0) | RCAR_CAN_EIER_ORIE | + RCAR_CAN_EIER_OLIE, &priv->regs->eier); + priv->can.state = CAN_STATE_ERROR_ACTIVE; + + /* Go to operation mode */ + writew(ctlr & ~RCAR_CAN_CTLR_CANM, &priv->regs->ctlr); + for (i = 0; i < MAX_STR_READS; i++) { + if (!(readw(&priv->regs->str) & RCAR_CAN_STR_RSTST)) + break; + } + /* Enable Rx and Tx FIFO */ + writeb(RCAR_CAN_RFCR_RFE, &priv->regs->rfcr); + writeb(RCAR_CAN_TFCR_TFE, &priv->regs->tfcr); +} + +static int rcar_can_open(struct net_device *ndev) +{ + struct rcar_can_priv *priv = netdev_priv(ndev); + int err; + + err = clk_prepare_enable(priv->clk); + if (err) { + netdev_err(ndev, "clk_prepare_enable() failed, error %d\n", + err); + goto out; + } + err = open_candev(ndev); + if (err) { + netdev_err(ndev, "open_candev() failed, error %d\n", err); + goto out_clock; + } + napi_enable(&priv->napi); + err = request_irq(ndev->irq, rcar_can_interrupt, 0, ndev->name, ndev); + if (err) { + netdev_err(ndev, "error requesting interrupt %x\n", ndev->irq); + goto out_close; + } + can_led_event(ndev, CAN_LED_EVENT_OPEN); + rcar_can_start(ndev); + netif_start_queue(ndev); + return 0; +out_close: + napi_disable(&priv->napi); + close_candev(ndev); +out_clock: + clk_disable_unprepare(priv->clk); +out: + return err; +} + +static void rcar_can_stop(struct net_device *ndev) +{ + struct rcar_can_priv *priv = netdev_priv(ndev); + u16 ctlr; + int i; + + /* Go to (force) reset mode */ + ctlr = readw(&priv->regs->ctlr); + ctlr |= RCAR_CAN_CTLR_CANM_FORCE_RESET; + writew(ctlr, &priv->regs->ctlr); + for (i = 0; i < MAX_STR_READS; i++) { + if (readw(&priv->regs->str) & RCAR_CAN_STR_RSTST) + break; + } + writel(0, &priv->regs->mier0); + writel(0, &priv->regs->mier1); + writeb(0, &priv->regs->ier); + writeb(0, &priv->regs->eier); + /* Go to sleep mode */ + ctlr |= RCAR_CAN_CTLR_SLPM; + writew(ctlr, &priv->regs->ctlr); + priv->can.state = CAN_STATE_STOPPED; +} + +static int rcar_can_close(struct net_device *ndev) +{ + struct rcar_can_priv *priv = netdev_priv(ndev); + + netif_stop_queue(ndev); + rcar_can_stop(ndev); + free_irq(ndev->irq, ndev); + napi_disable(&priv->napi); + clk_disable_unprepare(priv->clk); + close_candev(ndev); + can_led_event(ndev, CAN_LED_EVENT_STOP); + return 0; +} + +static netdev_tx_t rcar_can_start_xmit(struct sk_buff *skb, + struct net_device *ndev) +{ + struct rcar_can_priv *priv = netdev_priv(ndev); + struct can_frame *cf = (struct can_frame *)skb->data; + u32 data, i; + + if (can_dropped_invalid_skb(ndev, skb)) + return NETDEV_TX_OK; + + if (cf->can_id & CAN_EFF_FLAG) /* Extended frame format */ + data = (cf->can_id & CAN_EFF_MASK) | RCAR_CAN_IDE; + else /* Standard frame format */ + data = (cf->can_id & CAN_SFF_MASK) << RCAR_CAN_SID_SHIFT; + + if (cf->can_id & CAN_RTR_FLAG) { /* Remote transmission request */ + data |= RCAR_CAN_RTR; + } else { + for (i = 0; i < cf->can_dlc; i++) + writeb(cf->data[i], + &priv->regs->mb[RCAR_CAN_TX_FIFO_MBX].data[i]); + } + + writel(data, &priv->regs->mb[RCAR_CAN_TX_FIFO_MBX].id); + + writeb(cf->can_dlc, &priv->regs->mb[RCAR_CAN_TX_FIFO_MBX].dlc); + + priv->tx_dlc[priv->tx_head % RCAR_CAN_FIFO_DEPTH] = cf->can_dlc; + can_put_echo_skb(skb, ndev, priv->tx_head % RCAR_CAN_FIFO_DEPTH); + priv->tx_head++; + /* Start Tx: write 0xff to the TFPCR register to increment + * the CPU-side pointer for the transmit FIFO to the next + * mailbox location + */ + writeb(0xff, &priv->regs->tfpcr); + /* Stop the queue if we've filled all FIFO entries */ + if (priv->tx_head - priv->tx_tail >= RCAR_CAN_FIFO_DEPTH) + netif_stop_queue(ndev); + + return NETDEV_TX_OK; +} + +static const struct net_device_ops rcar_can_netdev_ops = { + .ndo_open = rcar_can_open, + .ndo_stop = rcar_can_close, + .ndo_start_xmit = rcar_can_start_xmit, +}; + +static void rcar_can_rx_pkt(struct rcar_can_priv *priv) +{ + struct net_device_stats *stats = &priv->ndev->stats; + struct can_frame *cf; + struct sk_buff *skb; + u32 data; + u8 dlc; + + skb = alloc_can_skb(priv->ndev, &cf); + if (!skb) { + stats->rx_dropped++; + return; + } + + data = readl(&priv->regs->mb[RCAR_CAN_RX_FIFO_MBX].id); + if (data & RCAR_CAN_IDE) + cf->can_id = (data & CAN_EFF_MASK) | CAN_EFF_FLAG; + else + cf->can_id = (data >> RCAR_CAN_SID_SHIFT) & CAN_SFF_MASK; + + dlc = readb(&priv->regs->mb[RCAR_CAN_RX_FIFO_MBX].dlc); + cf->can_dlc = get_can_dlc(dlc); + if (data & RCAR_CAN_RTR) { + cf->can_id |= CAN_RTR_FLAG; + } else { + for (dlc = 0; dlc < cf->can_dlc; dlc++) + cf->data[dlc] = + readb(&priv->regs->mb[RCAR_CAN_RX_FIFO_MBX].data[dlc]); + } + + can_led_event(priv->ndev, CAN_LED_EVENT_RX); + + stats->rx_bytes += cf->can_dlc; + stats->rx_packets++; + netif_receive_skb(skb); +} + +static int rcar_can_rx_poll(struct napi_struct *napi, int quota) +{ + struct rcar_can_priv *priv = container_of(napi, + struct rcar_can_priv, napi); + int num_pkts; + + for (num_pkts = 0; num_pkts < quota; num_pkts++) { + u8 rfcr, isr; + + isr = readb(&priv->regs->isr); + /* Clear interrupt bit */ + if (isr & RCAR_CAN_ISR_RXFF) + writeb(isr & ~RCAR_CAN_ISR_RXFF, &priv->regs->isr); + rfcr = readb(&priv->regs->rfcr); + if (rfcr & RCAR_CAN_RFCR_RFEST) + break; + rcar_can_rx_pkt(priv); + /* Write 0xff to the RFPCR register to increment + * the CPU-side pointer for the receive FIFO + * to the next mailbox location + */ + writeb(0xff, &priv->regs->rfpcr); + } + /* All packets processed */ + if (num_pkts < quota) { + napi_complete(napi); + priv->ier |= RCAR_CAN_IER_RXFIE; + writeb(priv->ier, &priv->regs->ier); + } + return num_pkts; +} + +static int rcar_can_do_set_mode(struct net_device *ndev, enum can_mode mode) +{ + switch (mode) { + case CAN_MODE_START: + rcar_can_start(ndev); + netif_wake_queue(ndev); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int rcar_can_get_berr_counter(const struct net_device *dev, + struct can_berr_counter *bec) +{ + struct rcar_can_priv *priv = netdev_priv(dev); + int err; + + err = clk_prepare_enable(priv->clk); + if (err) + return err; + bec->txerr = readb(&priv->regs->tecr); + bec->rxerr = readb(&priv->regs->recr); + clk_disable_unprepare(priv->clk); + return 0; +} + +static int rcar_can_probe(struct platform_device *pdev) +{ + struct rcar_can_platform_data *pdata; + struct rcar_can_priv *priv; + struct net_device *ndev; + struct resource *mem; + void __iomem *addr; + int err = -ENODEV; + int irq; + + pdata = dev_get_platdata(&pdev->dev); + if (!pdata) { + dev_err(&pdev->dev, "No platform data provided!\n"); + goto fail; + } + + irq = platform_get_irq(pdev, 0); + if (!irq) { + dev_err(&pdev->dev, "No IRQ resource\n"); + goto fail; + } + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + addr = devm_ioremap_resource(&pdev->dev, mem); + if (IS_ERR(addr)) { + err = PTR_ERR(addr); + goto fail; + } + + ndev = alloc_candev(sizeof(struct rcar_can_priv), RCAR_CAN_FIFO_DEPTH); + if (!ndev) { + dev_err(&pdev->dev, "alloc_candev() failed\n"); + err = -ENOMEM; + goto fail; + } + + priv = netdev_priv(ndev); + + priv->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(priv->clk)) { + err = PTR_ERR(priv->clk); + dev_err(&pdev->dev, "cannot get clock: %d\n", err); + goto fail_clk; + } + + ndev->netdev_ops = &rcar_can_netdev_ops; + ndev->irq = irq; + ndev->flags |= IFF_ECHO; + priv->ndev = ndev; + priv->regs = addr; + priv->clock_select = pdata->clock_select; + priv->can.clock.freq = clk_get_rate(priv->clk); + priv->can.bittiming_const = &rcar_can_bittiming_const; + priv->can.do_set_mode = rcar_can_do_set_mode; + priv->can.do_get_berr_counter = rcar_can_get_berr_counter; + priv->can.ctrlmode_supported = CAN_CTRLMODE_BERR_REPORTING; + platform_set_drvdata(pdev, ndev); + SET_NETDEV_DEV(ndev, &pdev->dev); + + netif_napi_add(ndev, &priv->napi, rcar_can_rx_poll, + RCAR_CAN_NAPI_WEIGHT); + err = register_candev(ndev); + if (err) { + dev_err(&pdev->dev, "register_candev() failed, error %d\n", + err); + goto fail_candev; + } + + devm_can_led_init(ndev); + + dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%u)\n", + priv->regs, ndev->irq); + + return 0; +fail_candev: + netif_napi_del(&priv->napi); +fail_clk: + free_candev(ndev); +fail: + return err; +} + +static int rcar_can_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct rcar_can_priv *priv = netdev_priv(ndev); + + unregister_candev(ndev); + netif_napi_del(&priv->napi); + free_candev(ndev); + return 0; +} + +static int __maybe_unused rcar_can_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct rcar_can_priv *priv = netdev_priv(ndev); + u16 ctlr; + + if (netif_running(ndev)) { + netif_stop_queue(ndev); + netif_device_detach(ndev); + } + ctlr = readw(&priv->regs->ctlr); + ctlr |= RCAR_CAN_CTLR_CANM_HALT; + writew(ctlr, &priv->regs->ctlr); + ctlr |= RCAR_CAN_CTLR_SLPM; + writew(ctlr, &priv->regs->ctlr); + priv->can.state = CAN_STATE_SLEEPING; + + clk_disable(priv->clk); + return 0; +} + +static int __maybe_unused rcar_can_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct rcar_can_priv *priv = netdev_priv(ndev); + u16 ctlr; + int err; + + err = clk_enable(priv->clk); + if (err) { + netdev_err(ndev, "clk_enable() failed, error %d\n", err); + return err; + } + + ctlr = readw(&priv->regs->ctlr); + ctlr &= ~RCAR_CAN_CTLR_SLPM; + writew(ctlr, &priv->regs->ctlr); + ctlr &= ~RCAR_CAN_CTLR_CANM; + writew(ctlr, &priv->regs->ctlr); + priv->can.state = CAN_STATE_ERROR_ACTIVE; + + if (netif_running(ndev)) { + netif_device_attach(ndev); + netif_start_queue(ndev); + } + return 0; +} + +static SIMPLE_DEV_PM_OPS(rcar_can_pm_ops, rcar_can_suspend, rcar_can_resume); + +static struct platform_driver rcar_can_driver = { + .driver = { + .name = RCAR_CAN_DRV_NAME, + .owner = THIS_MODULE, + .pm = &rcar_can_pm_ops, + }, + .probe = rcar_can_probe, + .remove = rcar_can_remove, +}; + +module_platform_driver(rcar_can_driver); + +MODULE_AUTHOR("Cogent Embedded, Inc."); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("CAN driver for Renesas R-Car SoC"); +MODULE_ALIAS("platform:" RCAR_CAN_DRV_NAME); diff --git a/include/linux/can/platform/rcar_can.h b/include/linux/can/platform/rcar_can.h new file mode 100644 index 000000000000..0f4a2f3df504 --- /dev/null +++ b/include/linux/can/platform/rcar_can.h @@ -0,0 +1,17 @@ +#ifndef _CAN_PLATFORM_RCAR_CAN_H_ +#define _CAN_PLATFORM_RCAR_CAN_H_ + +#include + +/* Clock Select Register settings */ +enum CLKR { + CLKR_CLKP1 = 0, /* Peripheral clock (clkp1) */ + CLKR_CLKP2 = 1, /* Peripheral clock (clkp2) */ + CLKR_CLKEXT = 3 /* Externally input clock */ +}; + +struct rcar_can_platform_data { + enum CLKR clock_select; /* Clock source select */ +}; + +#endif /* !_CAN_PLATFORM_RCAR_CAN_H_ */ -- cgit v1.2.3-71-gd317 From 42193e3efb632c84d686acacd7b2327f2b1f8c63 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 15 May 2014 20:31:56 +0200 Subject: can: unify identifiers to ensure unique include processing Armin pointed me to the fact that the identifier which is used to ensure the unique include processing in lunux/include/uapi/linux/can.h is CAN_H. This clashed with his own source as includes from libraries and APIs should use an underscore '_' at the identifier start. This patch fixes the protection identifiers in all CAN relavant includes. Reported-by: Armin Burchardt Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/linux/can/core.h | 6 +++--- include/linux/can/dev.h | 6 +++--- include/linux/can/led.h | 6 +++--- include/linux/can/platform/cc770.h | 6 +++--- include/linux/can/platform/mcp251x.h | 6 +++--- include/linux/can/platform/sja1000.h | 6 +++--- include/linux/can/platform/ti_hecc.h | 6 +++--- include/linux/can/skb.h | 6 +++--- include/uapi/linux/can.h | 6 +++--- include/uapi/linux/can/bcm.h | 6 +++--- include/uapi/linux/can/error.h | 6 +++--- include/uapi/linux/can/gw.h | 6 +++--- include/uapi/linux/can/netlink.h | 6 +++--- include/uapi/linux/can/raw.h | 6 +++--- 14 files changed, 42 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index 78c6c52073ad..a0875001b13c 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -10,8 +10,8 @@ * */ -#ifndef CAN_CORE_H -#define CAN_CORE_H +#ifndef _CAN_CORE_H +#define _CAN_CORE_H #include #include @@ -58,4 +58,4 @@ extern void can_rx_unregister(struct net_device *dev, canid_t can_id, extern int can_send(struct sk_buff *skb, int loop); extern int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); -#endif /* CAN_CORE_H */ +#endif /* !_CAN_CORE_H */ diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 3ce5e526525f..6992afc6ba7f 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -10,8 +10,8 @@ * */ -#ifndef CAN_DEV_H -#define CAN_DEV_H +#ifndef _CAN_DEV_H +#define _CAN_DEV_H #include #include @@ -132,4 +132,4 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf); -#endif /* CAN_DEV_H */ +#endif /* !_CAN_DEV_H */ diff --git a/include/linux/can/led.h b/include/linux/can/led.h index 9c1167baf273..e0475c5cbb92 100644 --- a/include/linux/can/led.h +++ b/include/linux/can/led.h @@ -6,8 +6,8 @@ * published by the Free Software Foundation. */ -#ifndef CAN_LED_H -#define CAN_LED_H +#ifndef _CAN_LED_H +#define _CAN_LED_H #include #include @@ -48,4 +48,4 @@ static inline void can_led_notifier_exit(void) #endif -#endif +#endif /* !_CAN_LED_H */ diff --git a/include/linux/can/platform/cc770.h b/include/linux/can/platform/cc770.h index 7702641f87ee..78b2d44f04cf 100644 --- a/include/linux/can/platform/cc770.h +++ b/include/linux/can/platform/cc770.h @@ -1,5 +1,5 @@ -#ifndef _CAN_PLATFORM_CC770_H_ -#define _CAN_PLATFORM_CC770_H_ +#ifndef _CAN_PLATFORM_CC770_H +#define _CAN_PLATFORM_CC770_H /* CPU Interface Register (0x02) */ #define CPUIF_CEN 0x01 /* Clock Out Enable */ @@ -30,4 +30,4 @@ struct cc770_platform_data { u8 bcr; /* Bus Configuration Register */ }; -#endif /* !_CAN_PLATFORM_CC770_H_ */ +#endif /* !_CAN_PLATFORM_CC770_H */ diff --git a/include/linux/can/platform/mcp251x.h b/include/linux/can/platform/mcp251x.h index dc029dba7a03..d44fcae274ff 100644 --- a/include/linux/can/platform/mcp251x.h +++ b/include/linux/can/platform/mcp251x.h @@ -1,5 +1,5 @@ -#ifndef __CAN_PLATFORM_MCP251X_H__ -#define __CAN_PLATFORM_MCP251X_H__ +#ifndef _CAN_PLATFORM_MCP251X_H +#define _CAN_PLATFORM_MCP251X_H /* * @@ -18,4 +18,4 @@ struct mcp251x_platform_data { unsigned long oscillator_frequency; }; -#endif /* __CAN_PLATFORM_MCP251X_H__ */ +#endif /* !_CAN_PLATFORM_MCP251X_H */ diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h index 96f8fcc78d78..93570b61ec6c 100644 --- a/include/linux/can/platform/sja1000.h +++ b/include/linux/can/platform/sja1000.h @@ -1,5 +1,5 @@ -#ifndef _CAN_PLATFORM_SJA1000_H_ -#define _CAN_PLATFORM_SJA1000_H_ +#ifndef _CAN_PLATFORM_SJA1000_H +#define _CAN_PLATFORM_SJA1000_H /* clock divider register */ #define CDR_CLKOUT_MASK 0x07 @@ -32,4 +32,4 @@ struct sja1000_platform_data { u8 cdr; /* clock divider register */ }; -#endif /* !_CAN_PLATFORM_SJA1000_H_ */ +#endif /* !_CAN_PLATFORM_SJA1000_H */ diff --git a/include/linux/can/platform/ti_hecc.h b/include/linux/can/platform/ti_hecc.h index af17cb3f7a84..a52f47ca6c8a 100644 --- a/include/linux/can/platform/ti_hecc.h +++ b/include/linux/can/platform/ti_hecc.h @@ -1,5 +1,5 @@ -#ifndef __CAN_PLATFORM_TI_HECC_H__ -#define __CAN_PLATFORM_TI_HECC_H__ +#ifndef _CAN_PLATFORM_TI_HECC_H +#define _CAN_PLATFORM_TI_HECC_H /* * TI HECC (High End CAN Controller) driver platform header @@ -41,4 +41,4 @@ struct ti_hecc_platform_data { u32 version; void (*transceiver_switch) (int); }; -#endif +#endif /* !_CAN_PLATFORM_TI_HECC_H */ diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index f9bbbb472663..cc00d15c6107 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -7,8 +7,8 @@ * */ -#ifndef CAN_SKB_H -#define CAN_SKB_H +#ifndef _CAN_SKB_H +#define _CAN_SKB_H #include #include @@ -80,4 +80,4 @@ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) return skb; } -#endif /* CAN_SKB_H */ +#endif /* !_CAN_SKB_H */ diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 5d9d1d140718..41892f720057 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -42,8 +42,8 @@ * DAMAGE. */ -#ifndef CAN_H -#define CAN_H +#ifndef _UAPI_CAN_H +#define _UAPI_CAN_H #include #include @@ -191,4 +191,4 @@ struct can_filter { #define CAN_INV_FILTER 0x20000000U /* to be set in can_filter.can_id */ -#endif /* CAN_H */ +#endif /* !_UAPI_CAN_H */ diff --git a/include/uapi/linux/can/bcm.h b/include/uapi/linux/can/bcm.h index 382251a1d214..89ddb9dc9bdf 100644 --- a/include/uapi/linux/can/bcm.h +++ b/include/uapi/linux/can/bcm.h @@ -41,8 +41,8 @@ * DAMAGE. */ -#ifndef CAN_BCM_H -#define CAN_BCM_H +#ifndef _UAPI_CAN_BCM_H +#define _UAPI_CAN_BCM_H #include #include @@ -95,4 +95,4 @@ enum { #define TX_RESET_MULTI_IDX 0x0200 #define RX_RTR_FRAME 0x0400 -#endif /* CAN_BCM_H */ +#endif /* !_UAPI_CAN_BCM_H */ diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h index b63204545320..c247446ab25a 100644 --- a/include/uapi/linux/can/error.h +++ b/include/uapi/linux/can/error.h @@ -41,8 +41,8 @@ * DAMAGE. */ -#ifndef CAN_ERROR_H -#define CAN_ERROR_H +#ifndef _UAPI_CAN_ERROR_H +#define _UAPI_CAN_ERROR_H #define CAN_ERR_DLC 8 /* dlc for error message frames */ @@ -120,4 +120,4 @@ /* controller specific additional information / data[5..7] */ -#endif /* CAN_ERROR_H */ +#endif /* _UAPI_CAN_ERROR_H */ diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h index 844c8964bdfe..3e6184cf2f6d 100644 --- a/include/uapi/linux/can/gw.h +++ b/include/uapi/linux/can/gw.h @@ -41,8 +41,8 @@ * DAMAGE. */ -#ifndef CAN_GW_H -#define CAN_GW_H +#ifndef _UAPI_CAN_GW_H +#define _UAPI_CAN_GW_H #include #include @@ -200,4 +200,4 @@ enum { * Beware of sending unpacked or aligned structs! */ -#endif +#endif /* !_UAPI_CAN_GW_H */ diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index 7e2e1863db16..813d11f54977 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -15,8 +15,8 @@ * GNU General Public License for more details. */ -#ifndef CAN_NETLINK_H -#define CAN_NETLINK_H +#ifndef _UAPI_CAN_NETLINK_H +#define _UAPI_CAN_NETLINK_H #include @@ -130,4 +130,4 @@ enum { #define IFLA_CAN_MAX (__IFLA_CAN_MAX - 1) -#endif /* CAN_NETLINK_H */ +#endif /* !_UAPI_CAN_NETLINK_H */ diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h index c7d8c334e0ce..78ec76fd89a6 100644 --- a/include/uapi/linux/can/raw.h +++ b/include/uapi/linux/can/raw.h @@ -42,8 +42,8 @@ * DAMAGE. */ -#ifndef CAN_RAW_H -#define CAN_RAW_H +#ifndef _UAPI_CAN_RAW_H +#define _UAPI_CAN_RAW_H #include @@ -59,4 +59,4 @@ enum { CAN_RAW_FD_FRAMES, /* allow CAN FD frames (default:off) */ }; -#endif +#endif /* !_UAPI_CAN_RAW_H */ -- cgit v1.2.3-71-gd317 From 7c95f6d866d861268a217003c5202009fa76f252 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 4 Apr 2014 01:22:45 +0200 Subject: netfilter: nf_tables: deconstify table and chain in context structure The new transaction infrastructure updates the family, table and chain objects in the context structure, so let's deconstify them. While at it, move the context structure initialization routine to the top of the source file as it will be also used from the table and chain routines. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 ++-- net/netfilter/nf_tables_api.c | 58 +++++++++++++++++++-------------------- 2 files changed, 32 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 29ff1dc41ef3..91505231a105 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -83,9 +83,9 @@ struct nft_ctx { struct net *net; const struct sk_buff *skb; const struct nlmsghdr *nlh; - const struct nft_af_info *afi; - const struct nft_table *table; - const struct nft_chain *chain; + struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; const struct nlattr * const *nla; }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a5ca900912e1..3643bbc720bc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -88,6 +88,23 @@ nf_tables_afinfo_lookup(struct net *net, int family, bool autoload) return ERR_PTR(-EAFNOSUPPORT); } +static void nft_ctx_init(struct nft_ctx *ctx, + const struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nft_af_info *afi, + struct nft_table *table, + struct nft_chain *chain, + const struct nlattr * const *nla) +{ + ctx->net = sock_net(skb->sk); + ctx->skb = skb; + ctx->nlh = nlh; + ctx->afi = afi; + ctx->table = table; + ctx->chain = chain; + ctx->nla = nla; +} + /* * Tables */ @@ -812,7 +829,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nlattr * uninitialized_var(name); - const struct nft_af_info *afi; + struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; struct nft_base_chain *basechain = NULL; @@ -1024,7 +1041,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi; + struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; struct net *net = sock_net(skb->sk); @@ -1062,23 +1079,6 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, return 0; } -static void nft_ctx_init(struct nft_ctx *ctx, - const struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nft_af_info *afi, - const struct nft_table *table, - const struct nft_chain *chain, - const struct nlattr * const *nla) -{ - ctx->net = sock_net(skb->sk); - ctx->skb = skb; - ctx->nlh = nlh; - ctx->afi = afi; - ctx->table = table; - ctx->chain = chain; - ctx->nla = nla; -} - /* * Expressions */ @@ -1582,7 +1582,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi; + struct nft_af_info *afi; struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_chain *chain; @@ -1763,9 +1763,9 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi; + struct nft_af_info *afi; struct net *net = sock_net(skb->sk); - const struct nft_table *table; + struct nft_table *table; struct nft_chain *chain = NULL; struct nft_rule *rule; int family = nfmsg->nfgen_family, err = 0; @@ -2009,8 +2009,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, { struct net *net = sock_net(skb->sk); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi = NULL; - const struct nft_table *table = NULL; + struct nft_af_info *afi = NULL; + struct nft_table *table = NULL; if (nfmsg->nfgen_family != NFPROTO_UNSPEC) { afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); @@ -2244,7 +2244,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, { const struct nft_set *set; unsigned int idx, s_idx = cb->args[0]; - const struct nft_af_info *afi; + struct nft_af_info *afi; struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; struct net *net = sock_net(skb->sk); int cur_family = cb->args[3]; @@ -2389,7 +2389,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_set_ops *ops; - const struct nft_af_info *afi; + struct nft_af_info *afi; struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_set *set; @@ -2651,8 +2651,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi; - const struct nft_table *table; + struct nft_af_info *afi; + struct nft_table *table; struct net *net = sock_net(skb->sk); afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); @@ -2959,7 +2959,7 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, struct nft_ctx bind_ctx = { .afi = ctx->afi, .table = ctx->table, - .chain = binding->chain, + .chain = (struct nft_chain *)binding->chain, }; err = nft_validate_data_load(&bind_ctx, dreg, -- cgit v1.2.3-71-gd317 From 1081d11b086afb73e1d8f52f9047d661d8770b82 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 4 Apr 2014 01:24:07 +0200 Subject: netfilter: nf_tables: generalise transaction infrastructure This patch generalises the existing rule transaction infrastructure so it can be used to handle set, table and chain object transactions as well. The transaction provides a data area that stores private information depending on the transaction type. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 15 +++-- net/netfilter/nf_tables_api.c | 123 +++++++++++++++++++++----------------- 2 files changed, 80 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 91505231a105..246dbd48825f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -387,18 +387,25 @@ struct nft_rule { }; /** - * struct nft_rule_trans - nf_tables rule update in transaction + * struct nft_trans - nf_tables object update in transaction * * @list: used internally - * @ctx: rule context - * @rule: rule that needs to be updated + * @ctx: transaction context + * @data: internal information related to the transaction */ -struct nft_rule_trans { +struct nft_trans { struct list_head list; struct nft_ctx ctx; + char data[0]; +}; + +struct nft_trans_rule { struct nft_rule *rule; }; +#define nft_trans_rule(trans) \ + (((struct nft_trans_rule *)trans->data)->rule) + static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) { return (struct nft_expr *)&rule->data[0]; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3643bbc720bc..424a6f3cb6c5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -105,6 +105,25 @@ static void nft_ctx_init(struct nft_ctx *ctx, ctx->nla = nla; } +static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, u32 size) +{ + struct nft_trans *trans; + + trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL); + if (trans == NULL) + return NULL; + + trans->ctx = *ctx; + + return trans; +} + +static void nft_trans_destroy(struct nft_trans *trans) +{ + list_del(&trans->list); + kfree(trans); +} + /* * Tables */ @@ -1557,26 +1576,25 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, kfree(rule); } -#define NFT_RULE_MAXEXPRS 128 - -static struct nft_expr_info *info; - -static struct nft_rule_trans * -nf_tables_trans_add(struct nft_ctx *ctx, struct nft_rule *rule) +static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, + struct nft_rule *rule) { - struct nft_rule_trans *rupd; + struct nft_trans *trans; - rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL); - if (rupd == NULL) - return NULL; + trans = nft_trans_alloc(ctx, sizeof(struct nft_trans_rule)); + if (trans == NULL) + return NULL; - rupd->ctx = *ctx; - rupd->rule = rule; - list_add_tail(&rupd->list, &ctx->net->nft.commit_list); + nft_trans_rule(trans) = rule; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); - return rupd; + return trans; } +#define NFT_RULE_MAXEXPRS 128 + +static struct nft_expr_info *info; + static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1587,7 +1605,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *old_rule = NULL; - struct nft_rule_trans *repl = NULL; + struct nft_trans *trans = NULL; struct nft_expr *expr; struct nft_ctx ctx; struct nlattr *tmp; @@ -1685,8 +1703,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_REPLACE) { if (nft_rule_is_active_next(net, old_rule)) { - repl = nf_tables_trans_add(&ctx, old_rule); - if (repl == NULL) { + trans = nft_trans_rule_add(&ctx, old_rule); + if (trans == NULL) { err = -ENOMEM; goto err2; } @@ -1708,7 +1726,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, list_add_rcu(&rule->list, &chain->rules); } - if (nf_tables_trans_add(&ctx, rule) == NULL) { + if (nft_trans_rule_add(&ctx, rule) == NULL) { err = -ENOMEM; goto err3; } @@ -1716,11 +1734,10 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, err3: list_del_rcu(&rule->list); - if (repl) { - list_del_rcu(&repl->rule->list); - list_del(&repl->list); - nft_rule_clear(net, repl->rule); - kfree(repl); + if (trans) { + list_del_rcu(&nft_trans_rule(trans)->list); + nft_rule_clear(net, nft_trans_rule(trans)); + nft_trans_destroy(trans); } err2: nf_tables_rule_destroy(&ctx, rule); @@ -1737,7 +1754,7 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) { /* You cannot delete the same rule twice */ if (nft_rule_is_active_next(ctx->net, rule)) { - if (nf_tables_trans_add(ctx, rule) == NULL) + if (nft_trans_rule_add(ctx, rule) == NULL) return -ENOMEM; nft_rule_disactivate_next(ctx->net, rule); return 0; @@ -1813,7 +1830,7 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, static int nf_tables_commit(struct sk_buff *skb) { struct net *net = sock_net(skb->sk); - struct nft_rule_trans *rupd, *tmp; + struct nft_trans *trans, *next; /* Bump generation counter, invalidate any dump in progress */ net->nft.genctr++; @@ -1826,38 +1843,38 @@ static int nf_tables_commit(struct sk_buff *skb) */ synchronize_rcu(); - list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { /* This rule was inactive in the past and just became active. * Clear the next bit of the genmask since its meaning has * changed, now it is the future. */ - if (nft_rule_is_active(net, rupd->rule)) { - nft_rule_clear(net, rupd->rule); - nf_tables_rule_notify(skb, rupd->ctx.nlh, - rupd->ctx.table, rupd->ctx.chain, - rupd->rule, NFT_MSG_NEWRULE, 0, - rupd->ctx.afi->family); - list_del(&rupd->list); - kfree(rupd); + if (nft_rule_is_active(net, nft_trans_rule(trans))) { + nft_rule_clear(net, nft_trans_rule(trans)); + nf_tables_rule_notify(skb, trans->ctx.nlh, + trans->ctx.table, + trans->ctx.chain, + nft_trans_rule(trans), + NFT_MSG_NEWRULE, 0, + trans->ctx.afi->family); + nft_trans_destroy(trans); continue; } /* This rule is in the past, get rid of it */ - list_del_rcu(&rupd->rule->list); - nf_tables_rule_notify(skb, rupd->ctx.nlh, - rupd->ctx.table, rupd->ctx.chain, - rupd->rule, NFT_MSG_DELRULE, 0, - rupd->ctx.afi->family); + list_del_rcu(&nft_trans_rule(trans)->list); + nf_tables_rule_notify(skb, trans->ctx.nlh, + trans->ctx.table, trans->ctx.chain, + nft_trans_rule(trans), NFT_MSG_DELRULE, + 0, trans->ctx.afi->family); } /* Make sure we don't see any packet traversing old rules */ synchronize_rcu(); /* Now we can safely release unused old rules */ - list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { - nf_tables_rule_destroy(&rupd->ctx, rupd->rule); - list_del(&rupd->list); - kfree(rupd); + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + nft_trans_destroy(trans); } return 0; @@ -1866,27 +1883,25 @@ static int nf_tables_commit(struct sk_buff *skb) static int nf_tables_abort(struct sk_buff *skb) { struct net *net = sock_net(skb->sk); - struct nft_rule_trans *rupd, *tmp; + struct nft_trans *trans, *next; - list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { - if (!nft_rule_is_active_next(net, rupd->rule)) { - nft_rule_clear(net, rupd->rule); - list_del(&rupd->list); - kfree(rupd); + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + if (!nft_rule_is_active_next(net, nft_trans_rule(trans))) { + nft_rule_clear(net, nft_trans_rule(trans)); + nft_trans_destroy(trans); continue; } /* This rule is inactive, get rid of it */ - list_del_rcu(&rupd->rule->list); + list_del_rcu(&nft_trans_rule(trans)->list); } /* Make sure we don't see any packet accessing aborted rules */ synchronize_rcu(); - list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { - nf_tables_rule_destroy(&rupd->ctx, rupd->rule); - list_del(&rupd->list); - kfree(rupd); + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + nft_trans_destroy(trans); } return 0; -- cgit v1.2.3-71-gd317 From b380e5c733b9f18a6a3ebb97963b6dd037339bc0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 4 Apr 2014 01:38:51 +0200 Subject: netfilter: nf_tables: add message type to transactions The patch adds message type to the transaction to simplify the commit the and abort routines. Yet another step forward in the generalisation of the transaction infrastructure. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 74 +++++++++++++++++++++++---------------- 2 files changed, 45 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 246dbd48825f..d8dfb2695e0f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -390,11 +390,13 @@ struct nft_rule { * struct nft_trans - nf_tables object update in transaction * * @list: used internally + * @msg_type: message type * @ctx: transaction context * @data: internal information related to the transaction */ struct nft_trans { struct list_head list; + int msg_type; struct nft_ctx ctx; char data[0]; }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 17df4b807f84..34aa3d507542 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -105,7 +105,8 @@ static void nft_ctx_init(struct nft_ctx *ctx, ctx->nla = nla; } -static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, u32 size) +static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type, + u32 size) { struct nft_trans *trans; @@ -113,6 +114,7 @@ static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, u32 size) if (trans == NULL) return NULL; + trans->msg_type = msg_type; trans->ctx = *ctx; return trans; @@ -1576,12 +1578,12 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, kfree(rule); } -static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, +static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, struct nft_rule *rule) { struct nft_trans *trans; - trans = nft_trans_alloc(ctx, sizeof(struct nft_trans_rule)); + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule)); if (trans == NULL) return NULL; @@ -1703,7 +1705,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_REPLACE) { if (nft_rule_is_active_next(net, old_rule)) { - trans = nft_trans_rule_add(&ctx, old_rule); + trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, + old_rule); if (trans == NULL) { err = -ENOMEM; goto err2; @@ -1726,7 +1729,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, list_add_rcu(&rule->list, &chain->rules); } - if (nft_trans_rule_add(&ctx, rule) == NULL) { + if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { err = -ENOMEM; goto err3; } @@ -1754,7 +1757,7 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) { /* You cannot delete the same rule twice */ if (nft_rule_is_active_next(ctx->net, rule)) { - if (nft_trans_rule_add(ctx, rule) == NULL) + if (nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule) == NULL) return -ENOMEM; nft_rule_disactivate_next(ctx->net, rule); return 0; @@ -3114,28 +3117,26 @@ static int nf_tables_commit(struct sk_buff *skb) synchronize_rcu(); list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { - /* This rule was inactive in the past and just became active. - * Clear the next bit of the genmask since its meaning has - * changed, now it is the future. - */ - if (nft_rule_is_active(net, nft_trans_rule(trans))) { - nft_rule_clear(net, nft_trans_rule(trans)); - nf_tables_rule_notify(skb, trans->ctx.nlh, + switch (trans->msg_type) { + case NFT_MSG_NEWRULE: + nft_rule_clear(trans->ctx.net, nft_trans_rule(trans)); + nf_tables_rule_notify(trans->ctx.skb, trans->ctx.nlh, trans->ctx.table, trans->ctx.chain, nft_trans_rule(trans), NFT_MSG_NEWRULE, 0, trans->ctx.afi->family); nft_trans_destroy(trans); - continue; + break; + case NFT_MSG_DELRULE: + list_del_rcu(&nft_trans_rule(trans)->list); + nf_tables_rule_notify(trans->ctx.skb, trans->ctx.nlh, + trans->ctx.table, + trans->ctx.chain, + nft_trans_rule(trans), NFT_MSG_DELRULE, 0, + trans->ctx.afi->family); + break; } - - /* This rule is in the past, get rid of it */ - list_del_rcu(&nft_trans_rule(trans)->list); - nf_tables_rule_notify(skb, trans->ctx.nlh, - trans->ctx.table, trans->ctx.chain, - nft_trans_rule(trans), NFT_MSG_DELRULE, - 0, trans->ctx.afi->family); } /* Make sure we don't see any packet traversing old rules */ @@ -3143,8 +3144,13 @@ static int nf_tables_commit(struct sk_buff *skb) /* Now we can safely release unused old rules */ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { - nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); - nft_trans_destroy(trans); + switch (trans->msg_type) { + case NFT_MSG_DELRULE: + nf_tables_rule_destroy(&trans->ctx, + nft_trans_rule(trans)); + nft_trans_destroy(trans); + break; + } } return 0; @@ -3156,22 +3162,28 @@ static int nf_tables_abort(struct sk_buff *skb) struct nft_trans *trans, *next; list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { - if (!nft_rule_is_active_next(net, nft_trans_rule(trans))) { - nft_rule_clear(net, nft_trans_rule(trans)); + switch (trans->msg_type) { + case NFT_MSG_NEWRULE: + list_del_rcu(&nft_trans_rule(trans)->list); + break; + case NFT_MSG_DELRULE: + nft_rule_clear(trans->ctx.net, nft_trans_rule(trans)); nft_trans_destroy(trans); - continue; + break; } - - /* This rule is inactive, get rid of it */ - list_del_rcu(&nft_trans_rule(trans)->list); } /* Make sure we don't see any packet accessing aborted rules */ synchronize_rcu(); list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { - nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); - nft_trans_destroy(trans); + switch (trans->msg_type) { + case NFT_MSG_NEWRULE: + nf_tables_rule_destroy(&trans->ctx, + nft_trans_rule(trans)); + nft_trans_destroy(trans); + break; + } } return 0; -- cgit v1.2.3-71-gd317 From 958bee14d0718ca7a5002c0f48a099d1d345812a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 3 Apr 2014 11:48:44 +0200 Subject: netfilter: nf_tables: use new transaction infrastructure to handle sets This patch reworks the nf_tables API so set updates are included in the same batch that contains rule updates. This speeds up rule-set updates since we skip a dialog of four messages between kernel and user-space (two on each direction), from: 1) create the set and send netlink message to the kernel 2) process the response from the kernel that contains the allocated name. 3) add the set elements and send netlink message to the kernel. 4) process the response from the kernel (to check for errors). To: 1) add the set to the batch. 2) add the set elements to the batch. 3) add the rule that points to the set. 4) send batch to the kernel. This also introduces an internal set ID (NFTA_SET_ID) that is unique in the batch so set elements and rules can refer to new sets. Backward compatibility has been only retained in userspace, this means that new nft versions can talk to the kernel both in the new and the old fashion. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 12 +++ include/uapi/linux/netfilter/nf_tables.h | 6 ++ net/netfilter/nf_tables_api.c | 123 +++++++++++++++++++++++++++---- net/netfilter/nft_lookup.c | 10 ++- 4 files changed, 133 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d8dfb2695e0f..0f472d668cbe 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -268,6 +268,8 @@ static inline void *nft_set_priv(const struct nft_set *set) struct nft_set *nf_tables_set_lookup(const struct nft_table *table, const struct nlattr *nla); +struct nft_set *nf_tables_set_lookup_byid(const struct net *net, + const struct nlattr *nla); /** * struct nft_set_binding - nf_tables set binding @@ -408,6 +410,16 @@ struct nft_trans_rule { #define nft_trans_rule(trans) \ (((struct nft_trans_rule *)trans->data)->rule) +struct nft_trans_set { + struct nft_set *set; + u32 set_id; +}; + +#define nft_trans_set(trans) \ + (((struct nft_trans_set *)trans->data)->set) +#define nft_trans_set_id(trans) \ + (((struct nft_trans_set *)trans->data)->set_id) + static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) { return (struct nft_expr *)&rule->data[0]; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 7d6433f66bf8..2a88f645a5d8 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -246,6 +246,7 @@ enum nft_set_desc_attributes { * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32) * @NFTA_SET_POLICY: selection policy (NLA_U32) * @NFTA_SET_DESC: set description (NLA_NESTED) + * @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32) */ enum nft_set_attributes { NFTA_SET_UNSPEC, @@ -258,6 +259,7 @@ enum nft_set_attributes { NFTA_SET_DATA_LEN, NFTA_SET_POLICY, NFTA_SET_DESC, + NFTA_SET_ID, __NFTA_SET_MAX }; #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) @@ -293,12 +295,14 @@ enum nft_set_elem_attributes { * @NFTA_SET_ELEM_LIST_TABLE: table of the set to be changed (NLA_STRING) * @NFTA_SET_ELEM_LIST_SET: name of the set to be changed (NLA_STRING) * @NFTA_SET_ELEM_LIST_ELEMENTS: list of set elements (NLA_NESTED: nft_set_elem_attributes) + * @NFTA_SET_ELEM_LIST_SET_ID: uniquely identifies a set in a transaction (NLA_U32) */ enum nft_set_elem_list_attributes { NFTA_SET_ELEM_LIST_UNSPEC, NFTA_SET_ELEM_LIST_TABLE, NFTA_SET_ELEM_LIST_SET, NFTA_SET_ELEM_LIST_ELEMENTS, + NFTA_SET_ELEM_LIST_SET_ID, __NFTA_SET_ELEM_LIST_MAX }; #define NFTA_SET_ELEM_LIST_MAX (__NFTA_SET_ELEM_LIST_MAX - 1) @@ -484,12 +488,14 @@ enum nft_cmp_attributes { * @NFTA_LOOKUP_SET: name of the set where to look for (NLA_STRING) * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers) * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_LOOKUP_SET_ID: uniquely identifies a set in a transaction (NLA_U32) */ enum nft_lookup_attributes { NFTA_LOOKUP_UNSPEC, NFTA_LOOKUP_SET, NFTA_LOOKUP_SREG, NFTA_LOOKUP_DREG, + NFTA_LOOKUP_SET_ID, __NFTA_LOOKUP_MAX }; #define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 34aa3d507542..02d3cc65690c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1934,6 +1934,7 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_DATA_LEN] = { .type = NLA_U32 }, [NFTA_SET_POLICY] = { .type = NLA_U32 }, [NFTA_SET_DESC] = { .type = NLA_NESTED }, + [NFTA_SET_ID] = { .type = NLA_U32 }, }; static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { @@ -1984,6 +1985,20 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table, return ERR_PTR(-ENOENT); } +struct nft_set *nf_tables_set_lookup_byid(const struct net *net, + const struct nlattr *nla) +{ + struct nft_trans *trans; + u32 id = ntohl(nla_get_be32(nla)); + + list_for_each_entry(trans, &net->nft.commit_list, list) { + if (trans->msg_type == NFT_MSG_NEWSET && + id == nft_trans_set_id(trans)) + return nft_trans_set(trans); + } + return ERR_PTR(-ENOENT); +} + static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, const char *name) { @@ -2259,6 +2274,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) return ret; } +#define NFT_SET_INACTIVE (1 << 15) /* Internal set flag */ + static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -2288,6 +2305,8 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); + if (set->flags & NFT_SET_INACTIVE) + return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (skb2 == NULL) @@ -2321,6 +2340,26 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx, return 0; } +static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type, + struct nft_set *set) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set)); + if (trans == NULL) + return -ENOMEM; + + if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) { + nft_trans_set_id(trans) = + ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); + set->flags |= NFT_SET_INACTIVE; + } + nft_trans_set(trans) = set; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + + return 0; +} + static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -2341,7 +2380,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (nla[NFTA_SET_TABLE] == NULL || nla[NFTA_SET_NAME] == NULL || - nla[NFTA_SET_KEY_LEN] == NULL) + nla[NFTA_SET_KEY_LEN] == NULL || + nla[NFTA_SET_ID] == NULL) return -EINVAL; memset(&desc, 0, sizeof(desc)); @@ -2458,8 +2498,11 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (err < 0) goto err2; + err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); + if (err < 0) + goto err2; + list_add_tail(&set->list, &table->sets); - nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET); return 0; err2: @@ -2469,16 +2512,20 @@ err1: return err; } -static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) +static void nft_set_destroy(struct nft_set *set) { - list_del(&set->list); - nf_tables_set_notify(ctx, set, NFT_MSG_DELSET); - set->ops->destroy(set); module_put(set->ops->owner); kfree(set); } +static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) +{ + list_del(&set->list); + nf_tables_set_notify(ctx, set, NFT_MSG_DELSET); + nft_set_destroy(set); +} + static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -2500,10 +2547,16 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); + if (set->flags & NFT_SET_INACTIVE) + return -ENOENT; if (!list_empty(&set->bindings)) return -EBUSY; - nf_tables_set_destroy(&ctx, set); + err = nft_trans_set_add(&ctx, NFT_MSG_DELSET, set); + if (err < 0) + return err; + + list_del(&set->list); return 0; } @@ -2563,7 +2616,8 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, { list_del(&binding->list); - if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) + if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS && + !(set->flags & NFT_SET_INACTIVE)) nf_tables_set_destroy(ctx, set); } @@ -2581,6 +2635,7 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING }, [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING }, [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 }, }; static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, @@ -2680,6 +2735,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); if (IS_ERR(set)) return PTR_ERR(set); + if (set->flags & NFT_SET_INACTIVE) + return -ENOENT; event = NFT_MSG_NEWSETELEM; event |= NFNL_SUBSYS_NFTABLES << 8; @@ -2743,6 +2800,8 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb, set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); if (IS_ERR(set)) return PTR_ERR(set); + if (set->flags & NFT_SET_INACTIVE) + return -ENOENT; if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -2928,6 +2987,7 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + struct net *net = sock_net(skb->sk); const struct nlattr *attr; struct nft_set *set; struct nft_ctx ctx; @@ -2938,8 +2998,15 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, return err; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); - if (IS_ERR(set)) - return PTR_ERR(set); + if (IS_ERR(set)) { + if (nla[NFTA_SET_ELEM_LIST_SET_ID]) { + set = nf_tables_set_lookup_byid(net, + nla[NFTA_SET_ELEM_LIST_SET_ID]); + } + if (IS_ERR(set)) + return PTR_ERR(set); + } + if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) return -EBUSY; @@ -3069,7 +3136,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_rule_policy, }, [NFT_MSG_NEWSET] = { - .call = nf_tables_newset, + .call_batch = nf_tables_newset, .attr_count = NFTA_SET_MAX, .policy = nft_set_policy, }, @@ -3079,12 +3146,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_set_policy, }, [NFT_MSG_DELSET] = { - .call = nf_tables_delset, + .call_batch = nf_tables_delset, .attr_count = NFTA_SET_MAX, .policy = nft_set_policy, }, [NFT_MSG_NEWSETELEM] = { - .call = nf_tables_newsetelem, + .call_batch = nf_tables_newsetelem, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, @@ -3094,7 +3161,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_set_elem_list_policy, }, [NFT_MSG_DELSETELEM] = { - .call = nf_tables_delsetelem, + .call_batch = nf_tables_delsetelem, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, @@ -3136,6 +3203,16 @@ static int nf_tables_commit(struct sk_buff *skb) nft_trans_rule(trans), NFT_MSG_DELRULE, 0, trans->ctx.afi->family); break; + case NFT_MSG_NEWSET: + nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE; + nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + NFT_MSG_NEWSET); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELSET: + nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + NFT_MSG_DELSET); + break; } } @@ -3148,9 +3225,12 @@ static int nf_tables_commit(struct sk_buff *skb) case NFT_MSG_DELRULE: nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); - nft_trans_destroy(trans); + break; + case NFT_MSG_DELSET: + nft_set_destroy(nft_trans_set(trans)); break; } + nft_trans_destroy(trans); } return 0; @@ -3170,6 +3250,14 @@ static int nf_tables_abort(struct sk_buff *skb) nft_rule_clear(trans->ctx.net, nft_trans_rule(trans)); nft_trans_destroy(trans); break; + case NFT_MSG_NEWSET: + list_del(&nft_trans_set(trans)->list); + break; + case NFT_MSG_DELSET: + list_add_tail(&nft_trans_set(trans)->list, + &trans->ctx.table->sets); + nft_trans_destroy(trans); + break; } } @@ -3181,9 +3269,12 @@ static int nf_tables_abort(struct sk_buff *skb) case NFT_MSG_NEWRULE: nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); - nft_trans_destroy(trans); + break; + case NFT_MSG_NEWSET: + nft_set_destroy(nft_trans_set(trans)); break; } + nft_trans_destroy(trans); } return 0; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 7fd2bea8aa23..6404a726d17b 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -56,8 +56,14 @@ static int nft_lookup_init(const struct nft_ctx *ctx, return -EINVAL; set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]); - if (IS_ERR(set)) - return PTR_ERR(set); + if (IS_ERR(set)) { + if (tb[NFTA_LOOKUP_SET_ID]) { + set = nf_tables_set_lookup_byid(ctx->net, + tb[NFTA_LOOKUP_SET_ID]); + } + if (IS_ERR(set)) + return PTR_ERR(set); + } priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG])); err = nft_validate_input_register(priv->sreg); -- cgit v1.2.3-71-gd317 From 91c7b38dc9f0de4f7f444b796d14476bc12df7bc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 9 Apr 2014 11:58:08 +0200 Subject: netfilter: nf_tables: use new transaction infrastructure to handle chain This patch speeds up rule-set updates and it also introduces a way to revert chain updates if the batch is aborted. The idea is to store the changes in the transaction to apply that in the commit step. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 17 ++++ net/netfilter/nf_tables_api.c | 203 +++++++++++++++++++++++++++++--------- 2 files changed, 175 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 0f472d668cbe..7b2361c559b5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -420,6 +420,22 @@ struct nft_trans_set { #define nft_trans_set_id(trans) \ (((struct nft_trans_set *)trans->data)->set_id) +struct nft_trans_chain { + bool update; + char name[NFT_CHAIN_MAXNAMELEN]; + struct nft_stats __percpu *stats; + u8 policy; +}; + +#define nft_trans_chain_update(trans) \ + (((struct nft_trans_chain *)trans->data)->update) +#define nft_trans_chain_name(trans) \ + (((struct nft_trans_chain *)trans->data)->name) +#define nft_trans_chain_stats(trans) \ + (((struct nft_trans_chain *)trans->data)->stats) +#define nft_trans_chain_policy(trans) \ + (((struct nft_trans_chain *)trans->data)->policy) + static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) { return (struct nft_expr *)&rule->data[0]; @@ -452,6 +468,7 @@ static inline void *nft_userdata(const struct nft_rule *rule) enum nft_chain_flags { NFT_BASE_CHAIN = 0x1, + NFT_CHAIN_INACTIVE = 0x2, }; /** diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 92cec68c03ec..7a1149fe2100 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -782,6 +782,8 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); if (IS_ERR(chain)) return PTR_ERR(chain); + if (chain->flags & NFT_CHAIN_INACTIVE) + return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb2) @@ -847,6 +849,34 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain, rcu_assign_pointer(chain->stats, newstats); } +static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain)); + if (trans == NULL) + return -ENOMEM; + + if (msg_type == NFT_MSG_NEWCHAIN) + ctx->chain->flags |= NFT_CHAIN_INACTIVE; + + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + return 0; +} + +static void nf_tables_chain_destroy(struct nft_chain *chain) +{ + BUG_ON(chain->use > 0); + + if (chain->flags & NFT_BASE_CHAIN) { + module_put(nft_base_chain(chain)->type->owner); + free_percpu(nft_base_chain(chain)->stats); + kfree(nft_base_chain(chain)); + } else { + kfree(chain); + } +} + static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -866,6 +896,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, struct nft_stats __percpu *stats; int err; bool create; + struct nft_ctx ctx; create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; @@ -911,6 +942,11 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, } if (chain != NULL) { + struct nft_stats *stats = NULL; + struct nft_trans *trans; + + if (chain->flags & NFT_CHAIN_INACTIVE) + return -ENOENT; if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) @@ -927,17 +963,28 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); if (IS_ERR(stats)) return PTR_ERR(stats); - - nft_chain_stats_replace(nft_base_chain(chain), stats); } - if (nla[NFTA_CHAIN_POLICY]) - nft_base_chain(chain)->policy = policy; + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN, + sizeof(struct nft_trans_chain)); + if (trans == NULL) + return -ENOMEM; - if (nla[NFTA_CHAIN_HANDLE] && name) - nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); + nft_trans_chain_stats(trans) = stats; + nft_trans_chain_update(trans) = true; - goto notify; + if (nla[NFTA_CHAIN_POLICY]) + nft_trans_chain_policy(trans) = policy; + else + nft_trans_chain_policy(trans) = -1; + + if (nla[NFTA_CHAIN_HANDLE] && name) { + nla_strlcpy(nft_trans_chain_name(trans), name, + NFT_CHAIN_MAXNAMELEN); + } + list_add_tail(&trans->list, &net->nft.commit_list); + return 0; } if (table->use == UINT_MAX) @@ -1033,31 +1080,26 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (!(table->flags & NFT_TABLE_F_DORMANT) && chain->flags & NFT_BASE_CHAIN) { err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops); - if (err < 0) { - module_put(basechain->type->owner); - free_percpu(basechain->stats); - kfree(basechain); - return err; - } + if (err < 0) + goto err1; } - list_add_tail(&chain->list, &table->chains); - table->use++; -notify: - nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN, - family); - return 0; -} -static void nf_tables_chain_destroy(struct nft_chain *chain) -{ - BUG_ON(chain->use > 0); + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN); + if (err < 0) + goto err2; - if (chain->flags & NFT_BASE_CHAIN) { - module_put(nft_base_chain(chain)->type->owner); - free_percpu(nft_base_chain(chain)->stats); - kfree(nft_base_chain(chain)); - } else - kfree(chain); + list_add_tail(&chain->list, &table->chains); + return 0; +err2: + if (!(table->flags & NFT_TABLE_F_DORMANT) && + chain->flags & NFT_BASE_CHAIN) { + nf_unregister_hooks(nft_base_chain(chain)->ops, + afi->nops); + } +err1: + nf_tables_chain_destroy(chain); + return err; } static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, @@ -1070,6 +1112,8 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, struct nft_chain *chain; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nft_ctx ctx; + int err; afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) @@ -1082,24 +1126,17 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); if (IS_ERR(chain)) return PTR_ERR(chain); - + if (chain->flags & NFT_CHAIN_INACTIVE) + return -ENOENT; if (!list_empty(&chain->rules) || chain->use > 0) return -EBUSY; - list_del(&chain->list); - table->use--; - - if (!(table->flags & NFT_TABLE_F_DORMANT) && - chain->flags & NFT_BASE_CHAIN) - nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops); - - nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN, - family); - - /* Make sure all rule references are gone before this is released */ - synchronize_rcu(); + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + err = nft_trans_chain_add(&ctx, NFT_MSG_DELCHAIN); + if (err < 0) + return err; - nf_tables_chain_destroy(chain); + list_del(&chain->list); return 0; } @@ -1542,6 +1579,8 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); if (IS_ERR(chain)) return PTR_ERR(chain); + if (chain->flags & NFT_CHAIN_INACTIVE) + return -ENOENT; rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) @@ -3109,7 +3148,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_table_policy, }, [NFT_MSG_NEWCHAIN] = { - .call = nf_tables_newchain, + .call_batch = nf_tables_newchain, .attr_count = NFTA_CHAIN_MAX, .policy = nft_chain_policy, }, @@ -3119,7 +3158,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_chain_policy, }, [NFT_MSG_DELCHAIN] = { - .call = nf_tables_delchain, + .call_batch = nf_tables_delchain, .attr_count = NFTA_CHAIN_MAX, .policy = nft_chain_policy, }, @@ -3170,6 +3209,27 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { }, }; +static void nft_chain_commit_update(struct nft_trans *trans) +{ + struct nft_base_chain *basechain; + + if (nft_trans_chain_name(trans)[0]) + strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans)); + + if (!(trans->ctx.chain->flags & NFT_BASE_CHAIN)) + return; + + basechain = nft_base_chain(trans->ctx.chain); + nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans)); + + switch (nft_trans_chain_policy(trans)) { + case NF_DROP: + case NF_ACCEPT: + basechain->policy = nft_trans_chain_policy(trans); + break; + } +} + static int nf_tables_commit(struct sk_buff *skb) { struct net *net = sock_net(skb->sk); @@ -3188,6 +3248,33 @@ static int nf_tables_commit(struct sk_buff *skb) list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { + case NFT_MSG_NEWCHAIN: + if (nft_trans_chain_update(trans)) + nft_chain_commit_update(trans); + else { + trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE; + trans->ctx.table->use++; + } + nf_tables_chain_notify(trans->ctx.skb, trans->ctx.nlh, + trans->ctx.table, + trans->ctx.chain, + NFT_MSG_NEWCHAIN, + trans->ctx.afi->family); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELCHAIN: + trans->ctx.table->use--; + nf_tables_chain_notify(trans->ctx.skb, trans->ctx.nlh, + trans->ctx.table, + trans->ctx.chain, + NFT_MSG_DELCHAIN, + trans->ctx.afi->family); + if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) && + trans->ctx.chain->flags & NFT_BASE_CHAIN) { + nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops, + trans->ctx.afi->nops); + } + break; case NFT_MSG_NEWRULE: nft_rule_clear(trans->ctx.net, nft_trans_rule(trans)); nf_tables_rule_notify(trans->ctx.skb, trans->ctx.nlh, @@ -3225,6 +3312,9 @@ static int nf_tables_commit(struct sk_buff *skb) /* Now we can safely release unused old rules */ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { + case NFT_MSG_DELCHAIN: + nf_tables_chain_destroy(trans->ctx.chain); + break; case NFT_MSG_DELRULE: nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); @@ -3246,6 +3336,26 @@ static int nf_tables_abort(struct sk_buff *skb) list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { + case NFT_MSG_NEWCHAIN: + if (nft_trans_chain_update(trans)) { + if (nft_trans_chain_stats(trans)) + free_percpu(nft_trans_chain_stats(trans)); + + nft_trans_destroy(trans); + } else { + list_del(&trans->ctx.chain->list); + if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) && + trans->ctx.chain->flags & NFT_BASE_CHAIN) { + nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops, + trans->ctx.afi->nops); + } + } + break; + case NFT_MSG_DELCHAIN: + list_add_tail(&trans->ctx.chain->list, + &trans->ctx.table->chains); + nft_trans_destroy(trans); + break; case NFT_MSG_NEWRULE: list_del_rcu(&nft_trans_rule(trans)->list); break; @@ -3269,6 +3379,9 @@ static int nf_tables_abort(struct sk_buff *skb) list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { + case NFT_MSG_NEWCHAIN: + nf_tables_chain_destroy(trans->ctx.chain); + break; case NFT_MSG_NEWRULE: nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); -- cgit v1.2.3-71-gd317 From 55dd6f93076bb82aa8911191125418dcfcbf2c9b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 3 Apr 2014 11:53:37 +0200 Subject: netfilter: nf_tables: use new transaction infrastructure to handle table This patch speeds up rule-set updates and it also provides a way to revert updates and leave things in consistent state in case that the batch needs to be aborted. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 10 +++ net/netfilter/nf_tables_api.c | 145 +++++++++++++++++++++++++++++++++----- 2 files changed, 136 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 7b2361c559b5..15bf745f198d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -436,6 +436,16 @@ struct nft_trans_chain { #define nft_trans_chain_policy(trans) \ (((struct nft_trans_chain *)trans->data)->policy) +struct nft_trans_table { + bool update; + bool enable; +}; + +#define nft_trans_table_update(trans) \ + (((struct nft_trans_table *)trans->data)->update) +#define nft_trans_table_enable(trans) \ + (((struct nft_trans_table *)trans->data)->enable) + static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) { return (struct nft_expr *)&rule->data[0]; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d71125a0a341..3e685dbb2921 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -307,6 +307,9 @@ done: return skb->len; } +/* Internal table flags */ +#define NFT_TABLE_INACTIVE (1 << 15) + static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -333,6 +336,8 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb2) @@ -395,9 +400,9 @@ static void nf_tables_table_disable(const struct nft_af_info *afi, static int nf_tables_updtable(struct nft_ctx *ctx) { - const struct nfgenmsg *nfmsg = nlmsg_data(ctx->nlh); - int family = nfmsg->nfgen_family, ret = 0; + struct nft_trans *trans; u32 flags; + int ret = 0; if (!ctx->nla[NFTA_TABLE_FLAGS]) return 0; @@ -406,25 +411,48 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags & ~NFT_TABLE_F_DORMANT) return -EINVAL; + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, + sizeof(struct nft_trans_table)); + if (trans == NULL) + return -ENOMEM; + if ((flags & NFT_TABLE_F_DORMANT) && !(ctx->table->flags & NFT_TABLE_F_DORMANT)) { - nf_tables_table_disable(ctx->afi, ctx->table); - ctx->table->flags |= NFT_TABLE_F_DORMANT; + nft_trans_table_enable(trans) = false; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { ret = nf_tables_table_enable(ctx->afi, ctx->table); - if (ret >= 0) + if (ret >= 0) { ctx->table->flags &= ~NFT_TABLE_F_DORMANT; + nft_trans_table_enable(trans) = true; + } } if (ret < 0) goto err; - nf_tables_table_notify(ctx->skb, ctx->nlh, ctx->table, - NFT_MSG_NEWTABLE, family); + nft_trans_table_update(trans) = true; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + return 0; err: + nft_trans_destroy(trans); return ret; } +static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table)); + if (trans == NULL) + return -ENOMEM; + + if (msg_type == NFT_MSG_NEWTABLE) + ctx->table->flags |= NFT_TABLE_INACTIVE; + + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + return 0; +} + static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -437,6 +465,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, int family = nfmsg->nfgen_family; u32 flags = 0; struct nft_ctx ctx; + int err; afi = nf_tables_afinfo_lookup(net, family, true); if (IS_ERR(afi)) @@ -451,6 +480,8 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, } if (table != NULL) { + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) @@ -480,8 +511,14 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&table->sets); table->flags = flags; + nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); + err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); + if (err < 0) { + kfree(table); + module_put(afi->owner); + return err; + } list_add_tail(&table->list, &afi->tables); - nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); return 0; } @@ -493,7 +530,8 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, struct nft_af_info *afi; struct nft_table *table; struct net *net = sock_net(skb->sk); - int family = nfmsg->nfgen_family; + int family = nfmsg->nfgen_family, err; + struct nft_ctx ctx; afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) @@ -502,17 +540,27 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; if (!list_empty(&table->chains) || !list_empty(&table->sets)) return -EBUSY; + nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); + err = nft_trans_table_add(&ctx, NFT_MSG_DELTABLE); + if (err < 0) + return err; + list_del(&table->list); - nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family); - kfree(table); - module_put(afi->owner); return 0; } +static void nf_tables_table_destroy(struct nft_ctx *ctx) +{ + kfree(ctx->table); + module_put(ctx->afi->owner); +} + int nft_register_chain_type(const struct nf_chain_type *ctype) { int err = 0; @@ -776,6 +824,8 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); if (IS_ERR(chain)) @@ -1120,6 +1170,8 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); if (IS_ERR(chain)) @@ -1573,6 +1625,8 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); if (IS_ERR(chain)) @@ -1838,6 +1892,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; if (nla[NFTA_RULE_CHAIN]) { chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); @@ -2004,6 +2060,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; } nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); @@ -2681,7 +2739,8 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, const struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + bool trans) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi; @@ -2695,6 +2754,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (!trans && (table->flags & NFT_TABLE_INACTIVE)) + return -ENOENT; nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); return 0; @@ -2768,7 +2829,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) return err; - err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla); + err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla, + false); if (err < 0) return err; @@ -2833,7 +2895,7 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb, struct nft_ctx ctx; int err; - err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false); if (err < 0) return err; @@ -3033,7 +3095,7 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, struct nft_ctx ctx; int rem, err; - err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true); if (err < 0) return err; @@ -3111,7 +3173,7 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, struct nft_ctx ctx; int rem, err; - err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false); if (err < 0) return err; @@ -3131,7 +3193,7 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { [NFT_MSG_NEWTABLE] = { - .call = nf_tables_newtable, + .call_batch = nf_tables_newtable, .attr_count = NFTA_TABLE_MAX, .policy = nft_table_policy, }, @@ -3141,7 +3203,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_table_policy, }, [NFT_MSG_DELTABLE] = { - .call = nf_tables_deltable, + .call_batch = nf_tables_deltable, .attr_count = NFTA_TABLE_MAX, .policy = nft_table_policy, }, @@ -3246,6 +3308,28 @@ static int nf_tables_commit(struct sk_buff *skb) list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { + case NFT_MSG_NEWTABLE: + if (nft_trans_table_update(trans)) { + if (!nft_trans_table_enable(trans)) { + nf_tables_table_disable(trans->ctx.afi, + trans->ctx.table); + trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; + } + } else { + trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE; + } + nf_tables_table_notify(trans->ctx.skb, trans->ctx.nlh, + trans->ctx.table, + NFT_MSG_NEWTABLE, + trans->ctx.afi->family); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELTABLE: + nf_tables_table_notify(trans->ctx.skb, trans->ctx.nlh, + trans->ctx.table, + NFT_MSG_DELTABLE, + trans->ctx.afi->family); + break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) nft_chain_commit_update(trans); @@ -3310,6 +3394,9 @@ static int nf_tables_commit(struct sk_buff *skb) /* Now we can safely release unused old rules */ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { + case NFT_MSG_DELTABLE: + nf_tables_table_destroy(&trans->ctx); + break; case NFT_MSG_DELCHAIN: nf_tables_chain_destroy(trans->ctx.chain); break; @@ -3334,6 +3421,23 @@ static int nf_tables_abort(struct sk_buff *skb) list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { + case NFT_MSG_NEWTABLE: + if (nft_trans_table_update(trans)) { + if (nft_trans_table_enable(trans)) { + nf_tables_table_disable(trans->ctx.afi, + trans->ctx.table); + trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; + } + nft_trans_destroy(trans); + } else { + list_del(&trans->ctx.table->list); + } + break; + case NFT_MSG_DELTABLE: + list_add_tail(&trans->ctx.table->list, + &trans->ctx.afi->tables); + nft_trans_destroy(trans); + break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { if (nft_trans_chain_stats(trans)) @@ -3377,6 +3481,9 @@ static int nf_tables_abort(struct sk_buff *skb) list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { + case NFT_MSG_NEWTABLE: + nf_tables_table_destroy(&trans->ctx); + break; case NFT_MSG_NEWCHAIN: nf_tables_chain_destroy(trans->ctx.chain); break; -- cgit v1.2.3-71-gd317 From 60319eb1ca351aa36e29d58d2e60ba9a9836265a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 4 Apr 2014 03:36:42 +0200 Subject: netfilter: nf_tables: use new transaction infrastructure to handle elements Leave the set content in consistent state if we fail to load the batch. Use the new generic transaction infrastructure to achieve this. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 10 +++++ net/netfilter/nf_tables_api.c | 82 ++++++++++++++++++++++++++++++++------- 2 files changed, 78 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 15bf745f198d..b08f2a941007 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -446,6 +446,16 @@ struct nft_trans_table { #define nft_trans_table_enable(trans) \ (((struct nft_trans_table *)trans->data)->enable) +struct nft_trans_elem { + struct nft_set *set; + struct nft_set_elem elem; +}; + +#define nft_trans_elem_set(trans) \ + (((struct nft_trans_elem *)trans->data)->set) +#define nft_trans_elem(trans) \ + (((struct nft_trans_elem *)trans->data)->elem) + static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) { return (struct nft_expr *)&rule->data[0]; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3e685dbb2921..cd002935e6b2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2993,7 +2993,21 @@ err: return err; } -static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, +static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, + int msg_type, + struct nft_set *set) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem)); + if (trans == NULL) + return NULL; + + nft_trans_elem_set(trans) = set; + return trans; +} + +static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; @@ -3001,6 +3015,7 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_elem elem; struct nft_set_binding *binding; enum nft_registers dreg; + struct nft_trans *trans; int err; if (set->size && set->nelems == set->size) @@ -3068,14 +3083,20 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, } } + trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); + if (trans == NULL) + goto err3; + err = set->ops->insert(set, &elem); if (err < 0) - goto err3; - set->nelems++; + goto err4; - nf_tables_setelem_notify(ctx, set, &elem, NFT_MSG_NEWSETELEM, 0); + nft_trans_elem(trans) = elem; + list_add(&trans->list, &ctx->net->nft.commit_list); return 0; +err4: + kfree(trans); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) nft_data_uninit(&elem.data, d2.type); @@ -3093,7 +3114,7 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, const struct nlattr *attr; struct nft_set *set; struct nft_ctx ctx; - int rem, err; + int rem, err = 0; err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true); if (err < 0) @@ -3115,17 +3136,18 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_add_set_elem(&ctx, set, attr); if (err < 0) - return err; + break; } - return 0; + return err; } -static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set, +static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_data_desc desc; struct nft_set_elem elem; + struct nft_trans *trans; int err; err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, @@ -3149,10 +3171,12 @@ static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err2; - set->ops->remove(set, &elem); - set->nelems--; + trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); + if (trans == NULL) + goto err2; - nf_tables_setelem_notify(ctx, set, &elem, NFT_MSG_DELSETELEM, 0); + nft_trans_elem(trans) = elem; + list_add(&trans->list, &ctx->net->nft.commit_list); nft_data_uninit(&elem.key, NFT_DATA_VALUE); if (set->flags & NFT_SET_MAP) @@ -3171,7 +3195,7 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, const struct nlattr *attr; struct nft_set *set; struct nft_ctx ctx; - int rem, err; + int rem, err = 0; err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false); if (err < 0) @@ -3186,9 +3210,9 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_del_setelem(&ctx, set, attr); if (err < 0) - return err; + break; } - return 0; + return err; } static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { @@ -3294,6 +3318,7 @@ static int nf_tables_commit(struct sk_buff *skb) { struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; + struct nft_set *set; /* Bump generation counter, invalidate any dump in progress */ net->nft.genctr++; @@ -3385,6 +3410,25 @@ static int nf_tables_commit(struct sk_buff *skb) nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_DELSET); break; + case NFT_MSG_NEWSETELEM: + nft_trans_elem_set(trans)->nelems++; + nf_tables_setelem_notify(&trans->ctx, + nft_trans_elem_set(trans), + &nft_trans_elem(trans), + NFT_MSG_NEWSETELEM, 0); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELSETELEM: + nft_trans_elem_set(trans)->nelems--; + nf_tables_setelem_notify(&trans->ctx, + nft_trans_elem_set(trans), + &nft_trans_elem(trans), + NFT_MSG_DELSETELEM, 0); + set = nft_trans_elem_set(trans); + set->ops->get(set, &nft_trans_elem(trans)); + set->ops->remove(set, &nft_trans_elem(trans)); + nft_trans_destroy(trans); + break; } } @@ -3418,6 +3462,7 @@ static int nf_tables_abort(struct sk_buff *skb) { struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; + struct nft_set *set; list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { @@ -3473,6 +3518,15 @@ static int nf_tables_abort(struct sk_buff *skb) &trans->ctx.table->sets); nft_trans_destroy(trans); break; + case NFT_MSG_NEWSETELEM: + set = nft_trans_elem_set(trans); + set->ops->get(set, &nft_trans_elem(trans)); + set->ops->remove(set, &nft_trans_elem(trans)); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELSETELEM: + nft_trans_destroy(trans); + break; } } -- cgit v1.2.3-71-gd317 From 128ad3322ba5de8fa346203c9931d1fdcab8da87 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 May 2014 17:14:24 +0200 Subject: netfilter: nf_tables: remove skb and nlh from context structure Instead of caching the original skbuff that contains the netlink messages, this stores the netlink message sequence number, the netlink portID and the report flag. This helps to prepare the introduction of the object release via call_rcu. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 10 ++-- net/netfilter/nf_tables_api.c | 101 +++++++++++++++++--------------------- 2 files changed, 52 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b08f2a941007..1ed2797fb964 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -72,21 +72,23 @@ static inline void nft_data_debug(const struct nft_data *data) * struct nft_ctx - nf_tables rule/set context * * @net: net namespace - * @skb: netlink skb - * @nlh: netlink message header * @afi: address family info * @table: the table the chain is contained in * @chain: the chain the rule is contained in * @nla: netlink attributes + * @portid: netlink portID of the original message + * @seq: netlink sequence number + * @report: notify via unicast netlink message */ struct nft_ctx { struct net *net; - const struct sk_buff *skb; - const struct nlmsghdr *nlh; struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; const struct nlattr * const *nla; + u32 portid; + u32 seq; + bool report; }; struct nft_data_desc { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 86d055a51f0b..4147166ad17c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -96,13 +96,14 @@ static void nft_ctx_init(struct nft_ctx *ctx, struct nft_chain *chain, const struct nlattr * const *nla) { - ctx->net = sock_net(skb->sk); - ctx->skb = skb; - ctx->nlh = nlh; - ctx->afi = afi; - ctx->table = table; - ctx->chain = chain; - ctx->nla = nla; + ctx->net = sock_net(skb->sk); + ctx->afi = afi; + ctx->table = table; + ctx->chain = chain; + ctx->nla = nla; + ctx->portid = NETLINK_CB(skb).portid; + ctx->report = nlmsg_report(nlh); + ctx->seq = nlh->nlmsg_seq; } static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type, @@ -238,14 +239,10 @@ nla_put_failure: static int nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; - u32 portid = NETLINK_CB(ctx->skb).portid; - u32 seq = ctx->nlh->nlmsg_seq; - struct net *net = sock_net(ctx->skb->sk); - bool report; int err; - report = nlmsg_report(ctx->nlh); - if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + if (!ctx->report && + !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; @@ -253,18 +250,20 @@ static int nf_tables_table_notify(const struct nft_ctx *ctx, int event) if (skb == NULL) goto err; - err = nf_tables_fill_table_info(skb, portid, seq, event, 0, + err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0, ctx->afi->family, ctx->table); if (err < 0) { kfree_skb(skb); goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, - GFP_KERNEL); + err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); err: - if (err < 0) - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + if (err < 0) { + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, + err); + } return err; } @@ -721,14 +720,10 @@ nla_put_failure: static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; - u32 portid = NETLINK_CB(ctx->skb).portid; - struct net *net = sock_net(ctx->skb->sk); - u32 seq = ctx->nlh->nlmsg_seq; - bool report; int err; - report = nlmsg_report(ctx->nlh); - if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + if (!ctx->report && + !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; @@ -736,7 +731,7 @@ static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event) if (skb == NULL) goto err; - err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, + err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0, ctx->afi->family, ctx->table, ctx->chain); if (err < 0) { @@ -744,11 +739,13 @@ static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event) goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, - GFP_KERNEL); + err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); err: - if (err < 0) - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + if (err < 0) { + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, + err); + } return err; } @@ -1473,16 +1470,11 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx, const struct nft_rule *rule, int event) { - const struct sk_buff *oskb = ctx->skb; struct sk_buff *skb; - u32 portid = NETLINK_CB(oskb).portid; - struct net *net = sock_net(oskb->sk); - u32 seq = ctx->nlh->nlmsg_seq; - bool report; int err; - report = nlmsg_report(ctx->nlh); - if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + if (!ctx->report && + !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; @@ -1490,7 +1482,7 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx, if (skb == NULL) goto err; - err = nf_tables_fill_rule_info(skb, portid, seq, event, 0, + err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0, ctx->afi->family, ctx->table, ctx->chain, rule); if (err < 0) { @@ -1498,11 +1490,13 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, - GFP_KERNEL); + err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); err: - if (err < 0) - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + if (err < 0) { + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, + err); + } return err; } @@ -2141,8 +2135,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; struct nlattr *desc; - u32 portid = NETLINK_CB(ctx->skb).portid; - u32 seq = ctx->nlh->nlmsg_seq; + u32 portid = ctx->portid; + u32 seq = ctx->seq; event |= NFNL_SUBSYS_NFTABLES << 8; nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), @@ -2194,12 +2188,11 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; - u32 portid = NETLINK_CB(ctx->skb).portid; - bool report; + u32 portid = ctx->portid; int err; - report = nlmsg_report(ctx->nlh); - if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) + if (!ctx->report && + !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; @@ -2213,8 +2206,8 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report, - GFP_KERNEL); + err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); err: if (err < 0) nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err); @@ -2956,14 +2949,12 @@ static int nf_tables_setelem_notify(const struct nft_ctx *ctx, const struct nft_set_elem *elem, int event, u16 flags) { - const struct sk_buff *oskb = ctx->skb; - struct net *net = sock_net(oskb->sk); - u32 portid = NETLINK_CB(oskb).portid; - bool report = nlmsg_report(ctx->nlh); + struct net *net = ctx->net; + u32 portid = ctx->portid; struct sk_buff *skb; int err; - if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; @@ -2978,7 +2969,7 @@ static int nf_tables_setelem_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report, GFP_KERNEL); err: if (err < 0) -- cgit v1.2.3-71-gd317 From c7c32e72cbe23cea97c5d87ffcf6e23cc1ec1a65 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 10 Apr 2014 00:31:10 +0200 Subject: netfilter: nf_tables: defer all object release via rcu Now that all objects are released in the reverse order via the transaction infrastructure, we can enqueue the release via call_rcu to save one synchronize_rcu. For small rule-sets loaded via nft -f, it now takes around 50ms less here. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 + net/netfilter/nf_tables_api.c | 93 +++++++++++++++++++++++---------------- 2 files changed, 56 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1ed2797fb964..7ee6ce6564ae 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -393,12 +393,14 @@ struct nft_rule { /** * struct nft_trans - nf_tables object update in transaction * + * @rcu_head: rcu head to defer release of transaction data * @list: used internally * @msg_type: message type * @ctx: transaction context * @data: internal information related to the transaction */ struct nft_trans { + struct rcu_head rcu_head; struct list_head list; int msg_type; struct nft_ctx ctx; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4147166ad17c..e171c635d08c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3298,6 +3298,30 @@ static void nft_chain_commit_update(struct nft_trans *trans) } } +/* Schedule objects for release via rcu to make sure no packets are accesing + * removed rules. + */ +static void nf_tables_commit_release_rcu(struct rcu_head *rt) +{ + struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head); + + switch (trans->msg_type) { + case NFT_MSG_DELTABLE: + nf_tables_table_destroy(&trans->ctx); + break; + case NFT_MSG_DELCHAIN: + nf_tables_chain_destroy(trans->ctx.chain); + break; + case NFT_MSG_DELRULE: + nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + break; + case NFT_MSG_DELSET: + nft_set_destroy(nft_trans_set(trans)); + break; + } + kfree(trans); +} + static int nf_tables_commit(struct sk_buff *skb) { struct net *net = sock_net(skb->sk); @@ -3397,32 +3421,39 @@ static int nf_tables_commit(struct sk_buff *skb) } } - /* Make sure we don't see any packet traversing old rules */ - synchronize_rcu(); - - /* Now we can safely release unused old rules */ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { - switch (trans->msg_type) { - case NFT_MSG_DELTABLE: - nf_tables_table_destroy(&trans->ctx); - break; - case NFT_MSG_DELCHAIN: - nf_tables_chain_destroy(trans->ctx.chain); - break; - case NFT_MSG_DELRULE: - nf_tables_rule_destroy(&trans->ctx, - nft_trans_rule(trans)); - break; - case NFT_MSG_DELSET: - nft_set_destroy(nft_trans_set(trans)); - break; - } - nft_trans_destroy(trans); + list_del(&trans->list); + trans->ctx.nla = NULL; + call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu); } return 0; } +/* Schedule objects for release via rcu to make sure no packets are accesing + * aborted rules. + */ +static void nf_tables_abort_release_rcu(struct rcu_head *rt) +{ + struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head); + + switch (trans->msg_type) { + case NFT_MSG_NEWTABLE: + nf_tables_table_destroy(&trans->ctx); + break; + case NFT_MSG_NEWCHAIN: + nf_tables_chain_destroy(trans->ctx.chain); + break; + case NFT_MSG_NEWRULE: + nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + break; + case NFT_MSG_NEWSET: + nft_set_destroy(nft_trans_set(trans)); + break; + } + kfree(trans); +} + static int nf_tables_abort(struct sk_buff *skb) { struct net *net = sock_net(skb->sk); @@ -3495,26 +3526,10 @@ static int nf_tables_abort(struct sk_buff *skb) } } - /* Make sure we don't see any packet accessing aborted rules */ - synchronize_rcu(); - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { - switch (trans->msg_type) { - case NFT_MSG_NEWTABLE: - nf_tables_table_destroy(&trans->ctx); - break; - case NFT_MSG_NEWCHAIN: - nf_tables_chain_destroy(trans->ctx.chain); - break; - case NFT_MSG_NEWRULE: - nf_tables_rule_destroy(&trans->ctx, - nft_trans_rule(trans)); - break; - case NFT_MSG_NEWSET: - nft_set_destroy(nft_trans_set(trans)); - break; - } - nft_trans_destroy(trans); + list_del(&trans->list); + trans->ctx.nla = NULL; + call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu); } return 0; -- cgit v1.2.3-71-gd317 From a1b73fc194e73ed33c8b77bf09374cb05b58151b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 May 2014 16:51:04 +0200 Subject: scsi: reintroduce scsi_driver.init_command Instead of letting the ULD play games with the prep_fn move back to the model of a central prep_fn with a callback to the ULD. This already cleans up and shortens the code by itself, and will be required to properly support blk-mq in the SCSI midlayer. Signed-off-by: Christoph Hellwig Reviewed-by: Nicholas Bellinger Reviewed-by: Mike Christie Reviewed-by: Hannes Reinecke --- drivers/scsi/scsi_lib.c | 66 +++++++++++++++++++++++++++------------------- drivers/scsi/sd.c | 44 ++++++++++--------------------- drivers/scsi/sr.c | 19 +++++-------- include/scsi/scsi_driver.h | 9 +++---- 4 files changed, 62 insertions(+), 76 deletions(-) (limited to 'include') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 045822befad9..9f841df6add8 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1073,15 +1073,7 @@ static struct scsi_cmnd *scsi_get_cmd_from_req(struct scsi_device *sdev, int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req) { - struct scsi_cmnd *cmd; - int ret = scsi_prep_state_check(sdev, req); - - if (ret != BLKPREP_OK) - return ret; - - cmd = scsi_get_cmd_from_req(sdev, req); - if (unlikely(!cmd)) - return BLKPREP_DEFER; + struct scsi_cmnd *cmd = req->special; /* * BLOCK_PC requests may transfer data, in which case they must @@ -1125,15 +1117,11 @@ EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd); */ int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req) { - struct scsi_cmnd *cmd; - int ret = scsi_prep_state_check(sdev, req); - - if (ret != BLKPREP_OK) - return ret; + struct scsi_cmnd *cmd = req->special; if (unlikely(sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh && sdev->scsi_dh_data->scsi_dh->prep_fn)) { - ret = sdev->scsi_dh_data->scsi_dh->prep_fn(sdev, req); + int ret = sdev->scsi_dh_data->scsi_dh->prep_fn(sdev, req); if (ret != BLKPREP_OK) return ret; } @@ -1143,16 +1131,13 @@ int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req) */ BUG_ON(!req->nr_phys_segments); - cmd = scsi_get_cmd_from_req(sdev, req); - if (unlikely(!cmd)) - return BLKPREP_DEFER; - memset(cmd->cmnd, 0, BLK_MAX_CDB); return scsi_init_io(cmd, GFP_ATOMIC); } EXPORT_SYMBOL(scsi_setup_fs_cmnd); -int scsi_prep_state_check(struct scsi_device *sdev, struct request *req) +static int +scsi_prep_state_check(struct scsi_device *sdev, struct request *req) { int ret = BLKPREP_OK; @@ -1204,9 +1189,9 @@ int scsi_prep_state_check(struct scsi_device *sdev, struct request *req) } return ret; } -EXPORT_SYMBOL(scsi_prep_state_check); -int scsi_prep_return(struct request_queue *q, struct request *req, int ret) +static int +scsi_prep_return(struct request_queue *q, struct request *req, int ret) { struct scsi_device *sdev = q->queuedata; @@ -1237,18 +1222,44 @@ int scsi_prep_return(struct request_queue *q, struct request *req, int ret) return ret; } -EXPORT_SYMBOL(scsi_prep_return); -int scsi_prep_fn(struct request_queue *q, struct request *req) +static int scsi_prep_fn(struct request_queue *q, struct request *req) { struct scsi_device *sdev = q->queuedata; - int ret = BLKPREP_KILL; + struct scsi_cmnd *cmd; + int ret; - if (req->cmd_type == REQ_TYPE_BLOCK_PC) + ret = scsi_prep_state_check(sdev, req); + if (ret != BLKPREP_OK) + goto out; + + cmd = scsi_get_cmd_from_req(sdev, req); + if (unlikely(!cmd)) { + ret = BLKPREP_DEFER; + goto out; + } + + if (req->cmd_type == REQ_TYPE_FS) + ret = scsi_cmd_to_driver(cmd)->init_command(cmd); + else if (req->cmd_type == REQ_TYPE_BLOCK_PC) ret = scsi_setup_blk_pc_cmnd(sdev, req); + else + ret = BLKPREP_KILL; + +out: return scsi_prep_return(q, req, ret); } -EXPORT_SYMBOL(scsi_prep_fn); + +static void scsi_unprep_fn(struct request_queue *q, struct request *req) +{ + if (req->cmd_type == REQ_TYPE_FS) { + struct scsi_cmnd *cmd = req->special; + struct scsi_driver *drv = scsi_cmd_to_driver(cmd); + + if (drv->uninit_command) + drv->uninit_command(cmd); + } +} /* * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else @@ -1669,6 +1680,7 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) return NULL; blk_queue_prep_rq(q, scsi_prep_fn); + blk_queue_unprep_rq(q, scsi_unprep_fn); blk_queue_softirq_done(q, scsi_softirq_done); blk_queue_rq_timed_out(q, scsi_times_out); blk_queue_lld_busy(q, scsi_lld_busy); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index efcbcd182863..aa028d8ae774 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -109,6 +109,8 @@ static int sd_suspend_system(struct device *); static int sd_suspend_runtime(struct device *); static int sd_resume(struct device *); static void sd_rescan(struct device *); +static int sd_init_command(struct scsi_cmnd *SCpnt); +static void sd_uninit_command(struct scsi_cmnd *SCpnt); static int sd_done(struct scsi_cmnd *); static int sd_eh_action(struct scsi_cmnd *, int); static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer); @@ -503,6 +505,8 @@ static struct scsi_driver sd_template = { .pm = &sd_pm_ops, }, .rescan = sd_rescan, + .init_command = sd_init_command, + .uninit_command = sd_uninit_command, .done = sd_done, .eh_action = sd_eh_action, }; @@ -838,9 +842,9 @@ static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq) return scsi_setup_blk_pc_cmnd(sdp, rq); } -static void sd_unprep_fn(struct request_queue *q, struct request *rq) +static void sd_uninit_command(struct scsi_cmnd *SCpnt) { - struct scsi_cmnd *SCpnt = rq->special; + struct request *rq = SCpnt->request; if (rq->cmd_flags & REQ_DISCARD) { free_page((unsigned long)rq->buffer); @@ -853,18 +857,10 @@ static void sd_unprep_fn(struct request_queue *q, struct request *rq) } } -/** - * sd_prep_fn - build a scsi (read or write) command from - * information in the request structure. - * @SCpnt: pointer to mid-level's per scsi command structure that - * contains request and into which the scsi command is written - * - * Returns 1 if successful and 0 if error (or cannot be done now). - **/ -static int sd_prep_fn(struct request_queue *q, struct request *rq) +static int sd_init_command(struct scsi_cmnd *SCpnt) { - struct scsi_cmnd *SCpnt; - struct scsi_device *sdp = q->queuedata; + struct request *rq = SCpnt->request; + struct scsi_device *sdp = SCpnt->device; struct gendisk *disk = rq->rq_disk; struct scsi_disk *sdkp; sector_t block = blk_rq_pos(rq); @@ -886,12 +882,6 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) } else if (rq->cmd_flags & REQ_FLUSH) { ret = scsi_setup_flush_cmnd(sdp, rq); goto out; - } else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { - ret = scsi_setup_blk_pc_cmnd(sdp, rq); - goto out; - } else if (rq->cmd_type != REQ_TYPE_FS) { - ret = BLKPREP_KILL; - goto out; } ret = scsi_setup_fs_cmnd(sdp, rq); if (ret != BLKPREP_OK) @@ -903,11 +893,10 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) * is used for a killable error condition */ ret = BLKPREP_KILL; - SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, SCpnt, - "sd_prep_fn: block=%llu, " - "count=%d\n", - (unsigned long long)block, - this_count)); + SCSI_LOG_HLQUEUE(1, + scmd_printk(KERN_INFO, SCpnt, + "%s: block=%llu, count=%d\n", + __func__, (unsigned long long)block, this_count)); if (!sdp || !scsi_device_online(sdp) || block + blk_rq_sectors(rq) > get_capacity(disk)) { @@ -1127,7 +1116,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) */ ret = BLKPREP_OK; out: - return scsi_prep_return(q, rq, ret); + return ret; } /** @@ -2878,9 +2867,6 @@ static void sd_probe_async(void *data, async_cookie_t cookie) sd_revalidate_disk(gd); - blk_queue_prep_rq(sdp->request_queue, sd_prep_fn); - blk_queue_unprep_rq(sdp->request_queue, sd_unprep_fn); - gd->driverfs_dev = &sdp->sdev_gendev; gd->flags = GENHD_FL_EXT_DEVT; if (sdp->removable) { @@ -3028,8 +3014,6 @@ static int sd_remove(struct device *dev) async_synchronize_full_domain(&scsi_sd_pm_domain); async_synchronize_full_domain(&scsi_sd_probe_domain); - blk_queue_prep_rq(sdkp->device->request_queue, scsi_prep_fn); - blk_queue_unprep_rq(sdkp->device->request_queue, NULL); device_del(&sdkp->dev); del_gendisk(sdkp->disk); sd_shutdown(dev); diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 40d85929aefe..93cbd36c990b 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -79,6 +79,7 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_WORM); static DEFINE_MUTEX(sr_mutex); static int sr_probe(struct device *); static int sr_remove(struct device *); +static int sr_init_command(struct scsi_cmnd *SCpnt); static int sr_done(struct scsi_cmnd *); static int sr_runtime_suspend(struct device *dev); @@ -94,6 +95,7 @@ static struct scsi_driver sr_template = { .remove = sr_remove, .pm = &sr_pm_ops, }, + .init_command = sr_init_command, .done = sr_done, }; @@ -378,21 +380,14 @@ static int sr_done(struct scsi_cmnd *SCpnt) return good_bytes; } -static int sr_prep_fn(struct request_queue *q, struct request *rq) +static int sr_init_command(struct scsi_cmnd *SCpnt) { int block = 0, this_count, s_size; struct scsi_cd *cd; - struct scsi_cmnd *SCpnt; - struct scsi_device *sdp = q->queuedata; + struct request *rq = SCpnt->request; + struct scsi_device *sdp = SCpnt->device; int ret; - if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { - ret = scsi_setup_blk_pc_cmnd(sdp, rq); - goto out; - } else if (rq->cmd_type != REQ_TYPE_FS) { - ret = BLKPREP_KILL; - goto out; - } ret = scsi_setup_fs_cmnd(sdp, rq); if (ret != BLKPREP_OK) goto out; @@ -517,7 +512,7 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq) */ ret = BLKPREP_OK; out: - return scsi_prep_return(q, rq, ret); + return ret; } static int sr_block_open(struct block_device *bdev, fmode_t mode) @@ -718,7 +713,6 @@ static int sr_probe(struct device *dev) /* FIXME: need to handle a get_capabilities failure properly ?? */ get_capabilities(cd); - blk_queue_prep_rq(sdev->request_queue, sr_prep_fn); sr_vendor_init(cd); disk->driverfs_dev = &sdev->sdev_gendev; @@ -993,7 +987,6 @@ static int sr_remove(struct device *dev) scsi_autopm_get_device(cd->device); - blk_queue_prep_rq(cd->device->request_queue, scsi_prep_fn); del_gendisk(cd->disk); mutex_lock(&sr_ref_mutex); diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index 20fdfc2526ad..36c4114ed9bc 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -4,17 +4,17 @@ #include struct module; +struct request; struct scsi_cmnd; struct scsi_device; -struct request; -struct request_queue; - struct scsi_driver { struct module *owner; struct device_driver gendrv; void (*rescan)(struct device *); + int (*init_command)(struct scsi_cmnd *); + void (*uninit_command)(struct scsi_cmnd *); int (*done)(struct scsi_cmnd *); int (*eh_action)(struct scsi_cmnd *, int); }; @@ -31,8 +31,5 @@ extern int scsi_register_interface(struct class_interface *); int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req); int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req); -int scsi_prep_state_check(struct scsi_device *sdev, struct request *req); -int scsi_prep_return(struct request_queue *q, struct request *req, int ret); -int scsi_prep_fn(struct request_queue *, struct request *); #endif /* _SCSI_SCSI_DRIVER_H */ -- cgit v1.2.3-71-gd317 From c2e4323b3316b9daec7824802ca0dd9eae4317c5 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Thu, 15 May 2014 20:18:09 +0300 Subject: cfg80211: add documentation for max_num_csa_counters Move the comment in the structure to a description of the max_num_csa_counters field in the docbook area. This fixes a warning when building htmldocs (at least): Warning(include/net/cfg80211.h:3064): No description found for parameter 'max_num_csa_counters' Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 447cb58f0d77..955fdec5a1b6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2936,6 +2936,11 @@ struct wiphy_vendor_command { * (including P2P GO) or 0 to indicate no such limit is advertised. The * driver is allowed to advertise a theoretical limit that it can reach in * some cases, but may not always reach. + * + * @max_num_csa_counters: Number of supported csa_counters in beacons + * and probe responses. This value should be set if the driver + * wishes to limit the number of csa counters. Default (0) means + * infinite. */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -3053,11 +3058,6 @@ struct wiphy { u16 max_ap_assoc_sta; - /* - * Number of supported csa_counters in beacons and probe responses. - * This value should be set if the driver wishes to limit the number of - * csa counters. Default (0) means infinite. - */ u8 max_num_csa_counters; char priv[0] __aligned(NETDEV_ALIGN); -- cgit v1.2.3-71-gd317 From 8d77ec856200df31623074de3fde44519df7725b Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Thu, 15 May 2014 20:32:08 +0300 Subject: mac80211: fix csa_counter_offs argument name in docbook The csa_counter_offs was erroneously described as csa_offs in the docbook section. This fixes two warnings when making htmldocs (at least): Warning(include/net/mac80211.h:3428): No description found for parameter 'csa_counter_offs[IEEE80211_MAX_CSA_COUNTERS_NUM]' Warning(include/net/mac80211.h:3428): Excess struct/union/enum/typedef member 'csa_offs' description in 'ieee80211_mutable_offsets' Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 982d2cd80166..a34f26a4ed18 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3417,8 +3417,9 @@ void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets); * struct ieee80211_mutable_offsets - mutable beacon offsets * @tim_offset: position of TIM element * @tim_length: size of TIM element - * @csa_offs: array of IEEE80211_MAX_CSA_COUNTERS_NUM offsets to CSA counters. - * This array can contain zero values which should be ignored. + * @csa_counter_offs: array of IEEE80211_MAX_CSA_COUNTERS_NUM offsets + * to CSA counters. This array can contain zero values which + * should be ignored. */ struct ieee80211_mutable_offsets { u16 tim_offset; -- cgit v1.2.3-71-gd317 From 3b3a0162fade6b83d5c83efafcd5adb9e4537047 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 May 2014 17:19:31 +0200 Subject: cfg80211: constify MAC addresses in cfg80211 ops This propagates through all the drivers and mac80211. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 7 ++-- drivers/net/wireless/ath/wil6210/cfg80211.c | 4 +- drivers/net/wireless/ath/wil6210/main.c | 4 +- drivers/net/wireless/ath/wil6210/wil6210.h | 2 +- .../net/wireless/brcm80211/brcmfmac/wl_cfg80211.c | 6 +-- drivers/net/wireless/libertas/cfg.c | 2 +- drivers/net/wireless/mwifiex/11n.h | 2 +- drivers/net/wireless/mwifiex/cfg80211.c | 19 +++++----- drivers/net/wireless/mwifiex/main.h | 22 +++++------ drivers/net/wireless/mwifiex/tdls.c | 44 ++++++++++++---------- drivers/net/wireless/mwifiex/util.c | 6 +-- drivers/net/wireless/mwifiex/wmm.c | 10 ++--- drivers/net/wireless/mwifiex/wmm.h | 5 ++- drivers/net/wireless/rndis_wlan.c | 4 +- drivers/staging/wlan-ng/cfg80211.c | 2 +- include/net/cfg80211.h | 29 +++++++------- net/mac80211/cfg.c | 24 ++++++------ net/mac80211/ieee80211_i.h | 4 +- net/mac80211/tdls.c | 12 +++--- 19 files changed, 106 insertions(+), 102 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index c2c6f4604958..1fffbde4b01e 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1759,7 +1759,7 @@ static bool is_rate_ht40(s32 rate, u8 *mcs, bool *sgi) } static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_info *sinfo) + const u8 *mac, struct station_info *sinfo) { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); @@ -2975,7 +2975,7 @@ static int ath6kl_stop_ap(struct wiphy *wiphy, struct net_device *dev) static const u8 bcast_addr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static int ath6kl_del_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac) + const u8 *mac) { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); @@ -2986,7 +2986,8 @@ static int ath6kl_del_station(struct wiphy *wiphy, struct net_device *dev, } static int ath6kl_change_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_parameters *params) + const u8 *mac, + struct station_parameters *params) { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index 4806a49cb61b..6e699d050d1e 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -172,7 +172,7 @@ static int wil_cid_fill_sinfo(struct wil6210_priv *wil, int cid, static int wil_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, - u8 *mac, struct station_info *sinfo) + const u8 *mac, struct station_info *sinfo) { struct wil6210_priv *wil = wiphy_to_wil(wiphy); int rc; @@ -671,7 +671,7 @@ static int wil_cfg80211_stop_ap(struct wiphy *wiphy, } static int wil_cfg80211_del_station(struct wiphy *wiphy, - struct net_device *dev, u8 *mac) + struct net_device *dev, const u8 *mac) { struct wil6210_priv *wil = wiphy_to_wil(wiphy); diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 95f4efe9ef37..a6b1d5872786 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -81,7 +81,7 @@ static void wil_disconnect_cid(struct wil6210_priv *wil, int cid) memset(&sta->stats, 0, sizeof(sta->stats)); } -static void _wil6210_disconnect(struct wil6210_priv *wil, void *bssid) +static void _wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid) { int cid = -ENOENT; struct net_device *ndev = wil_to_ndev(wil); @@ -252,7 +252,7 @@ int wil_priv_init(struct wil6210_priv *wil) return 0; } -void wil6210_disconnect(struct wil6210_priv *wil, void *bssid) +void wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid) { del_timer_sync(&wil->connect_timer); _wil6210_disconnect(wil, bssid); diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index 2a2dec75f026..6b353be64579 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -507,7 +507,7 @@ void wil_wdev_free(struct wil6210_priv *wil); int wmi_set_mac_address(struct wil6210_priv *wil, void *addr); int wmi_pcp_start(struct wil6210_priv *wil, int bi, u8 wmi_nettype, u8 chan); int wmi_pcp_stop(struct wil6210_priv *wil); -void wil6210_disconnect(struct wil6210_priv *wil, void *bssid); +void wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid); int wil_rx_init(struct wil6210_priv *wil); void wil_rx_fini(struct wil6210_priv *wil); diff --git a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c index afb3d15e38ff..befa9c04166d 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c @@ -2182,7 +2182,7 @@ brcmf_cfg80211_config_default_mgmt_key(struct wiphy *wiphy, static s32 brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, - u8 *mac, struct station_info *sinfo) + const u8 *mac, struct station_info *sinfo) { struct brcmf_if *ifp = netdev_priv(ndev); struct brcmf_cfg80211_profile *profile = &ifp->vif->profile; @@ -3975,7 +3975,7 @@ brcmf_cfg80211_change_beacon(struct wiphy *wiphy, struct net_device *ndev, static int brcmf_cfg80211_del_station(struct wiphy *wiphy, struct net_device *ndev, - u8 *mac) + const u8 *mac) { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); struct brcmf_scb_val_le scbval; @@ -4203,7 +4203,7 @@ static int brcmf_convert_nl80211_tdls_oper(enum nl80211_tdls_operation oper) } static int brcmf_cfg80211_tdls_oper(struct wiphy *wiphy, - struct net_device *ndev, u8 *peer, + struct net_device *ndev, const u8 *peer, enum nl80211_tdls_operation oper) { struct brcmf_if *ifp; diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c index 54e344aed6e0..e4d7031df05c 100644 --- a/drivers/net/wireless/libertas/cfg.c +++ b/drivers/net/wireless/libertas/cfg.c @@ -1610,7 +1610,7 @@ static int lbs_cfg_del_key(struct wiphy *wiphy, struct net_device *netdev, */ static int lbs_cfg_get_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_info *sinfo) + const u8 *mac, struct station_info *sinfo) { struct lbs_private *priv = wiphy_priv(wiphy); s8 signal, noise; diff --git a/drivers/net/wireless/mwifiex/11n.h b/drivers/net/wireless/mwifiex/11n.h index 40b007a00f4b..0a46aab24214 100644 --- a/drivers/net/wireless/mwifiex/11n.h +++ b/drivers/net/wireless/mwifiex/11n.h @@ -199,7 +199,7 @@ static inline int mwifiex_is_sta_11n_enabled(struct mwifiex_private *priv, } static inline u8 -mwifiex_tdls_peer_11n_enabled(struct mwifiex_private *priv, u8 *ra) +mwifiex_tdls_peer_11n_enabled(struct mwifiex_private *priv, const u8 *ra) { struct mwifiex_sta_node *node = mwifiex_get_sta_entry(priv, ra); if (node) diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 21ee27ab7b74..e95dec91a561 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -994,7 +994,7 @@ mwifiex_dump_station_info(struct mwifiex_private *priv, */ static int mwifiex_cfg80211_get_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_info *sinfo) + const u8 *mac, struct station_info *sinfo) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); @@ -1270,7 +1270,7 @@ static int mwifiex_cfg80211_change_beacon(struct wiphy *wiphy, */ static int mwifiex_cfg80211_del_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac) + const u8 *mac) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); struct mwifiex_sta_node *sta_node; @@ -2629,7 +2629,7 @@ static int mwifiex_cfg80211_set_coalesce(struct wiphy *wiphy, */ static int mwifiex_cfg80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, u8 action_code, u8 dialog_token, + const u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, const u8 *extra_ies, size_t extra_ies_len) { @@ -2701,7 +2701,7 @@ mwifiex_cfg80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, static int mwifiex_cfg80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, enum nl80211_tdls_operation action) + const u8 *peer, enum nl80211_tdls_operation action) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); @@ -2748,9 +2748,8 @@ mwifiex_cfg80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, } static int -mwifiex_cfg80211_add_station(struct wiphy *wiphy, - struct net_device *dev, - u8 *mac, struct station_parameters *params) +mwifiex_cfg80211_add_station(struct wiphy *wiphy, struct net_device *dev, + const u8 *mac, struct station_parameters *params) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); @@ -2765,9 +2764,9 @@ mwifiex_cfg80211_add_station(struct wiphy *wiphy, } static int -mwifiex_cfg80211_change_station(struct wiphy *wiphy, - struct net_device *dev, - u8 *mac, struct station_parameters *params) +mwifiex_cfg80211_change_station(struct wiphy *wiphy, struct net_device *dev, + const u8 *mac, + struct station_parameters *params) { int ret; struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); diff --git a/drivers/net/wireless/mwifiex/main.h b/drivers/net/wireless/mwifiex/main.h index d53e1e8c9467..86c6b8cdec22 100644 --- a/drivers/net/wireless/mwifiex/main.h +++ b/drivers/net/wireless/mwifiex/main.h @@ -910,8 +910,6 @@ int mwifiex_handle_uap_rx_forward(struct mwifiex_private *priv, struct sk_buff *skb); int mwifiex_process_sta_event(struct mwifiex_private *); int mwifiex_process_uap_event(struct mwifiex_private *); -struct mwifiex_sta_node * -mwifiex_get_sta_entry(struct mwifiex_private *priv, u8 *mac); void mwifiex_delete_all_station_list(struct mwifiex_private *priv); void *mwifiex_process_sta_txpd(struct mwifiex_private *, struct sk_buff *skb); void *mwifiex_process_uap_txpd(struct mwifiex_private *, struct sk_buff *skb); @@ -1220,26 +1218,26 @@ void mwifiex_dnld_txpwr_table(struct mwifiex_private *priv); extern const struct ethtool_ops mwifiex_ethtool_ops; void mwifiex_del_all_sta_list(struct mwifiex_private *priv); -void mwifiex_del_sta_entry(struct mwifiex_private *priv, u8 *mac); +void mwifiex_del_sta_entry(struct mwifiex_private *priv, const u8 *mac); void mwifiex_set_sta_ht_cap(struct mwifiex_private *priv, const u8 *ies, int ies_len, struct mwifiex_sta_node *node); struct mwifiex_sta_node * -mwifiex_add_sta_entry(struct mwifiex_private *priv, u8 *mac); +mwifiex_add_sta_entry(struct mwifiex_private *priv, const u8 *mac); struct mwifiex_sta_node * -mwifiex_get_sta_entry(struct mwifiex_private *priv, u8 *mac); -int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv, u8 *peer, +mwifiex_get_sta_entry(struct mwifiex_private *priv, const u8 *mac); +int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv, const u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, const u8 *extra_ies, size_t extra_ies_len); -int mwifiex_send_tdls_action_frame(struct mwifiex_private *priv, - u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, const u8 *extra_ies, - size_t extra_ies_len); +int mwifiex_send_tdls_action_frame(struct mwifiex_private *priv, const u8 *peer, + u8 action_code, u8 dialog_token, + u16 status_code, const u8 *extra_ies, + size_t extra_ies_len); void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, u8 *buf, int len); -int mwifiex_tdls_oper(struct mwifiex_private *priv, u8 *peer, u8 action); -int mwifiex_get_tdls_link_status(struct mwifiex_private *priv, u8 *mac); +int mwifiex_tdls_oper(struct mwifiex_private *priv, const u8 *peer, u8 action); +int mwifiex_get_tdls_link_status(struct mwifiex_private *priv, const u8 *mac); void mwifiex_disable_all_tdls_links(struct mwifiex_private *priv); bool mwifiex_is_bss_in_11ac_mode(struct mwifiex_private *priv); u8 mwifiex_get_center_freq_index(struct mwifiex_private *priv, u8 band, diff --git a/drivers/net/wireless/mwifiex/tdls.c b/drivers/net/wireless/mwifiex/tdls.c index 97662a1ba58c..49c8a2c64eeb 100644 --- a/drivers/net/wireless/mwifiex/tdls.c +++ b/drivers/net/wireless/mwifiex/tdls.c @@ -25,8 +25,8 @@ #define TDLS_RESP_FIX_LEN 8 #define TDLS_CONFIRM_FIX_LEN 6 -static void -mwifiex_restore_tdls_packets(struct mwifiex_private *priv, u8 *mac, u8 status) +static void mwifiex_restore_tdls_packets(struct mwifiex_private *priv, + const u8 *mac, u8 status) { struct mwifiex_ra_list_tbl *ra_list; struct list_head *tid_list; @@ -84,7 +84,8 @@ mwifiex_restore_tdls_packets(struct mwifiex_private *priv, u8 *mac, u8 status) return; } -static void mwifiex_hold_tdls_packets(struct mwifiex_private *priv, u8 *mac) +static void mwifiex_hold_tdls_packets(struct mwifiex_private *priv, + const u8 *mac) { struct mwifiex_ra_list_tbl *ra_list; struct list_head *ra_list_head; @@ -186,7 +187,7 @@ static int mwifiex_tdls_add_vht_capab(struct mwifiex_private *priv, } static int mwifiex_tdls_add_vht_oper(struct mwifiex_private *priv, - u8 *mac, struct sk_buff *skb) + const u8 *mac, struct sk_buff *skb) { struct mwifiex_bssdescriptor *bss_desc; struct ieee80211_vht_operation *vht_oper; @@ -325,8 +326,9 @@ static void mwifiex_tdls_add_qos_capab(struct sk_buff *skb) } static int mwifiex_prep_tdls_encap_data(struct mwifiex_private *priv, - u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, struct sk_buff *skb) + const u8 *peer, u8 action_code, + u8 dialog_token, + u16 status_code, struct sk_buff *skb) { struct ieee80211_tdls_data *tf; int ret; @@ -453,7 +455,8 @@ static int mwifiex_prep_tdls_encap_data(struct mwifiex_private *priv, } static void -mwifiex_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr, u8 *peer, u8 *bssid) +mwifiex_tdls_add_link_ie(struct sk_buff *skb, const u8 *src_addr, + const u8 *peer, const u8 *bssid) { struct ieee80211_tdls_lnkie *lnkid; @@ -467,8 +470,8 @@ mwifiex_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr, u8 *peer, u8 *bssid) memcpy(lnkid->resp_sta, peer, ETH_ALEN); } -int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv, - u8 *peer, u8 action_code, u8 dialog_token, +int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv, const u8 *peer, + u8 action_code, u8 dialog_token, u16 status_code, const u8 *extra_ies, size_t extra_ies_len) { @@ -560,7 +563,8 @@ int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv, } static int -mwifiex_construct_tdls_action_frame(struct mwifiex_private *priv, u8 *peer, +mwifiex_construct_tdls_action_frame(struct mwifiex_private *priv, + const u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, struct sk_buff *skb) { @@ -638,10 +642,10 @@ mwifiex_construct_tdls_action_frame(struct mwifiex_private *priv, u8 *peer, return 0; } -int mwifiex_send_tdls_action_frame(struct mwifiex_private *priv, - u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, const u8 *extra_ies, - size_t extra_ies_len) +int mwifiex_send_tdls_action_frame(struct mwifiex_private *priv, const u8 *peer, + u8 action_code, u8 dialog_token, + u16 status_code, const u8 *extra_ies, + size_t extra_ies_len) { struct sk_buff *skb; struct mwifiex_txinfo *tx_info; @@ -848,7 +852,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, } static int -mwifiex_tdls_process_config_link(struct mwifiex_private *priv, u8 *peer) +mwifiex_tdls_process_config_link(struct mwifiex_private *priv, const u8 *peer) { struct mwifiex_sta_node *sta_ptr; struct mwifiex_ds_tdls_oper tdls_oper; @@ -869,7 +873,7 @@ mwifiex_tdls_process_config_link(struct mwifiex_private *priv, u8 *peer) } static int -mwifiex_tdls_process_create_link(struct mwifiex_private *priv, u8 *peer) +mwifiex_tdls_process_create_link(struct mwifiex_private *priv, const u8 *peer) { struct mwifiex_sta_node *sta_ptr; struct mwifiex_ds_tdls_oper tdls_oper; @@ -896,7 +900,7 @@ mwifiex_tdls_process_create_link(struct mwifiex_private *priv, u8 *peer) } static int -mwifiex_tdls_process_disable_link(struct mwifiex_private *priv, u8 *peer) +mwifiex_tdls_process_disable_link(struct mwifiex_private *priv, const u8 *peer) { struct mwifiex_sta_node *sta_ptr; struct mwifiex_ds_tdls_oper tdls_oper; @@ -925,7 +929,7 @@ mwifiex_tdls_process_disable_link(struct mwifiex_private *priv, u8 *peer) } static int -mwifiex_tdls_process_enable_link(struct mwifiex_private *priv, u8 *peer) +mwifiex_tdls_process_enable_link(struct mwifiex_private *priv, const u8 *peer) { struct mwifiex_sta_node *sta_ptr; struct ieee80211_mcs_info mcs; @@ -982,7 +986,7 @@ mwifiex_tdls_process_enable_link(struct mwifiex_private *priv, u8 *peer) return 0; } -int mwifiex_tdls_oper(struct mwifiex_private *priv, u8 *peer, u8 action) +int mwifiex_tdls_oper(struct mwifiex_private *priv, const u8 *peer, u8 action) { switch (action) { case MWIFIEX_TDLS_ENABLE_LINK: @@ -997,7 +1001,7 @@ int mwifiex_tdls_oper(struct mwifiex_private *priv, u8 *peer, u8 action) return 0; } -int mwifiex_get_tdls_link_status(struct mwifiex_private *priv, u8 *mac) +int mwifiex_get_tdls_link_status(struct mwifiex_private *priv, const u8 *mac) { struct mwifiex_sta_node *sta_ptr; diff --git a/drivers/net/wireless/mwifiex/util.c b/drivers/net/wireless/mwifiex/util.c index c3824e37f3f2..6da5abf52e61 100644 --- a/drivers/net/wireless/mwifiex/util.c +++ b/drivers/net/wireless/mwifiex/util.c @@ -259,7 +259,7 @@ int mwifiex_complete_cmd(struct mwifiex_adapter *adapter, * NULL is returned if station entry is not found in associated STA list. */ struct mwifiex_sta_node * -mwifiex_get_sta_entry(struct mwifiex_private *priv, u8 *mac) +mwifiex_get_sta_entry(struct mwifiex_private *priv, const u8 *mac) { struct mwifiex_sta_node *node; @@ -280,7 +280,7 @@ mwifiex_get_sta_entry(struct mwifiex_private *priv, u8 *mac) * If received mac address is NULL, NULL is returned. */ struct mwifiex_sta_node * -mwifiex_add_sta_entry(struct mwifiex_private *priv, u8 *mac) +mwifiex_add_sta_entry(struct mwifiex_private *priv, const u8 *mac) { struct mwifiex_sta_node *node; unsigned long flags; @@ -332,7 +332,7 @@ mwifiex_set_sta_ht_cap(struct mwifiex_private *priv, const u8 *ies, } /* This function will delete a station entry from station list */ -void mwifiex_del_sta_entry(struct mwifiex_private *priv, u8 *mac) +void mwifiex_del_sta_entry(struct mwifiex_private *priv, const u8 *mac) { struct mwifiex_sta_node *node; unsigned long flags; diff --git a/drivers/net/wireless/mwifiex/wmm.c b/drivers/net/wireless/mwifiex/wmm.c index 0a7cc742aed7..c6bc5b3191d6 100644 --- a/drivers/net/wireless/mwifiex/wmm.c +++ b/drivers/net/wireless/mwifiex/wmm.c @@ -92,7 +92,7 @@ mwifiex_wmm_ac_debug_print(const struct ieee_types_wmm_ac_parameters *ac_param) * The function also initializes the list with the provided RA. */ static struct mwifiex_ra_list_tbl * -mwifiex_wmm_allocate_ralist_node(struct mwifiex_adapter *adapter, u8 *ra) +mwifiex_wmm_allocate_ralist_node(struct mwifiex_adapter *adapter, const u8 *ra) { struct mwifiex_ra_list_tbl *ra_list; @@ -139,8 +139,7 @@ static u8 mwifiex_get_random_ba_threshold(void) * This function allocates and adds a RA list for all TIDs * with the given RA. */ -void -mwifiex_ralist_add(struct mwifiex_private *priv, u8 *ra) +void mwifiex_ralist_add(struct mwifiex_private *priv, const u8 *ra) { int i; struct mwifiex_ra_list_tbl *ra_list; @@ -575,7 +574,7 @@ mwifiex_clean_txrx(struct mwifiex_private *priv) */ static struct mwifiex_ra_list_tbl * mwifiex_wmm_get_ralist_node(struct mwifiex_private *priv, u8 tid, - u8 *ra_addr) + const u8 *ra_addr) { struct mwifiex_ra_list_tbl *ra_list; @@ -596,7 +595,8 @@ mwifiex_wmm_get_ralist_node(struct mwifiex_private *priv, u8 tid, * retrieved. */ struct mwifiex_ra_list_tbl * -mwifiex_wmm_get_queue_raptr(struct mwifiex_private *priv, u8 tid, u8 *ra_addr) +mwifiex_wmm_get_queue_raptr(struct mwifiex_private *priv, u8 tid, + const u8 *ra_addr) { struct mwifiex_ra_list_tbl *ra_list; diff --git a/drivers/net/wireless/mwifiex/wmm.h b/drivers/net/wireless/mwifiex/wmm.h index 83e42083ebff..eca56e371a57 100644 --- a/drivers/net/wireless/mwifiex/wmm.h +++ b/drivers/net/wireless/mwifiex/wmm.h @@ -99,7 +99,7 @@ mwifiex_wmm_is_ra_list_empty(struct list_head *ra_list_hhead) void mwifiex_wmm_add_buf_txqueue(struct mwifiex_private *priv, struct sk_buff *skb); -void mwifiex_ralist_add(struct mwifiex_private *priv, u8 *ra); +void mwifiex_ralist_add(struct mwifiex_private *priv, const u8 *ra); void mwifiex_rotate_priolists(struct mwifiex_private *priv, struct mwifiex_ra_list_tbl *ra, int tid); @@ -123,7 +123,8 @@ void mwifiex_wmm_setup_ac_downgrade(struct mwifiex_private *priv); int mwifiex_ret_wmm_get_status(struct mwifiex_private *priv, const struct host_cmd_ds_command *resp); struct mwifiex_ra_list_tbl * -mwifiex_wmm_get_queue_raptr(struct mwifiex_private *priv, u8 tid, u8 *ra_addr); +mwifiex_wmm_get_queue_raptr(struct mwifiex_private *priv, u8 tid, + const u8 *ra_addr); u8 mwifiex_wmm_downgrade_tid(struct mwifiex_private *priv, u32 tid); #endif /* !_MWIFIEX_WMM_H_ */ diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 39d22a154341..d2a9a08210be 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -517,7 +517,7 @@ static int rndis_set_default_key(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, bool unicast, bool multicast); static int rndis_get_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_info *sinfo); + const u8 *mac, struct station_info *sinfo); static int rndis_dump_station(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *mac, struct station_info *sinfo); @@ -2490,7 +2490,7 @@ static void rndis_fill_station_info(struct usbnet *usbdev, } static int rndis_get_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_info *sinfo) + const u8 *mac, struct station_info *sinfo) { struct rndis_wlan_private *priv = wiphy_priv(wiphy); struct usbnet *usbdev = priv->usbdev; diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c index f76f95c29617..f7eb8163d4fb 100644 --- a/drivers/staging/wlan-ng/cfg80211.c +++ b/drivers/staging/wlan-ng/cfg80211.c @@ -298,7 +298,7 @@ static int prism2_set_default_key(struct wiphy *wiphy, struct net_device *dev, static int prism2_get_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_info *sinfo) + const u8 *mac, struct station_info *sinfo) { wlandevice_t *wlandev = dev->ml_priv; struct p80211msg_lnxreq_commsquality quality; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 955fdec5a1b6..d4a602b92edf 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2345,28 +2345,29 @@ struct cfg80211_ops { int (*add_station)(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_parameters *params); + const u8 *mac, + struct station_parameters *params); int (*del_station)(struct wiphy *wiphy, struct net_device *dev, - u8 *mac); + const u8 *mac); int (*change_station)(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_parameters *params); + const u8 *mac, + struct station_parameters *params); int (*get_station)(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_info *sinfo); + const u8 *mac, struct station_info *sinfo); int (*dump_station)(struct wiphy *wiphy, struct net_device *dev, - int idx, u8 *mac, struct station_info *sinfo); + int idx, u8 *mac, struct station_info *sinfo); int (*add_mpath)(struct wiphy *wiphy, struct net_device *dev, - u8 *dst, u8 *next_hop); + const u8 *dst, const u8 *next_hop); int (*del_mpath)(struct wiphy *wiphy, struct net_device *dev, - u8 *dst); + const u8 *dst); int (*change_mpath)(struct wiphy *wiphy, struct net_device *dev, - u8 *dst, u8 *next_hop); + const u8 *dst, const u8 *next_hop); int (*get_mpath)(struct wiphy *wiphy, struct net_device *dev, - u8 *dst, u8 *next_hop, - struct mpath_info *pinfo); + u8 *dst, u8 *next_hop, struct mpath_info *pinfo); int (*dump_mpath)(struct wiphy *wiphy, struct net_device *dev, - int idx, u8 *dst, u8 *next_hop, - struct mpath_info *pinfo); + int idx, u8 *dst, u8 *next_hop, + struct mpath_info *pinfo); int (*get_mesh_config)(struct wiphy *wiphy, struct net_device *dev, struct mesh_config *conf); @@ -2496,11 +2497,11 @@ struct cfg80211_ops { struct cfg80211_gtk_rekey_data *data); int (*tdls_mgmt)(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, u8 action_code, u8 dialog_token, + const u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, const u8 *buf, size_t len); int (*tdls_oper)(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, enum nl80211_tdls_operation oper); + const u8 *peer, enum nl80211_tdls_operation oper); int (*probe_client)(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u64 *cookie); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index bfd2534e5a4d..ac45304590d8 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -777,7 +777,7 @@ static void ieee80211_get_et_strings(struct wiphy *wiphy, } static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, - int idx, u8 *mac, struct station_info *sinfo) + int idx, u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -807,7 +807,7 @@ static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_info *sinfo) + const u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -1457,7 +1457,8 @@ static int sta_apply_parameters(struct ieee80211_local *local, } static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_parameters *params) + const u8 *mac, + struct station_parameters *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; @@ -1526,7 +1527,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac) + const u8 *mac) { struct ieee80211_sub_if_data *sdata; @@ -1540,7 +1541,7 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_change_station(struct wiphy *wiphy, - struct net_device *dev, u8 *mac, + struct net_device *dev, const u8 *mac, struct station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1665,7 +1666,7 @@ out_err: #ifdef CONFIG_MAC80211_MESH static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, - u8 *dst, u8 *next_hop) + const u8 *dst, const u8 *next_hop) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; @@ -1693,7 +1694,7 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, - u8 *dst) + const u8 *dst) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1704,9 +1705,8 @@ static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, return 0; } -static int ieee80211_change_mpath(struct wiphy *wiphy, - struct net_device *dev, - u8 *dst, u8 *next_hop) +static int ieee80211_change_mpath(struct wiphy *wiphy, struct net_device *dev, + const u8 *dst, const u8 *next_hop) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; @@ -1798,8 +1798,8 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, - int idx, u8 *dst, u8 *next_hop, - struct mpath_info *pinfo) + int idx, u8 *dst, u8 *next_hop, + struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 57e0b2682cef..ed2b817d5ece 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1839,11 +1839,11 @@ int ieee80211_max_num_channels(struct ieee80211_local *local); /* TDLS */ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, u8 action_code, u8 dialog_token, + const u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, const u8 *extra_ies, size_t extra_ies_len); int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, enum nl80211_tdls_operation oper); + const u8 *peer, enum nl80211_tdls_operation oper); #ifdef CONFIG_MAC80211_NOINLINE diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 8e14e2aaea11..652813b2d3df 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -40,8 +40,8 @@ static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata) return capab; } -static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr, - u8 *peer, u8 *bssid) +static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, const u8 *src_addr, + const u8 *peer, const u8 *bssid) { struct ieee80211_tdls_lnkie *lnkid; @@ -57,7 +57,7 @@ static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr, static int ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, u8 action_code, u8 dialog_token, + const u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, struct sk_buff *skb) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -130,7 +130,7 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, u8 action_code, u8 dialog_token, + const u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, struct sk_buff *skb) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -169,7 +169,7 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, } int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, u8 action_code, u8 dialog_token, + const u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, const u8 *extra_ies, size_t extra_ies_len) { @@ -285,7 +285,7 @@ fail: } int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, enum nl80211_tdls_operation oper) + const u8 *peer, enum nl80211_tdls_operation oper) { struct sta_info *sta; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); -- cgit v1.2.3-71-gd317 From c1e5f4714d591cc0a5e986613fdefa61abe98ac2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 May 2014 17:53:16 +0200 Subject: cfg80211: constify more pointers in the cfg80211 API This also propagates through the drivers. The orinoco driver uses the cfg80211 API structs for internal bookkeeping, and so needs a (void *) cast that removes the const - but that's OK because it allocates those pointers. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/wmi.c | 2 +- drivers/net/wireless/ath/ath6kl/wmi.h | 2 +- drivers/net/wireless/libertas/cfg.c | 5 ++--- drivers/net/wireless/libertas/defs.h | 3 ++- drivers/net/wireless/orinoco/hw.c | 4 ++-- drivers/net/wireless/orinoco/hw.h | 4 ++-- drivers/net/wireless/orinoco/wext.c | 4 ++-- drivers/staging/wlan-ng/cfg80211.c | 2 +- include/net/cfg80211.h | 23 ++++++++++++----------- net/wireless/ibss.c | 2 +- net/wireless/sme.c | 2 +- net/wireless/util.c | 3 ++- 12 files changed, 29 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c index 8b4ce28e3ce8..051094604e21 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.c +++ b/drivers/net/wireless/ath/ath6kl/wmi.c @@ -2322,7 +2322,7 @@ int ath6kl_wmi_addkey_cmd(struct wmi *wmi, u8 if_idx, u8 key_index, return ret; } -int ath6kl_wmi_add_krk_cmd(struct wmi *wmi, u8 if_idx, u8 *krk) +int ath6kl_wmi_add_krk_cmd(struct wmi *wmi, u8 if_idx, const u8 *krk) { struct sk_buff *skb; struct wmi_add_krk_cmd *cmd; diff --git a/drivers/net/wireless/ath/ath6kl/wmi.h b/drivers/net/wireless/ath/ath6kl/wmi.h index b5f226503baf..39ee35e60c1d 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.h +++ b/drivers/net/wireless/ath/ath6kl/wmi.h @@ -2617,7 +2617,7 @@ int ath6kl_wmi_addkey_cmd(struct wmi *wmi, u8 if_idx, u8 key_index, u8 *key_material, u8 key_op_ctrl, u8 *mac_addr, enum wmi_sync_flag sync_flag); -int ath6kl_wmi_add_krk_cmd(struct wmi *wmi, u8 if_idx, u8 *krk); +int ath6kl_wmi_add_krk_cmd(struct wmi *wmi, u8 if_idx, const u8 *krk); int ath6kl_wmi_deletekey_cmd(struct wmi *wmi, u8 if_idx, u8 key_index); int ath6kl_wmi_setpmkid_cmd(struct wmi *wmi, u8 if_idx, const u8 *bssid, const u8 *pmkid, bool set); diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c index e4d7031df05c..47a998d8f99e 100644 --- a/drivers/net/wireless/libertas/cfg.c +++ b/drivers/net/wireless/libertas/cfg.c @@ -1006,9 +1006,8 @@ struct cmd_key_material { } __packed; static int lbs_set_key_material(struct lbs_private *priv, - int key_type, - int key_info, - u8 *key, u16 key_len) + int key_type, int key_info, + const u8 *key, u16 key_len) { struct cmd_key_material cmd; int ret; diff --git a/drivers/net/wireless/libertas/defs.h b/drivers/net/wireless/libertas/defs.h index ab966f08024a..407784aca627 100644 --- a/drivers/net/wireless/libertas/defs.h +++ b/drivers/net/wireless/libertas/defs.h @@ -90,7 +90,8 @@ do { if ((lbs_debug & (grp)) == (grp)) \ #define lbs_deb_cfg80211(fmt, args...) LBS_DEB_LL(LBS_DEB_CFG80211, " cfg80211", fmt, ##args) #ifdef DEBUG -static inline void lbs_deb_hex(unsigned int grp, const char *prompt, u8 *buf, int len) +static inline void lbs_deb_hex(unsigned int grp, const char *prompt, + const u8 *buf, int len) { int i = 0; diff --git a/drivers/net/wireless/orinoco/hw.c b/drivers/net/wireless/orinoco/hw.c index 49300d04efdf..e27e32851f1e 100644 --- a/drivers/net/wireless/orinoco/hw.c +++ b/drivers/net/wireless/orinoco/hw.c @@ -988,8 +988,8 @@ int __orinoco_hw_setup_enc(struct orinoco_private *priv) * tsc must be NULL or up to 8 bytes */ int __orinoco_hw_set_tkip_key(struct orinoco_private *priv, int key_idx, - int set_tx, u8 *key, u8 *rsc, size_t rsc_len, - u8 *tsc, size_t tsc_len) + int set_tx, const u8 *key, const u8 *rsc, + size_t rsc_len, const u8 *tsc, size_t tsc_len) { struct { __le16 idx; diff --git a/drivers/net/wireless/orinoco/hw.h b/drivers/net/wireless/orinoco/hw.h index 8f6831f4e328..466d1ede76f1 100644 --- a/drivers/net/wireless/orinoco/hw.h +++ b/drivers/net/wireless/orinoco/hw.h @@ -38,8 +38,8 @@ int __orinoco_hw_set_wap(struct orinoco_private *priv); int __orinoco_hw_setup_wepkeys(struct orinoco_private *priv); int __orinoco_hw_setup_enc(struct orinoco_private *priv); int __orinoco_hw_set_tkip_key(struct orinoco_private *priv, int key_idx, - int set_tx, u8 *key, u8 *rsc, size_t rsc_len, - u8 *tsc, size_t tsc_len); + int set_tx, const u8 *key, const u8 *rsc, + size_t rsc_len, const u8 *tsc, size_t tsc_len); int orinoco_clear_tkip_key(struct orinoco_private *priv, int key_idx); int __orinoco_hw_set_multicast_list(struct orinoco_private *priv, struct net_device *dev, diff --git a/drivers/net/wireless/orinoco/wext.c b/drivers/net/wireless/orinoco/wext.c index b7a867b50b94..6abdaf0aa052 100644 --- a/drivers/net/wireless/orinoco/wext.c +++ b/drivers/net/wireless/orinoco/wext.c @@ -52,9 +52,9 @@ static int orinoco_set_key(struct orinoco_private *priv, int index, priv->keys[index].seq_len = seq_len; if (key_len) - memcpy(priv->keys[index].key, key, key_len); + memcpy((void *)priv->keys[index].key, key, key_len); if (seq_len) - memcpy(priv->keys[index].seq, seq, seq_len); + memcpy((void *)priv->keys[index].seq, seq, seq_len); switch (alg) { case ORINOCO_ALG_TKIP: diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c index f7eb8163d4fb..723319ee08f3 100644 --- a/drivers/staging/wlan-ng/cfg80211.c +++ b/drivers/staging/wlan-ng/cfg80211.c @@ -84,7 +84,7 @@ static int prism2_domibset_uint32(wlandevice_t *wlandev, u32 did, u32 data) } static int prism2_domibset_pstr32(wlandevice_t *wlandev, - u32 did, u8 len, u8 *data) + u32 did, u8 len, const u8 *data) { struct p80211msg_dot11req_mibset msg; p80211item_pstr32_t *mibitem = diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d4a602b92edf..3299d1b731ef 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -341,8 +341,8 @@ struct vif_params { * @seq_len: length of @seq. */ struct key_params { - u8 *key; - u8 *seq; + const u8 *key; + const u8 *seq; int key_len; int seq_len; u32 cipher; @@ -1169,7 +1169,7 @@ struct bss_parameters { int use_cts_prot; int use_short_preamble; int use_short_slot_time; - u8 *basic_rates; + const u8 *basic_rates; u8 basic_rates_len; int ap_isolate; int ht_opmode; @@ -1699,10 +1699,10 @@ struct cfg80211_disassoc_request { * @ht_capa_mask: The bits of ht_capa which are to be used. */ struct cfg80211_ibss_params { - u8 *ssid; - u8 *bssid; + const u8 *ssid; + const u8 *bssid; struct cfg80211_chan_def chandef; - u8 *ie; + const u8 *ie; u8 ssid_len, ie_len; u16 beacon_interval; u32 basic_rates; @@ -1811,8 +1811,8 @@ struct cfg80211_bitrate_mask { * @pmkid: The PMK material itself. */ struct cfg80211_pmksa { - u8 *bssid; - u8 *pmkid; + const u8 *bssid; + const u8 *pmkid; }; /** @@ -3289,7 +3289,7 @@ struct wireless_dev { struct cfg80211_ibss_params ibss; struct cfg80211_connect_params connect; struct cfg80211_cached_keys *keys; - u8 *ie; + const u8 *ie; size_t ie_len; u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; @@ -3530,7 +3530,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, * Return: 0 on success, or a negative error code. */ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype, u8 *bssid, bool qos); + enum nl80211_iftype iftype, const u8 *bssid, + bool qos); /** * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame @@ -4319,7 +4320,7 @@ void cfg80211_roamed_bss(struct net_device *dev, struct cfg80211_bss *bss, * and not try to connect to any AP any more. */ void cfg80211_disconnected(struct net_device *dev, u16 reason, - u8 *ie, size_t ie_len, gfp_t gfp); + const u8 *ie, size_t ie_len, gfp_t gfp); /** * cfg80211_ready_on_channel - notification of remain_on_channel start diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 6b50588b709f..8f345da3ea5f 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -420,8 +420,8 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, if (len > 0 && ssid[len - 1] == '\0') len--; + memcpy(wdev->ssid, ssid, len); wdev->wext.ibss.ssid = wdev->ssid; - memcpy(wdev->wext.ibss.ssid, ssid, len); wdev->wext.ibss.ssid_len = len; wdev_lock(wdev); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 4bc21a2b1989..ea701bbacc6a 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -879,7 +879,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, } void cfg80211_disconnected(struct net_device *dev, u16 reason, - u8 *ie, size_t ie_len, gfp_t gfp) + const u8 *ie, size_t ie_len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); diff --git a/net/wireless/util.c b/net/wireless/util.c index 8c61d5c6fad3..fa61ac9c9b26 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -476,7 +476,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, EXPORT_SYMBOL(ieee80211_data_to_8023); int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype, u8 *bssid, bool qos) + enum nl80211_iftype iftype, + const u8 *bssid, bool qos) { struct ieee80211_hdr hdr; u16 hdrlen, ethertype; -- cgit v1.2.3-71-gd317 From 922bd80fc33b5b90eb34b1485ebcf3c7b2e61618 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 May 2014 17:59:50 +0200 Subject: cfg80211: constify wowlan/coalesce mask/pattern pointers This requires changing the nl80211 parsing code a bit to use intermediate pointers for the allocation, but clarifies the API towards the drivers. Signed-off-by: Johannes Berg --- drivers/net/wireless/ti/wlcore/main.c | 2 +- drivers/net/wireless/ti/wlcore/wlcore_i.h | 4 ++-- include/net/cfg80211.h | 2 +- net/wireless/nl80211.c | 39 +++++++++++++++++-------------- 4 files changed, 26 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 077eb5b9cd74..02c91d6db753 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -1416,7 +1416,7 @@ void wl1271_rx_filter_free(struct wl12xx_rx_filter *filter) int wl1271_rx_filter_alloc_field(struct wl12xx_rx_filter *filter, u16 offset, u8 flags, - u8 *pattern, u8 len) + const u8 *pattern, u8 len) { struct wl12xx_rx_filter_field *field; diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h index 756e890bc5ee..c2c34a84ff3d 100644 --- a/drivers/net/wireless/ti/wlcore/wlcore_i.h +++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h @@ -512,8 +512,8 @@ int wl1271_recalc_rx_streaming(struct wl1271 *wl, struct wl12xx_vif *wlvif); void wl12xx_queue_recovery_work(struct wl1271 *wl); size_t wl12xx_copy_fwlog(struct wl1271 *wl, u8 *memblock, size_t maxlen); int wl1271_rx_filter_alloc_field(struct wl12xx_rx_filter *filter, - u16 offset, u8 flags, - u8 *pattern, u8 len); + u16 offset, u8 flags, + const u8 *pattern, u8 len); void wl1271_rx_filter_free(struct wl12xx_rx_filter *filter); struct wl12xx_rx_filter *wl1271_rx_filter_alloc(void); int wl1271_rx_filter_get_fields_size(struct wl12xx_rx_filter *filter); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 3299d1b731ef..fe4fa287f788 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1827,7 +1827,7 @@ struct cfg80211_pmksa { * memory, free @mask only! */ struct cfg80211_pkt_pattern { - u8 *mask, *pattern; + const u8 *mask, *pattern; int pattern_len; int pkt_offset; }; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ca19b1520389..49adf58646e6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -8554,6 +8554,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], rem) { + u8 *mask_pat; + nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat), nla_len(pat), NULL); err = -EINVAL; @@ -8577,19 +8579,18 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) goto error; new_triggers.patterns[i].pkt_offset = pkt_offset; - new_triggers.patterns[i].mask = - kmalloc(mask_len + pat_len, GFP_KERNEL); - if (!new_triggers.patterns[i].mask) { + mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL); + if (!mask_pat) { err = -ENOMEM; goto error; } - new_triggers.patterns[i].pattern = - new_triggers.patterns[i].mask + mask_len; - memcpy(new_triggers.patterns[i].mask, - nla_data(pat_tb[NL80211_PKTPAT_MASK]), + new_triggers.patterns[i].mask = mask_pat; + memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len); + mask_pat += mask_len; + new_triggers.patterns[i].pattern = mask_pat; new_triggers.patterns[i].pattern_len = pat_len; - memcpy(new_triggers.patterns[i].pattern, + memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len); i++; @@ -8781,6 +8782,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN], rem) { + u8 *mask_pat; + nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat), nla_len(pat), NULL); if (!pat_tb[NL80211_PKTPAT_MASK] || @@ -8802,17 +8805,19 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, return -EINVAL; new_rule->patterns[i].pkt_offset = pkt_offset; - new_rule->patterns[i].mask = - kmalloc(mask_len + pat_len, GFP_KERNEL); - if (!new_rule->patterns[i].mask) + mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL); + if (!mask_pat) return -ENOMEM; - new_rule->patterns[i].pattern = - new_rule->patterns[i].mask + mask_len; - memcpy(new_rule->patterns[i].mask, - nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len); + + new_rule->patterns[i].mask = mask_pat; + memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]), + mask_len); + + mask_pat += mask_len; + new_rule->patterns[i].pattern = mask_pat; new_rule->patterns[i].pattern_len = pat_len; - memcpy(new_rule->patterns[i].pattern, - nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len); + memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), + pat_len); i++; } -- cgit v1.2.3-71-gd317 From b54197c43db88f4436717f554d623189cddce29e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 May 2014 16:51:50 +0200 Subject: virtio_scsi: use cmd_size Taken almost entirely from Nicholas Bellinger's scsi-mq conversion. Signed-off-by: Christoph Hellwig Acked-by: Paolo Bonzini Reviewed-by: Nicholas Bellinger --- drivers/scsi/virtio_scsi.c | 25 +++++++------------------ include/scsi/scsi_cmnd.h | 9 +++++++++ 2 files changed, 16 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index fc054935eb1f..d4727b339474 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -199,7 +199,6 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) set_driver_byte(sc, DRIVER_SENSE); } - mempool_free(cmd, virtscsi_cmd_pool); sc->scsi_done(sc); atomic_dec(&tgt->reqs); @@ -242,8 +241,6 @@ static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf) if (cmd->comp) complete_all(cmd->comp); - else - mempool_free(cmd, virtscsi_cmd_pool); } static void virtscsi_ctrl_done(struct virtqueue *vq) @@ -459,10 +456,9 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi, struct virtio_scsi_vq *req_vq, struct scsi_cmnd *sc) { - struct virtio_scsi_cmd *cmd; - int ret; - struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev); + struct virtio_scsi_cmd *cmd = scsi_cmd_priv(sc); + BUG_ON(scsi_sg_count(sc) > shost->sg_tablesize); /* TODO: check feature bit and fail if unsupported? */ @@ -471,11 +467,6 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi, dev_dbg(&sc->device->sdev_gendev, "cmd %p CDB: %#02x\n", sc, sc->cmnd[0]); - ret = SCSI_MLQUEUE_HOST_BUSY; - cmd = mempool_alloc(virtscsi_cmd_pool, GFP_ATOMIC); - if (!cmd) - goto out; - memset(cmd, 0, sizeof(*cmd)); cmd->sc = sc; cmd->req.cmd = (struct virtio_scsi_cmd_req){ @@ -494,13 +485,9 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi, if (virtscsi_kick_cmd(req_vq, cmd, sizeof cmd->req.cmd, sizeof cmd->resp.cmd, - GFP_ATOMIC) == 0) - ret = 0; - else - mempool_free(cmd, virtscsi_cmd_pool); - -out: - return ret; + GFP_ATOMIC) != 0) + return SCSI_MLQUEUE_HOST_BUSY; + return 0; } static int virtscsi_queuecommand_single(struct Scsi_Host *sh, @@ -642,6 +629,7 @@ static struct scsi_host_template virtscsi_host_template_single = { .name = "Virtio SCSI HBA", .proc_name = "virtio_scsi", .this_id = -1, + .cmd_size = sizeof(struct virtio_scsi_cmd), .queuecommand = virtscsi_queuecommand_single, .eh_abort_handler = virtscsi_abort, .eh_device_reset_handler = virtscsi_device_reset, @@ -658,6 +646,7 @@ static struct scsi_host_template virtscsi_host_template_multi = { .name = "Virtio SCSI HBA", .proc_name = "virtio_scsi", .this_id = -1, + .cmd_size = sizeof(struct virtio_scsi_cmd), .queuecommand = virtscsi_queuecommand_multi, .eh_abort_handler = virtscsi_abort, .eh_device_reset_handler = virtscsi_device_reset, diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index dd7c998221b3..e016e2ac38df 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -133,6 +133,15 @@ struct scsi_cmnd { unsigned char tag; /* SCSI-II queued command tag */ }; +/* + * Return the driver private allocation behind the command. + * Only works if cmd_size is set in the host template. + */ +static inline void *scsi_cmd_priv(struct scsi_cmnd *cmd) +{ + return cmd + 1; +} + /* make sure not to use it with REQ_TYPE_BLOCK_PC commands */ static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd) { -- cgit v1.2.3-71-gd317 From 5533e0114425dcdb878f11b291f2727af8667a7c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 May 2014 19:33:07 -0400 Subject: cgroup: disallow debug controller on the default hierarchy The debug controller, as its name suggests, exposes cgroup core internals to userland to aid debugging. Unfortunately, except for the name, there's no provision to prevent its usage in production configurations and the controller is widely enabled and mounted leaking internal details to userland. Like most other debug information, the information exposed by debug isn't interesting even for debugging itself once the related parts are working reliably. This controller has no reason for existing. This patch implements cgrp_dfl_root_inhibit_ss_mask which can suppress specific subsystems on the default hierarchy and adds the debug subsystem to it so that it can be gradually deprecated as usages move towards the unified hierarchy. Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 2 ++ include/linux/cgroup_subsys.h | 11 +++++++---- kernel/cgroup.c | 21 ++++++++++++++++++--- 3 files changed, 27 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4afe544d3547..8a111dd42d7a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -305,6 +305,8 @@ enum { * the flag is not created. * * - blkcg: blk-throttle becomes properly hierarchical. + * + * - debug: disallowed on the default hierarchy. */ CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 768fe44e19f0..98c4f9b12b03 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -7,10 +7,6 @@ SUBSYS(cpuset) #endif -#if IS_ENABLED(CONFIG_CGROUP_DEBUG) -SUBSYS(debug) -#endif - #if IS_ENABLED(CONFIG_CGROUP_SCHED) SUBSYS(cpu) #endif @@ -50,6 +46,13 @@ SUBSYS(net_prio) #if IS_ENABLED(CONFIG_CGROUP_HUGETLB) SUBSYS(hugetlb) #endif + +/* + * The following subsystems are not supported on the default hierarchy. + */ +#if IS_ENABLED(CONFIG_CGROUP_DEBUG) +SUBSYS(debug) +#endif /* * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 082bb842b11a..a5f75ac4e793 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -148,6 +148,13 @@ struct cgroup_root cgrp_dfl_root; */ static bool cgrp_dfl_root_visible; +/* some controllers are not supported in the default hierarchy */ +static const unsigned int cgrp_dfl_root_inhibit_ss_mask = 0 +#ifdef CONFIG_CGROUP_DEBUG + | (1 << debug_cgrp_id) +#endif + ; + /* The list of hierarchy roots */ static LIST_HEAD(cgroup_roots); @@ -1126,6 +1133,7 @@ static void cgroup_clear_dir(struct cgroup *cgrp, unsigned int subsys_mask) static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask) { struct cgroup_subsys *ss; + unsigned int tmp_ss_mask; int ssid, i, ret; lockdep_assert_held(&cgroup_mutex); @@ -1143,7 +1151,12 @@ static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask) return -EBUSY; } - ret = cgroup_populate_dir(&dst_root->cgrp, ss_mask); + /* skip creating root files on dfl_root for inhibited subsystems */ + tmp_ss_mask = ss_mask; + if (dst_root == &cgrp_dfl_root) + tmp_ss_mask &= ~cgrp_dfl_root_inhibit_ss_mask; + + ret = cgroup_populate_dir(&dst_root->cgrp, tmp_ss_mask); if (ret) { if (dst_root != &cgrp_dfl_root) return ret; @@ -2426,7 +2439,8 @@ static int cgroup_root_controllers_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - cgroup_print_ss_mask(seq, cgrp->root->subsys_mask); + cgroup_print_ss_mask(seq, cgrp->root->subsys_mask & + ~cgrp_dfl_root_inhibit_ss_mask); return 0; } @@ -2564,7 +2578,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, if (tok[0] == '\0') continue; for_each_subsys(ss, ssid) { - if (ss->disabled || strcmp(tok + 1, ss->name)) + if (ss->disabled || strcmp(tok + 1, ss->name) || + ((1 << ss->id) & cgrp_dfl_root_inhibit_ss_mask)) continue; if (*tok == '+') { -- cgit v1.2.3-71-gd317 From 4f4aa2ec24dc45881849833a439558d3a378028c Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Sun, 18 May 2014 00:22:38 +0200 Subject: ssb: sprom: add dev_id field for value overriding standard ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some devices may have different features despite sharing the same ID (e.g. PCI ID). For example 14e4:4331 is usually a dual band, but this can be "limited". Device with "pci/x/y/devid=0x4332" supports 2.4 GHz only. Similarly 0x4333 will mean support for 5 GHz only. Add entry in SPROM so info described above can be extracted and stored. Signed-off-by: Rafał Miłecki Acked-by: Hauke Mehrtens Signed-off-by: John W. Linville --- arch/mips/bcm47xx/sprom.c | 1 + include/linux/ssb/ssb.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/arch/mips/bcm47xx/sprom.c b/arch/mips/bcm47xx/sprom.c index a8b5408dd349..da4cdb16844e 100644 --- a/arch/mips/bcm47xx/sprom.c +++ b/arch/mips/bcm47xx/sprom.c @@ -168,6 +168,7 @@ static void nvram_read_alpha2(const char *prefix, const char *name, static void bcm47xx_fill_sprom_r1234589(struct ssb_sprom *sprom, const char *prefix, bool fallback) { + nvram_read_u16(prefix, NULL, "devid", &sprom->dev_id, 0, fallback); nvram_read_u8(prefix, NULL, "ledbh0", &sprom->gpio0, 0xff, fallback); nvram_read_u8(prefix, NULL, "ledbh1", &sprom->gpio1, 0xff, fallback); nvram_read_u8(prefix, NULL, "ledbh2", &sprom->gpio2, 0xff, fallback); diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 07ef9b82b66d..4568a5cc9ab8 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -33,6 +33,7 @@ struct ssb_sprom { u8 et1phyaddr; /* MII address for enet1 */ u8 et0mdcport; /* MDIO for enet0 */ u8 et1mdcport; /* MDIO for enet1 */ + u16 dev_id; /* Device ID overriding e.g. PCI ID */ u16 board_rev; /* Board revision number from SPROM. */ u16 board_num; /* Board number from SPROM. */ u16 board_type; /* Board type from SPROM. */ -- cgit v1.2.3-71-gd317 From 57be1f3f3ec1ccab6432615ca161c4c9ece2a2aa Mon Sep 17 00:00:00 2001 From: Hiren Tandel Date: Mon, 5 May 2014 19:43:31 +0900 Subject: NFC: Add RAW socket type support for SOCKPROTO_RAW This allows for a more generic NFC sniffing by using SOCKPROTO_RAW SOCK_RAW to read RAW NFC frames. This is for sniffing anything but LLCP (HCI, NCI, etc...). Signed-off-by: Hiren Tandel Signed-off-by: Rahul Tank Signed-off-by: Samuel Ortiz --- include/net/nfc/nfc.h | 3 ++ include/uapi/linux/nfc.h | 16 ++++++--- net/nfc/llcp_commands.c | 2 +- net/nfc/llcp_core.c | 11 +++--- net/nfc/nfc.h | 6 ++++ net/nfc/rawsock.c | 94 +++++++++++++++++++++++++++++++++++++++++++++--- 6 files changed, 117 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 2e8b40c16274..6c583e244de2 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -264,4 +264,7 @@ int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type); int nfc_remove_se(struct nfc_dev *dev, u32 se_idx); struct nfc_se *nfc_find_se(struct nfc_dev *dev, u32 se_idx); +void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb, + u8 payload_type, u8 direction); + #endif /* __NET_NFC_H */ diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 9789dc95b6a8..9b19b4461928 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -273,11 +273,19 @@ struct sockaddr_nfc_llcp { * First byte is the adapter index * Second byte contains flags * - 0x01 - Direction (0=RX, 1=TX) - * - 0x02-0x80 - Reserved + * - 0x02-0x04 - Payload type (000=LLCP, 001=NCI, 010=HCI, 011=Digital, + * 100=Proprietary) + * - 0x05-0x80 - Reserved **/ -#define NFC_LLCP_RAW_HEADER_SIZE 2 -#define NFC_LLCP_DIRECTION_RX 0x00 -#define NFC_LLCP_DIRECTION_TX 0x01 +#define NFC_RAW_HEADER_SIZE 2 +#define NFC_DIRECTION_RX 0x00 +#define NFC_DIRECTION_TX 0x01 + +#define RAW_PAYLOAD_LLCP 0 +#define RAW_PAYLOAD_NCI 1 +#define RAW_PAYLOAD_HCI 2 +#define RAW_PAYLOAD_DIGITAL 3 +#define RAW_PAYLOAD_PROPRIETARY 4 /* socket option names */ #define NFC_LLCP_RW 0 diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index bec6ed15f503..a3ad69a4c648 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -387,7 +387,7 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) __net_timestamp(skb); - nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX); + nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_TX); return nfc_data_exchange(dev, local->target_idx, skb, nfc_llcp_recv, local); diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index b4671958fcf9..f6278da68763 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -680,16 +680,17 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, continue; if (skb_copy == NULL) { - skb_copy = __pskb_copy(skb, NFC_LLCP_RAW_HEADER_SIZE, + skb_copy = __pskb_copy(skb, NFC_RAW_HEADER_SIZE, GFP_ATOMIC); if (skb_copy == NULL) continue; - data = skb_push(skb_copy, NFC_LLCP_RAW_HEADER_SIZE); + data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE); data[0] = local->dev ? local->dev->idx : 0xFF; - data[1] = direction; + data[1] = direction & 0x01; + data[1] |= (RAW_PAYLOAD_LLCP << 1); } nskb = skb_clone(skb_copy, GFP_ATOMIC); @@ -747,7 +748,7 @@ static void nfc_llcp_tx_work(struct work_struct *work) __net_timestamp(skb); nfc_llcp_send_to_raw_sock(local, skb, - NFC_LLCP_DIRECTION_TX); + NFC_DIRECTION_TX); ret = nfc_data_exchange(local->dev, local->target_idx, skb, nfc_llcp_recv, local); @@ -1476,7 +1477,7 @@ static void nfc_llcp_rx_work(struct work_struct *work) __net_timestamp(skb); - nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX); + nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_RX); nfc_llcp_rx_skb(local, skb); diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 9d6e74f7e6b3..88d60064890e 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -40,6 +40,12 @@ struct nfc_rawsock { struct work_struct tx_work; bool tx_work_scheduled; }; + +struct nfc_sock_list { + struct hlist_head head; + rwlock_t lock; +}; + #define nfc_rawsock(sk) ((struct nfc_rawsock *) sk) #define to_rawsock_sk(_tx_work) \ ((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work)) diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index c27a6e86cae4..8627c75063e2 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -27,6 +27,24 @@ #include "nfc.h" +static struct nfc_sock_list raw_sk_list = { + .lock = __RW_LOCK_UNLOCKED(raw_sk_list.lock) +}; + +void nfc_sock_link(struct nfc_sock_list *l, struct sock *sk) +{ + write_lock(&l->lock); + sk_add_node(sk, &l->head); + write_unlock(&l->lock); +} + +void nfc_sock_unlink(struct nfc_sock_list *l, struct sock *sk) +{ + write_lock(&l->lock); + sk_del_node_init(sk); + write_unlock(&l->lock); +} + static void rawsock_write_queue_purge(struct sock *sk) { pr_debug("sk=%p\n", sk); @@ -57,6 +75,9 @@ static int rawsock_release(struct socket *sock) if (!sk) return 0; + if (sock->type == SOCK_RAW) + nfc_sock_unlink(&raw_sk_list, sk); + sock_orphan(sk); sock_put(sk); @@ -275,6 +296,26 @@ static const struct proto_ops rawsock_ops = { .mmap = sock_no_mmap, }; +static const struct proto_ops rawsock_raw_ops = { + .family = PF_NFC, + .owner = THIS_MODULE, + .release = rawsock_release, + .bind = sock_no_bind, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = sock_no_sendmsg, + .recvmsg = rawsock_recvmsg, + .mmap = sock_no_mmap, +}; + static void rawsock_destruct(struct sock *sk) { pr_debug("sk=%p\n", sk); @@ -300,10 +341,13 @@ static int rawsock_create(struct net *net, struct socket *sock, pr_debug("sock=%p\n", sock); - if (sock->type != SOCK_SEQPACKET) + if ((sock->type != SOCK_SEQPACKET) && (sock->type != SOCK_RAW)) return -ESOCKTNOSUPPORT; - sock->ops = &rawsock_ops; + if (sock->type == SOCK_RAW) + sock->ops = &rawsock_raw_ops; + else + sock->ops = &rawsock_ops; sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto); if (!sk) @@ -313,13 +357,53 @@ static int rawsock_create(struct net *net, struct socket *sock, sk->sk_protocol = nfc_proto->id; sk->sk_destruct = rawsock_destruct; sock->state = SS_UNCONNECTED; - - INIT_WORK(&nfc_rawsock(sk)->tx_work, rawsock_tx_work); - nfc_rawsock(sk)->tx_work_scheduled = false; + if (sock->type == SOCK_RAW) + nfc_sock_link(&raw_sk_list, sk); + else { + INIT_WORK(&nfc_rawsock(sk)->tx_work, rawsock_tx_work); + nfc_rawsock(sk)->tx_work_scheduled = false; + } return 0; } +void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb, + u8 payload_type, u8 direction) +{ + struct sk_buff *skb_copy = NULL, *nskb; + struct sock *sk; + u8 *data; + + read_lock(&raw_sk_list.lock); + + sk_for_each(sk, &raw_sk_list.head) { + if (!skb_copy) { + skb_copy = __pskb_copy(skb, NFC_RAW_HEADER_SIZE, + GFP_ATOMIC); + if (!skb_copy) + continue; + + data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE); + + data[0] = dev ? dev->idx : 0xFF; + data[1] = direction & 0x01; + data[1] |= (payload_type << 1); + } + + nskb = skb_clone(skb_copy, GFP_ATOMIC); + if (!nskb) + continue; + + if (sock_queue_rcv_skb(sk, nskb)) + kfree_skb(nskb); + } + + read_unlock(&raw_sk_list.lock); + + kfree_skb(skb_copy); +} +EXPORT_SYMBOL(nfc_send_to_raw_sock); + static struct proto rawsock_proto = { .name = "NFC_RAW", .owner = THIS_MODULE, -- cgit v1.2.3-71-gd317 From 867d849fc844623a88ec7b380442952b5ffe5e68 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 19 May 2014 21:53:19 +0200 Subject: cfg80211: export expected throughput through get_station() Users may need information about the expected throughput towards a given peer. This value is supposed to consider the size overhead generated by the 802.11 header. This value is exported in kbps through the get_station() API by including it into the station_info object. Moreover, it is sent to user space when replying to the nl80211 GET_STATION command. This information will be useful to the batman-adv module which will use it for its new metric computation. Signed-off-by: Antonio Quartulli Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 62 ++++++++++++++++++++++++-------------------- include/uapi/linux/nl80211.h | 3 +++ net/wireless/nl80211.c | 4 +++ 3 files changed, 41 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fe4fa287f788..857d6476a128 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -873,36 +873,38 @@ int cfg80211_check_station_change(struct wiphy *wiphy, * @STATION_INFO_NONPEER_PM: @nonpeer_pm filled * @STATION_INFO_CHAIN_SIGNAL: @chain_signal filled * @STATION_INFO_CHAIN_SIGNAL_AVG: @chain_signal_avg filled + * @STATION_INFO_EXPECTED_THROUGHPUT: @expected_throughput filled */ enum station_info_flags { - STATION_INFO_INACTIVE_TIME = 1<<0, - STATION_INFO_RX_BYTES = 1<<1, - STATION_INFO_TX_BYTES = 1<<2, - STATION_INFO_LLID = 1<<3, - STATION_INFO_PLID = 1<<4, - STATION_INFO_PLINK_STATE = 1<<5, - STATION_INFO_SIGNAL = 1<<6, - STATION_INFO_TX_BITRATE = 1<<7, - STATION_INFO_RX_PACKETS = 1<<8, - STATION_INFO_TX_PACKETS = 1<<9, - STATION_INFO_TX_RETRIES = 1<<10, - STATION_INFO_TX_FAILED = 1<<11, - STATION_INFO_RX_DROP_MISC = 1<<12, - STATION_INFO_SIGNAL_AVG = 1<<13, - STATION_INFO_RX_BITRATE = 1<<14, - STATION_INFO_BSS_PARAM = 1<<15, - STATION_INFO_CONNECTED_TIME = 1<<16, - STATION_INFO_ASSOC_REQ_IES = 1<<17, - STATION_INFO_STA_FLAGS = 1<<18, - STATION_INFO_BEACON_LOSS_COUNT = 1<<19, - STATION_INFO_T_OFFSET = 1<<20, - STATION_INFO_LOCAL_PM = 1<<21, - STATION_INFO_PEER_PM = 1<<22, - STATION_INFO_NONPEER_PM = 1<<23, - STATION_INFO_RX_BYTES64 = 1<<24, - STATION_INFO_TX_BYTES64 = 1<<25, - STATION_INFO_CHAIN_SIGNAL = 1<<26, - STATION_INFO_CHAIN_SIGNAL_AVG = 1<<27, + STATION_INFO_INACTIVE_TIME = BIT(0), + STATION_INFO_RX_BYTES = BIT(1), + STATION_INFO_TX_BYTES = BIT(2), + STATION_INFO_LLID = BIT(3), + STATION_INFO_PLID = BIT(4), + STATION_INFO_PLINK_STATE = BIT(5), + STATION_INFO_SIGNAL = BIT(6), + STATION_INFO_TX_BITRATE = BIT(7), + STATION_INFO_RX_PACKETS = BIT(8), + STATION_INFO_TX_PACKETS = BIT(9), + STATION_INFO_TX_RETRIES = BIT(10), + STATION_INFO_TX_FAILED = BIT(11), + STATION_INFO_RX_DROP_MISC = BIT(12), + STATION_INFO_SIGNAL_AVG = BIT(13), + STATION_INFO_RX_BITRATE = BIT(14), + STATION_INFO_BSS_PARAM = BIT(15), + STATION_INFO_CONNECTED_TIME = BIT(16), + STATION_INFO_ASSOC_REQ_IES = BIT(17), + STATION_INFO_STA_FLAGS = BIT(18), + STATION_INFO_BEACON_LOSS_COUNT = BIT(19), + STATION_INFO_T_OFFSET = BIT(20), + STATION_INFO_LOCAL_PM = BIT(21), + STATION_INFO_PEER_PM = BIT(22), + STATION_INFO_NONPEER_PM = BIT(23), + STATION_INFO_RX_BYTES64 = BIT(24), + STATION_INFO_TX_BYTES64 = BIT(25), + STATION_INFO_CHAIN_SIGNAL = BIT(26), + STATION_INFO_CHAIN_SIGNAL_AVG = BIT(27), + STATION_INFO_EXPECTED_THROUGHPUT = BIT(28), }; /** @@ -1024,6 +1026,8 @@ struct sta_bss_parameters { * @local_pm: local mesh STA power save mode * @peer_pm: peer mesh STA power save mode * @nonpeer_pm: non-peer mesh STA power save mode + * @expected_throughput: expected throughput in kbps (including 802.11 headers) + * towards this station. */ struct station_info { u32 filled; @@ -1062,6 +1066,8 @@ struct station_info { enum nl80211_mesh_power_mode peer_pm; enum nl80211_mesh_power_mode nonpeer_pm; + u32 expected_throughput; + /* * Note: Add a new enum station_info_flags value for each new field and * use it to check which fields are initialized. diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 0cfa827123ff..fb0efa1f9066 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2199,6 +2199,8 @@ enum nl80211_sta_bss_param { * Contains a nested array of signal strength attributes (u8, dBm) * @NL80211_STA_INFO_CHAIN_SIGNAL_AVG: per-chain signal strength average * Same format as NL80211_STA_INFO_CHAIN_SIGNAL. + * @NL80211_STA_EXPECTED_THROUGHPUT: expected throughput considering also the + * 802.11 header (u32, kbps) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -2230,6 +2232,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_TX_BYTES64, NL80211_STA_INFO_CHAIN_SIGNAL, NL80211_STA_INFO_CHAIN_SIGNAL_AVG, + NL80211_STA_INFO_EXPECTED_THROUGHPUT, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 49adf58646e6..62bdb1adaa4d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3650,6 +3650,10 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_TX_FAILED, sinfo->tx_failed)) goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_EXPECTED_THROUGHPUT) && + nla_put_u32(msg, NL80211_STA_INFO_EXPECTED_THROUGHPUT, + sinfo->expected_throughput)) + goto nla_put_failure; if ((sinfo->filled & STATION_INFO_BEACON_LOSS_COUNT) && nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS, sinfo->beacon_loss_count)) -- cgit v1.2.3-71-gd317 From e69595c2501094d85d5bbca87592acb8a481109a Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 19 Feb 2014 19:36:08 +0200 Subject: drm: Make the vblank disable timer per-crtc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently there's one per-device vblank disable timer, and it gets reset wheneven the vblank refcount for any crtc drops to zero. That means that one crtc could accidentally be keeping the vblank interrupts for other crtcs enabled even if there are no users for them. Make the disable timer per-crtc to avoid this issue. Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 40 +++++++++++++++++++++------------------- include/drm/drmP.h | 4 +++- 2 files changed, 24 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index ea20c4aa1b6b..90c59a8c820f 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -140,33 +140,34 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc) static void vblank_disable_fn(unsigned long arg) { - struct drm_device *dev = (struct drm_device *)arg; + struct drm_vblank_crtc *vblank = (void *)arg; + struct drm_device *dev = vblank->dev; unsigned long irqflags; - int i; + int crtc = vblank->crtc; if (!dev->vblank_disable_allowed) return; - for (i = 0; i < dev->num_crtcs; i++) { - spin_lock_irqsave(&dev->vbl_lock, irqflags); - if (atomic_read(&dev->vblank[i].refcount) == 0 && - dev->vblank[i].enabled) { - DRM_DEBUG("disabling vblank on crtc %d\n", i); - vblank_disable_and_save(dev, i); - } - spin_unlock_irqrestore(&dev->vbl_lock, irqflags); + spin_lock_irqsave(&dev->vbl_lock, irqflags); + if (atomic_read(&vblank->refcount) == 0 && vblank->enabled) { + DRM_DEBUG("disabling vblank on crtc %d\n", crtc); + vblank_disable_and_save(dev, crtc); } + spin_unlock_irqrestore(&dev->vbl_lock, irqflags); } void drm_vblank_cleanup(struct drm_device *dev) { + int crtc; + /* Bail if the driver didn't call drm_vblank_init() */ if (dev->num_crtcs == 0) return; - del_timer_sync(&dev->vblank_disable_timer); - - vblank_disable_fn((unsigned long)dev); + for (crtc = 0; crtc < dev->num_crtcs; crtc++) { + del_timer_sync(&dev->vblank[crtc].disable_timer); + vblank_disable_fn((unsigned long)&dev->vblank[crtc]); + } kfree(dev->vblank); @@ -178,8 +179,6 @@ int drm_vblank_init(struct drm_device *dev, int num_crtcs) { int i, ret = -ENOMEM; - setup_timer(&dev->vblank_disable_timer, vblank_disable_fn, - (unsigned long)dev); spin_lock_init(&dev->vbl_lock); spin_lock_init(&dev->vblank_time_lock); @@ -189,8 +188,13 @@ int drm_vblank_init(struct drm_device *dev, int num_crtcs) if (!dev->vblank) goto err; - for (i = 0; i < num_crtcs; i++) + for (i = 0; i < num_crtcs; i++) { + dev->vblank[i].dev = dev; + dev->vblank[i].crtc = i; init_waitqueue_head(&dev->vblank[i].queue); + setup_timer(&dev->vblank[i].disable_timer, vblank_disable_fn, + (unsigned long)&dev->vblank[i]); + } DRM_INFO("Supports vblank timestamp caching Rev 2 (21.10.2013).\n"); @@ -900,7 +904,7 @@ void drm_vblank_put(struct drm_device *dev, int crtc) /* Last user schedules interrupt disable */ if (atomic_dec_and_test(&dev->vblank[crtc].refcount) && (drm_vblank_offdelay > 0)) - mod_timer(&dev->vblank_disable_timer, + mod_timer(&dev->vblank[crtc].disable_timer, jiffies + ((drm_vblank_offdelay * HZ)/1000)); } EXPORT_SYMBOL(drm_vblank_put); @@ -909,8 +913,6 @@ EXPORT_SYMBOL(drm_vblank_put); * drm_vblank_off - disable vblank events on a CRTC * @dev: DRM device * @crtc: CRTC in question - * - * Caller must hold event lock. */ void drm_vblank_off(struct drm_device *dev, int crtc) { diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 12f10bc2395f..9d982d483f12 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1024,14 +1024,17 @@ struct drm_pending_vblank_event { }; struct drm_vblank_crtc { + struct drm_device *dev; /* pointer to the drm_device */ wait_queue_head_t queue; /**< VBLANK wait queue */ struct timeval time[DRM_VBLANKTIME_RBSIZE]; /**< timestamp of current count */ + struct timer_list disable_timer; /* delayed disable timer */ atomic_t count; /**< number of VBLANK interrupts */ atomic_t refcount; /* number of users of vblank interruptsper crtc */ u32 last; /* protected by dev->vbl_lock, used */ /* for wraparound handling */ u32 last_wait; /* Last vblank seqno waited per CRTC */ unsigned int inmodeset; /* Display driver is setting mode */ + int crtc; /* crtc index */ bool enabled; /* so we don't call enable more than once per disable */ }; @@ -1119,7 +1122,6 @@ struct drm_device { spinlock_t vblank_time_lock; /**< Protects vblank count and time updates during vblank enable/disable */ spinlock_t vbl_lock; - struct timer_list vblank_disable_timer; u32 max_vblank_count; /**< size of vblank counter register */ -- cgit v1.2.3-71-gd317 From f2752282f7f7b566b07113dd5aa130725aa95ac4 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 19 Feb 2014 21:29:49 +0200 Subject: drm: Add drm_vblank_on() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_vblank_off() will turn off vblank interrupts, but as long as the refcount is elevated drm_vblank_get() will not re-enable them. This is a problem is someone is holding a vblank reference while a modeset is happening, and the driver requires vblank interrupt to work during that time. Add drm_vblank_on() as a counterpart to drm_vblank_off() which will re-enabled vblank interrupts if the refcount is already elevated. This will allow drivers to choose the specific places in the modeset sequence at which vblank interrupts get disabled and enabled. Testcase: igt/kms_flip/*-vs-suspend Signed-off-by: Ville Syrjälä [danvet: Add Testcase tag for the igt I've written.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 72 ++++++++++++++++++++++++++---------- drivers/gpu/drm/i915/intel_display.c | 8 ++++ include/drm/drmP.h | 1 + 3 files changed, 62 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 13d671ed3421..dd786d84daab 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -839,6 +839,41 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc) smp_mb__after_atomic_inc(); } +/** + * drm_vblank_enable - enable the vblank interrupt on a CRTC + * @dev: DRM device + * @crtc: CRTC in question + */ +static int drm_vblank_enable(struct drm_device *dev, int crtc) +{ + int ret = 0; + + assert_spin_locked(&dev->vbl_lock); + + spin_lock(&dev->vblank_time_lock); + + if (!dev->vblank[crtc].enabled) { + /* Enable vblank irqs under vblank_time_lock protection. + * All vblank count & timestamp updates are held off + * until we are done reinitializing master counter and + * timestamps. Filtercode in drm_handle_vblank() will + * prevent double-accounting of same vblank interval. + */ + ret = dev->driver->enable_vblank(dev, crtc); + DRM_DEBUG("enabling vblank on crtc %d, ret: %d\n", crtc, ret); + if (ret) + atomic_dec(&dev->vblank[crtc].refcount); + else { + dev->vblank[crtc].enabled = true; + drm_update_vblank_count(dev, crtc); + } + } + + spin_unlock(&dev->vblank_time_lock); + + return ret; +} + /** * drm_vblank_get - get a reference count on vblank events * @dev: DRM device @@ -858,25 +893,7 @@ int drm_vblank_get(struct drm_device *dev, int crtc) spin_lock_irqsave(&dev->vbl_lock, irqflags); /* Going from 0->1 means we have to enable interrupts again */ if (atomic_add_return(1, &dev->vblank[crtc].refcount) == 1) { - spin_lock(&dev->vblank_time_lock); - if (!dev->vblank[crtc].enabled) { - /* Enable vblank irqs under vblank_time_lock protection. - * All vblank count & timestamp updates are held off - * until we are done reinitializing master counter and - * timestamps. Filtercode in drm_handle_vblank() will - * prevent double-accounting of same vblank interval. - */ - ret = dev->driver->enable_vblank(dev, crtc); - DRM_DEBUG("enabling vblank on crtc %d, ret: %d\n", - crtc, ret); - if (ret) - atomic_dec(&dev->vblank[crtc].refcount); - else { - dev->vblank[crtc].enabled = true; - drm_update_vblank_count(dev, crtc); - } - } - spin_unlock(&dev->vblank_time_lock); + ret = drm_vblank_enable(dev, crtc); } else { if (!dev->vblank[crtc].enabled) { atomic_dec(&dev->vblank[crtc].refcount); @@ -945,6 +962,23 @@ void drm_vblank_off(struct drm_device *dev, int crtc) } EXPORT_SYMBOL(drm_vblank_off); +/** + * drm_vblank_on - enable vblank events on a CRTC + * @dev: DRM device + * @crtc: CRTC in question + */ +void drm_vblank_on(struct drm_device *dev, int crtc) +{ + unsigned long irqflags; + + spin_lock_irqsave(&dev->vbl_lock, irqflags); + /* re-enable interrupts if there's are users left */ + if (atomic_read(&dev->vblank[crtc].refcount) != 0) + WARN_ON(drm_vblank_enable(dev, crtc)); + spin_unlock_irqrestore(&dev->vbl_lock, irqflags); +} +EXPORT_SYMBOL(drm_vblank_on); + /** * drm_vblank_pre_modeset - account for vblanks across mode sets * @dev: DRM device diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 72b5c34d8ce7..313f29d1aae2 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3737,6 +3737,8 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) * happening. */ intel_wait_for_vblank(dev, intel_crtc->pipe); + + drm_vblank_on(dev, pipe); } /* IPS only exists on ULT machines and is tied to pipe A. */ @@ -3828,6 +3830,8 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) * to change the workaround. */ haswell_mode_set_planes_workaround(intel_crtc); ilk_crtc_enable_planes(crtc); + + drm_vblank_on(dev, pipe); } static void ironlake_pfit_disable(struct intel_crtc *crtc) @@ -4351,6 +4355,8 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) for_each_encoder_on_crtc(dev, crtc, encoder) encoder->enable(encoder); + + drm_vblank_on(dev, pipe); } static void i9xx_crtc_enable(struct drm_crtc *crtc) @@ -4398,6 +4404,8 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc) for_each_encoder_on_crtc(dev, crtc, encoder) encoder->enable(encoder); + + drm_vblank_on(dev, pipe); } static void i9xx_pfit_disable(struct intel_crtc *crtc) diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 9d982d483f12..7339b2b00724 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1360,6 +1360,7 @@ extern bool drm_handle_vblank(struct drm_device *dev, int crtc); extern int drm_vblank_get(struct drm_device *dev, int crtc); extern void drm_vblank_put(struct drm_device *dev, int crtc); extern void drm_vblank_off(struct drm_device *dev, int crtc); +extern void drm_vblank_on(struct drm_device *dev, int crtc); extern void drm_vblank_cleanup(struct drm_device *dev); extern u32 drm_get_last_vbltimestamp(struct drm_device *dev, int crtc, struct timeval *tvblank, unsigned flags); -- cgit v1.2.3-71-gd317 From 66507c7bc8895f0da6b4ad87e96d61a9f7d7a118 Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Thu, 1 May 2014 20:51:19 -0400 Subject: mtd: nand: Add support to use nand_base poi databuf as bounce buffer nand_base can be passed a kmap()'d buffers from highmem by filesystems like jffs2. This results in failure to map the physical address of the DMA buffer on various contoller driver on different platforms. This change adds a chip option to use preallocated databuf as bounce buffers used in nand_do_read_ops() and nand_do_write_ops(). This allows for specific nand controller driver to set this option as needed. Signed-off-by: Kamal Dasu Signed-off-by: Brian Norris --- drivers/mtd/nand/nand_base.c | 37 +++++++++++++++++++++++++++++++------ include/linux/mtd/nand.h | 5 +++++ 2 files changed, 36 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 7853b9b0a05e..1b844b8c621f 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -1500,6 +1501,7 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, mtd->oobavail : mtd->oobsize; uint8_t *bufpoi, *oob, *buf; + int use_bufpoi; unsigned int max_bitflips = 0; int retry_mode = 0; bool ecc_fail = false; @@ -1522,9 +1524,20 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, bytes = min(mtd->writesize - col, readlen); aligned = (bytes == mtd->writesize); + if (!aligned) + use_bufpoi = 1; + else if (chip->options & NAND_USE_BOUNCE_BUFFER) + use_bufpoi = !virt_addr_valid(buf); + else + use_bufpoi = 0; + /* Is the current page in the buffer? */ if (realpage != chip->pagebuf || oob) { - bufpoi = aligned ? buf : chip->buffers->databuf; + bufpoi = use_bufpoi ? chip->buffers->databuf : buf; + + if (use_bufpoi && aligned) + pr_debug("%s: using read bounce buffer for buf@%p\n", + __func__, buf); read_retry: chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page); @@ -1546,7 +1559,7 @@ read_retry: ret = chip->ecc.read_page(mtd, chip, bufpoi, oob_required, page); if (ret < 0) { - if (!aligned) + if (use_bufpoi) /* Invalidate page cache */ chip->pagebuf = -1; break; @@ -1555,7 +1568,7 @@ read_retry: max_bitflips = max_t(unsigned int, max_bitflips, ret); /* Transfer not aligned data */ - if (!aligned) { + if (use_bufpoi) { if (!NAND_HAS_SUBPAGE_READ(chip) && !oob && !(mtd->ecc_stats.failed - ecc_failures) && (ops->mode != MTD_OPS_RAW)) { @@ -2375,11 +2388,23 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to, int bytes = mtd->writesize; int cached = writelen > bytes && page != blockmask; uint8_t *wbuf = buf; + int use_bufpoi; + int part_pagewr = (column || writelen < (mtd->writesize - 1)); + + if (part_pagewr) + use_bufpoi = 1; + else if (chip->options & NAND_USE_BOUNCE_BUFFER) + use_bufpoi = !virt_addr_valid(buf); + else + use_bufpoi = 0; - /* Partial page write? */ - if (unlikely(column || writelen < (mtd->writesize - 1))) { + /* Partial page write?, or need to use bounce buffer */ + if (use_bufpoi) { + pr_debug("%s: using write bounce buffer for buf@%p\n", + __func__, buf); cached = 0; - bytes = min_t(int, bytes - column, (int) writelen); + if (part_pagewr) + bytes = min_t(int, bytes - column, writelen); chip->pagebuf = -1; memset(chip->buffers->databuf, 0xff, mtd->writesize); memcpy(&chip->buffers->databuf[column], buf, bytes); diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 7a922e6c4e4b..2f0af2891f0f 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -175,6 +175,11 @@ typedef enum { #define NAND_OWN_BUFFERS 0x00020000 /* Chip may not exist, so silence any errors in scan */ #define NAND_SCAN_SILENT_NODEV 0x00040000 +/* + * This option could be defined by controller drivers to protect against + * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers + */ +#define NAND_USE_BOUNCE_BUFFER 0x00080000 /* * Autodetect nand buswidth with readid/onfi. * This suppose the driver will configure the hardware in 8 bits mode -- cgit v1.2.3-71-gd317 From 96ba9dd65788a0bd2a7d1e57ec78b7642f0ccc25 Mon Sep 17 00:00:00 2001 From: Vincenzo Aliberti Date: Thu, 15 May 2014 23:19:32 +0200 Subject: mtd: lpddr: add driver for LPDDR2-NVM PCM memories Signed-off-by: Vincenzo Aliberti Signed-off-by: Brian Norris --- drivers/mtd/lpddr/Kconfig | 12 +- drivers/mtd/lpddr/Makefile | 1 + drivers/mtd/lpddr/lpddr2_nvm.c | 507 +++++++++++++++++++++++++++++++++++++++++ include/uapi/mtd/mtd-abi.h | 1 + 4 files changed, 519 insertions(+), 2 deletions(-) create mode 100644 drivers/mtd/lpddr/lpddr2_nvm.c (limited to 'include') diff --git a/drivers/mtd/lpddr/Kconfig b/drivers/mtd/lpddr/Kconfig index 265f969817e3..468f06dea45d 100644 --- a/drivers/mtd/lpddr/Kconfig +++ b/drivers/mtd/lpddr/Kconfig @@ -1,5 +1,5 @@ -menu "LPDDR flash memory drivers" - depends on MTD!=n +menu "LPDDR & LPDDR2 PCM memory drivers" + depends on MTD config MTD_LPDDR tristate "Support for LPDDR flash chips" @@ -17,4 +17,12 @@ config MTD_QINFO_PROBE Window QINFO interface, permits software to be used for entire families of devices. This serves similar purpose of CFI on legacy Flash products + +config MTD_LPDDR2_NVM + depends on MTD + tristate "Support for LPDDR2-NVM flash chips" + help + This option enables support of PCM memories with a LPDDR2-NVM + (Low power double data rate 2) interface. + endmenu diff --git a/drivers/mtd/lpddr/Makefile b/drivers/mtd/lpddr/Makefile index da48e46b5812..881d440d483e 100644 --- a/drivers/mtd/lpddr/Makefile +++ b/drivers/mtd/lpddr/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_MTD_QINFO_PROBE) += qinfo_probe.o obj-$(CONFIG_MTD_LPDDR) += lpddr_cmds.o +obj-$(CONFIG_MTD_LPDDR2_NVM) += lpddr2_nvm.o diff --git a/drivers/mtd/lpddr/lpddr2_nvm.c b/drivers/mtd/lpddr/lpddr2_nvm.c new file mode 100644 index 000000000000..063cec40d0ae --- /dev/null +++ b/drivers/mtd/lpddr/lpddr2_nvm.c @@ -0,0 +1,507 @@ +/* + * LPDDR2-NVM MTD driver. This module provides read, write, erase, lock/unlock + * support for LPDDR2-NVM PCM memories + * + * Copyright © 2012 Micron Technology, Inc. + * + * Vincenzo Aliberti + * Domenico Manna + * Many thanks to Andrea Vigilante for initial enabling + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Parameters */ +#define ERASE_BLOCKSIZE (0x00020000/2) /* in Word */ +#define WRITE_BUFFSIZE (0x00000400/2) /* in Word */ +#define OW_BASE_ADDRESS 0x00000000 /* OW offset */ +#define BUS_WIDTH 0x00000020 /* x32 devices */ + +/* PFOW symbols address offset */ +#define PFOW_QUERY_STRING_P (0x0000/2) /* in Word */ +#define PFOW_QUERY_STRING_F (0x0002/2) /* in Word */ +#define PFOW_QUERY_STRING_O (0x0004/2) /* in Word */ +#define PFOW_QUERY_STRING_W (0x0006/2) /* in Word */ + +/* OW registers address */ +#define CMD_CODE_OFS (0x0080/2) /* in Word */ +#define CMD_DATA_OFS (0x0084/2) /* in Word */ +#define CMD_ADD_L_OFS (0x0088/2) /* in Word */ +#define CMD_ADD_H_OFS (0x008A/2) /* in Word */ +#define MPR_L_OFS (0x0090/2) /* in Word */ +#define MPR_H_OFS (0x0092/2) /* in Word */ +#define CMD_EXEC_OFS (0x00C0/2) /* in Word */ +#define STATUS_REG_OFS (0x00CC/2) /* in Word */ +#define PRG_BUFFER_OFS (0x0010/2) /* in Word */ + +/* Datamask */ +#define MR_CFGMASK 0x8000 +#define SR_OK_DATAMASK 0x0080 + +/* LPDDR2-NVM Commands */ +#define LPDDR2_NVM_LOCK 0x0061 +#define LPDDR2_NVM_UNLOCK 0x0062 +#define LPDDR2_NVM_SW_PROGRAM 0x0041 +#define LPDDR2_NVM_SW_OVERWRITE 0x0042 +#define LPDDR2_NVM_BUF_PROGRAM 0x00E9 +#define LPDDR2_NVM_BUF_OVERWRITE 0x00EA +#define LPDDR2_NVM_ERASE 0x0020 + +/* LPDDR2-NVM Registers offset */ +#define LPDDR2_MODE_REG_DATA 0x0040 +#define LPDDR2_MODE_REG_CFG 0x0050 + +/* + * Internal Type Definitions + * pcm_int_data contains memory controller details: + * @reg_data : LPDDR2_MODE_REG_DATA register address after remapping + * @reg_cfg : LPDDR2_MODE_REG_CFG register address after remapping + * &bus_width: memory bus-width (eg: x16 2 Bytes, x32 4 Bytes) + */ +struct pcm_int_data { + void __iomem *ctl_regs; + int bus_width; +}; + +static DEFINE_MUTEX(lpdd2_nvm_mutex); + +/* + * Build a map_word starting from an u_long + */ +static inline map_word build_map_word(u_long myword) +{ + map_word val = { {0} }; + val.x[0] = myword; + return val; +} + +/* + * Build Mode Register Configuration DataMask based on device bus-width + */ +static inline u_int build_mr_cfgmask(u_int bus_width) +{ + u_int val = MR_CFGMASK; + + if (bus_width == 0x0004) /* x32 device */ + val = val << 16; + + return val; +} + +/* + * Build Status Register OK DataMask based on device bus-width + */ +static inline u_int build_sr_ok_datamask(u_int bus_width) +{ + u_int val = SR_OK_DATAMASK; + + if (bus_width == 0x0004) /* x32 device */ + val = (val << 16)+val; + + return val; +} + +/* + * Evaluates Overlay Window Control Registers address + */ +static inline u_long ow_reg_add(struct map_info *map, u_long offset) +{ + u_long val = 0; + struct pcm_int_data *pcm_data = map->fldrv_priv; + + val = map->pfow_base + offset*pcm_data->bus_width; + + return val; +} + +/* + * Enable lpddr2-nvm Overlay Window + * Overlay Window is a memory mapped area containing all LPDDR2-NVM registers + * used by device commands as well as uservisible resources like Device Status + * Register, Device ID, etc + */ +static inline void ow_enable(struct map_info *map) +{ + struct pcm_int_data *pcm_data = map->fldrv_priv; + + writel_relaxed(build_mr_cfgmask(pcm_data->bus_width) | 0x18, + pcm_data->ctl_regs + LPDDR2_MODE_REG_CFG); + writel_relaxed(0x01, pcm_data->ctl_regs + LPDDR2_MODE_REG_DATA); +} + +/* + * Disable lpddr2-nvm Overlay Window + * Overlay Window is a memory mapped area containing all LPDDR2-NVM registers + * used by device commands as well as uservisible resources like Device Status + * Register, Device ID, etc + */ +static inline void ow_disable(struct map_info *map) +{ + struct pcm_int_data *pcm_data = map->fldrv_priv; + + writel_relaxed(build_mr_cfgmask(pcm_data->bus_width) | 0x18, + pcm_data->ctl_regs + LPDDR2_MODE_REG_CFG); + writel_relaxed(0x02, pcm_data->ctl_regs + LPDDR2_MODE_REG_DATA); +} + +/* + * Execute lpddr2-nvm operations + */ +static int lpddr2_nvm_do_op(struct map_info *map, u_long cmd_code, + u_long cmd_data, u_long cmd_add, u_long cmd_mpr, u_char *buf) +{ + map_word add_l = { {0} }, add_h = { {0} }, mpr_l = { {0} }, + mpr_h = { {0} }, data_l = { {0} }, cmd = { {0} }, + exec_cmd = { {0} }, sr; + map_word data_h = { {0} }; /* only for 2x x16 devices stacked */ + u_long i, status_reg, prg_buff_ofs; + struct pcm_int_data *pcm_data = map->fldrv_priv; + u_int sr_ok_datamask = build_sr_ok_datamask(pcm_data->bus_width); + + /* Builds low and high words for OW Control Registers */ + add_l.x[0] = cmd_add & 0x0000FFFF; + add_h.x[0] = (cmd_add >> 16) & 0x0000FFFF; + mpr_l.x[0] = cmd_mpr & 0x0000FFFF; + mpr_h.x[0] = (cmd_mpr >> 16) & 0x0000FFFF; + cmd.x[0] = cmd_code & 0x0000FFFF; + exec_cmd.x[0] = 0x0001; + data_l.x[0] = cmd_data & 0x0000FFFF; + data_h.x[0] = (cmd_data >> 16) & 0x0000FFFF; /* only for 2x x16 */ + + /* Set Overlay Window Control Registers */ + map_write(map, cmd, ow_reg_add(map, CMD_CODE_OFS)); + map_write(map, data_l, ow_reg_add(map, CMD_DATA_OFS)); + map_write(map, add_l, ow_reg_add(map, CMD_ADD_L_OFS)); + map_write(map, add_h, ow_reg_add(map, CMD_ADD_H_OFS)); + map_write(map, mpr_l, ow_reg_add(map, MPR_L_OFS)); + map_write(map, mpr_h, ow_reg_add(map, MPR_H_OFS)); + if (pcm_data->bus_width == 0x0004) { /* 2x16 devices stacked */ + map_write(map, cmd, ow_reg_add(map, CMD_CODE_OFS) + 2); + map_write(map, data_h, ow_reg_add(map, CMD_DATA_OFS) + 2); + map_write(map, add_l, ow_reg_add(map, CMD_ADD_L_OFS) + 2); + map_write(map, add_h, ow_reg_add(map, CMD_ADD_H_OFS) + 2); + map_write(map, mpr_l, ow_reg_add(map, MPR_L_OFS) + 2); + map_write(map, mpr_h, ow_reg_add(map, MPR_H_OFS) + 2); + } + + /* Fill Program Buffer */ + if ((cmd_code == LPDDR2_NVM_BUF_PROGRAM) || + (cmd_code == LPDDR2_NVM_BUF_OVERWRITE)) { + prg_buff_ofs = (map_read(map, + ow_reg_add(map, PRG_BUFFER_OFS))).x[0]; + for (i = 0; i < cmd_mpr; i++) { + map_write(map, build_map_word(buf[i]), map->pfow_base + + prg_buff_ofs + i); + } + } + + /* Command Execute */ + map_write(map, exec_cmd, ow_reg_add(map, CMD_EXEC_OFS)); + if (pcm_data->bus_width == 0x0004) /* 2x16 devices stacked */ + map_write(map, exec_cmd, ow_reg_add(map, CMD_EXEC_OFS) + 2); + + /* Status Register Check */ + do { + sr = map_read(map, ow_reg_add(map, STATUS_REG_OFS)); + status_reg = sr.x[0]; + if (pcm_data->bus_width == 0x0004) {/* 2x16 devices stacked */ + sr = map_read(map, ow_reg_add(map, + STATUS_REG_OFS) + 2); + status_reg += sr.x[0] << 16; + } + } while ((status_reg & sr_ok_datamask) != sr_ok_datamask); + + return (((status_reg & sr_ok_datamask) == sr_ok_datamask) ? 0 : -EIO); +} + +/* + * Execute lpddr2-nvm operations @ block level + */ +static int lpddr2_nvm_do_block_op(struct mtd_info *mtd, loff_t start_add, + uint64_t len, u_char block_op) +{ + struct map_info *map = mtd->priv; + u_long add, end_add; + int ret = 0; + + mutex_lock(&lpdd2_nvm_mutex); + + ow_enable(map); + + add = start_add; + end_add = add + len; + + do { + ret = lpddr2_nvm_do_op(map, block_op, 0x00, add, add, NULL); + if (ret) + goto out; + add += mtd->erasesize; + } while (add < end_add); + +out: + ow_disable(map); + mutex_unlock(&lpdd2_nvm_mutex); + return ret; +} + +/* + * verify presence of PFOW string + */ +static int lpddr2_nvm_pfow_present(struct map_info *map) +{ + map_word pfow_val[4]; + unsigned int found = 1; + + mutex_lock(&lpdd2_nvm_mutex); + + ow_enable(map); + + /* Load string from array */ + pfow_val[0] = map_read(map, ow_reg_add(map, PFOW_QUERY_STRING_P)); + pfow_val[1] = map_read(map, ow_reg_add(map, PFOW_QUERY_STRING_F)); + pfow_val[2] = map_read(map, ow_reg_add(map, PFOW_QUERY_STRING_O)); + pfow_val[3] = map_read(map, ow_reg_add(map, PFOW_QUERY_STRING_W)); + + /* Verify the string loaded vs expected */ + if (!map_word_equal(map, build_map_word('P'), pfow_val[0])) + found = 0; + if (!map_word_equal(map, build_map_word('F'), pfow_val[1])) + found = 0; + if (!map_word_equal(map, build_map_word('O'), pfow_val[2])) + found = 0; + if (!map_word_equal(map, build_map_word('W'), pfow_val[3])) + found = 0; + + ow_disable(map); + + mutex_unlock(&lpdd2_nvm_mutex); + + return found; +} + +/* + * lpddr2_nvm driver read method + */ +static int lpddr2_nvm_read(struct mtd_info *mtd, loff_t start_add, + size_t len, size_t *retlen, u_char *buf) +{ + struct map_info *map = mtd->priv; + + mutex_lock(&lpdd2_nvm_mutex); + + *retlen = len; + + map_copy_from(map, buf, start_add, *retlen); + + mutex_unlock(&lpdd2_nvm_mutex); + return 0; +} + +/* + * lpddr2_nvm driver write method + */ +static int lpddr2_nvm_write(struct mtd_info *mtd, loff_t start_add, + size_t len, size_t *retlen, const u_char *buf) +{ + struct map_info *map = mtd->priv; + struct pcm_int_data *pcm_data = map->fldrv_priv; + u_long add, current_len, tot_len, target_len, my_data; + u_char *write_buf = (u_char *)buf; + int ret = 0; + + mutex_lock(&lpdd2_nvm_mutex); + + ow_enable(map); + + /* Set start value for the variables */ + add = start_add; + target_len = len; + tot_len = 0; + + while (tot_len < target_len) { + if (!(IS_ALIGNED(add, mtd->writesize))) { /* do sw program */ + my_data = write_buf[tot_len]; + my_data += (write_buf[tot_len+1]) << 8; + if (pcm_data->bus_width == 0x0004) {/* 2x16 devices */ + my_data += (write_buf[tot_len+2]) << 16; + my_data += (write_buf[tot_len+3]) << 24; + } + ret = lpddr2_nvm_do_op(map, LPDDR2_NVM_SW_OVERWRITE, + my_data, add, 0x00, NULL); + if (ret) + goto out; + + add += pcm_data->bus_width; + tot_len += pcm_data->bus_width; + } else { /* do buffer program */ + current_len = min(target_len - tot_len, + (u_long) mtd->writesize); + ret = lpddr2_nvm_do_op(map, LPDDR2_NVM_BUF_OVERWRITE, + 0x00, add, current_len, write_buf + tot_len); + if (ret) + goto out; + + add += current_len; + tot_len += current_len; + } + } + +out: + *retlen = tot_len; + ow_disable(map); + mutex_unlock(&lpdd2_nvm_mutex); + return ret; +} + +/* + * lpddr2_nvm driver erase method + */ +static int lpddr2_nvm_erase(struct mtd_info *mtd, struct erase_info *instr) +{ + int ret = lpddr2_nvm_do_block_op(mtd, instr->addr, instr->len, + LPDDR2_NVM_ERASE); + if (!ret) { + instr->state = MTD_ERASE_DONE; + mtd_erase_callback(instr); + } + + return ret; +} + +/* + * lpddr2_nvm driver unlock method + */ +static int lpddr2_nvm_unlock(struct mtd_info *mtd, loff_t start_add, + uint64_t len) +{ + return lpddr2_nvm_do_block_op(mtd, start_add, len, LPDDR2_NVM_UNLOCK); +} + +/* + * lpddr2_nvm driver lock method + */ +static int lpddr2_nvm_lock(struct mtd_info *mtd, loff_t start_add, + uint64_t len) +{ + return lpddr2_nvm_do_block_op(mtd, start_add, len, LPDDR2_NVM_LOCK); +} + +/* + * lpddr2_nvm driver probe method + */ +static int lpddr2_nvm_probe(struct platform_device *pdev) +{ + struct map_info *map; + struct mtd_info *mtd; + struct resource *add_range; + struct resource *control_regs; + struct pcm_int_data *pcm_data; + + /* Allocate memory control_regs data structures */ + pcm_data = devm_kzalloc(&pdev->dev, sizeof(*pcm_data), GFP_KERNEL); + if (!pcm_data) + return -ENOMEM; + + pcm_data->bus_width = BUS_WIDTH; + + /* Allocate memory for map_info & mtd_info data structures */ + map = devm_kzalloc(&pdev->dev, sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + + mtd = devm_kzalloc(&pdev->dev, sizeof(*mtd), GFP_KERNEL); + if (!mtd) + return -ENOMEM; + + /* lpddr2_nvm address range */ + add_range = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + /* Populate map_info data structure */ + *map = (struct map_info) { + .virt = devm_ioremap_resource(&pdev->dev, add_range), + .name = pdev->dev.init_name, + .phys = add_range->start, + .size = resource_size(add_range), + .bankwidth = pcm_data->bus_width / 2, + .pfow_base = OW_BASE_ADDRESS, + .fldrv_priv = pcm_data, + }; + if (IS_ERR(map->virt)) + return PTR_ERR(map->virt); + + simple_map_init(map); /* fill with default methods */ + + control_regs = platform_get_resource(pdev, IORESOURCE_MEM, 1); + pcm_data->ctl_regs = devm_ioremap_resource(&pdev->dev, control_regs); + if (IS_ERR(pcm_data->ctl_regs)) + return PTR_ERR(pcm_data->ctl_regs); + + /* Populate mtd_info data structure */ + *mtd = (struct mtd_info) { + .name = pdev->dev.init_name, + .type = MTD_RAM, + .priv = map, + .size = resource_size(add_range), + .erasesize = ERASE_BLOCKSIZE * pcm_data->bus_width, + .writesize = 1, + .writebufsize = WRITE_BUFFSIZE * pcm_data->bus_width, + .flags = (MTD_CAP_NVRAM | MTD_POWERUP_LOCK), + ._read = lpddr2_nvm_read, + ._write = lpddr2_nvm_write, + ._erase = lpddr2_nvm_erase, + ._unlock = lpddr2_nvm_unlock, + ._lock = lpddr2_nvm_lock, + }; + + /* Verify the presence of the device looking for PFOW string */ + if (!lpddr2_nvm_pfow_present(map)) { + pr_err("device not recognized\n"); + return -EINVAL; + } + /* Parse partitions and register the MTD device */ + return mtd_device_parse_register(mtd, NULL, NULL, NULL, 0); +} + +/* + * lpddr2_nvm driver remove method + */ +static int lpddr2_nvm_remove(struct platform_device *pdev) +{ + return mtd_device_unregister(dev_get_drvdata(&pdev->dev)); +} + +/* Initialize platform_driver data structure for lpddr2_nvm */ +static struct platform_driver lpddr2_nvm_drv = { + .driver = { + .name = "lpddr2_nvm", + }, + .probe = lpddr2_nvm_probe, + .remove = lpddr2_nvm_remove, +}; + +module_platform_driver(lpddr2_nvm_drv); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Vincenzo Aliberti "); +MODULE_DESCRIPTION("MTD driver for LPDDR2-NVM PCM memories"); diff --git a/include/uapi/mtd/mtd-abi.h b/include/uapi/mtd/mtd-abi.h index e272ea060e38..763bb6950402 100644 --- a/include/uapi/mtd/mtd-abi.h +++ b/include/uapi/mtd/mtd-abi.h @@ -109,6 +109,7 @@ struct mtd_write_req { #define MTD_CAP_RAM (MTD_WRITEABLE | MTD_BIT_WRITEABLE | MTD_NO_ERASE) #define MTD_CAP_NORFLASH (MTD_WRITEABLE | MTD_BIT_WRITEABLE) #define MTD_CAP_NANDFLASH (MTD_WRITEABLE) +#define MTD_CAP_NVRAM (MTD_WRITEABLE | MTD_BIT_WRITEABLE | MTD_NO_ERASE) /* Obsolete ECC byte placement modes (used with obsolete MEMGETOOBSEL) */ #define MTD_NANDECC_OFF 0 // Switch off ECC (Not recommended) -- cgit v1.2.3-71-gd317 From 27c9fd607587e6c3b517590df4cd35ac85f3d0bd Mon Sep 17 00:00:00 2001 From: pekon gupta Date: Mon, 19 May 2014 13:24:39 +0530 Subject: mtd: nand: omap: add support for BCH16_ECC - GPMC driver updates This patch add support for BCH16_ECC in GPMC (controller) driver: - extends configuration space to include BCH16 registers - extends parsing of DT binding for selecting BCH16 ecc-scheme Signed-off-by: Pekon Gupta Acked-by: Tony Lindgren Signed-off-by: Brian Norris --- arch/arm/mach-omap2/gpmc.c | 15 +++++++++++++++ include/linux/platform_data/mtd-nand-omap2.h | 5 +++++ 2 files changed, 20 insertions(+) (limited to 'include') diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c index ab43755364f5..9b27773db040 100644 --- a/arch/arm/mach-omap2/gpmc.c +++ b/arch/arm/mach-omap2/gpmc.c @@ -68,6 +68,9 @@ #define GPMC_ECC_BCH_RESULT_1 0x244 /* not available on OMAP2 */ #define GPMC_ECC_BCH_RESULT_2 0x248 /* not available on OMAP2 */ #define GPMC_ECC_BCH_RESULT_3 0x24c /* not available on OMAP2 */ +#define GPMC_ECC_BCH_RESULT_4 0x300 /* not available on OMAP2 */ +#define GPMC_ECC_BCH_RESULT_5 0x304 /* not available on OMAP2 */ +#define GPMC_ECC_BCH_RESULT_6 0x308 /* not available on OMAP2 */ /* GPMC ECC control settings */ #define GPMC_ECC_CTRL_ECCCLEAR 0x100 @@ -666,6 +669,12 @@ void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs) GPMC_BCH_SIZE * i; reg->gpmc_bch_result3[i] = gpmc_base + GPMC_ECC_BCH_RESULT_3 + GPMC_BCH_SIZE * i; + reg->gpmc_bch_result4[i] = gpmc_base + GPMC_ECC_BCH_RESULT_4 + + i * GPMC_BCH_SIZE; + reg->gpmc_bch_result5[i] = gpmc_base + GPMC_ECC_BCH_RESULT_5 + + i * GPMC_BCH_SIZE; + reg->gpmc_bch_result6[i] = gpmc_base + GPMC_ECC_BCH_RESULT_6 + + i * GPMC_BCH_SIZE; } } @@ -1401,6 +1410,12 @@ static int gpmc_probe_nand_child(struct platform_device *pdev, else gpmc_nand_data->ecc_opt = OMAP_ECC_BCH8_CODE_HW_DETECTION_SW; + else if (!strcmp(s, "bch16")) + if (gpmc_nand_data->elm_of_node) + gpmc_nand_data->ecc_opt = + OMAP_ECC_BCH16_CODE_HW; + else + pr_err("%s: BCH16 requires ELM support\n", __func__); else pr_err("%s: ti,nand-ecc-opt invalid value\n", __func__); diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index 3e9dd6676b97..660c029d694f 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -31,6 +31,8 @@ enum omap_ecc { OMAP_ECC_BCH8_CODE_HW_DETECTION_SW, /* 8-bit ECC calculation by GPMC, Error detection by ELM */ OMAP_ECC_BCH8_CODE_HW, + /* 16-bit ECC calculation by GPMC, Error detection by ELM */ + OMAP_ECC_BCH16_CODE_HW, }; struct gpmc_nand_regs { @@ -50,6 +52,9 @@ struct gpmc_nand_regs { void __iomem *gpmc_bch_result1[GPMC_BCH_NUM_REMAINDER]; void __iomem *gpmc_bch_result2[GPMC_BCH_NUM_REMAINDER]; void __iomem *gpmc_bch_result3[GPMC_BCH_NUM_REMAINDER]; + void __iomem *gpmc_bch_result4[GPMC_BCH_NUM_REMAINDER]; + void __iomem *gpmc_bch_result5[GPMC_BCH_NUM_REMAINDER]; + void __iomem *gpmc_bch_result6[GPMC_BCH_NUM_REMAINDER]; }; struct omap_nand_platform_data { -- cgit v1.2.3-71-gd317 From 2be589e4b28457f148640dc6addf6da24af64b7f Mon Sep 17 00:00:00 2001 From: pekon gupta Date: Mon, 19 May 2014 13:24:40 +0530 Subject: mtd: nand: omap: add support for BCH16_ECC - ELM driver updates ELM hardware engine is used to detect ECC errors for BCHx ecc-schemes (like BCH4/BCH8/BCH16). This patch extends configuration of ELM registers for adding support of BCH16_HW ecc-scheme. Signed-off-by: Pekon Gupta Signed-off-by: Brian Norris --- drivers/mtd/devices/elm.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/platform_data/elm.h | 3 ++- 2 files changed, 38 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/mtd/devices/elm.c b/drivers/mtd/devices/elm.c index 0a037b15c11b..7df86948e6d4 100644 --- a/drivers/mtd/devices/elm.c +++ b/drivers/mtd/devices/elm.c @@ -213,6 +213,28 @@ static void elm_load_syndrome(struct elm_info *info, val = cpu_to_be32(*(u32 *) &ecc[0]) >> 12; elm_write_reg(info, offset, val); break; + case BCH16_ECC: + val = cpu_to_be32(*(u32 *) &ecc[22]); + elm_write_reg(info, offset, val); + offset += 4; + val = cpu_to_be32(*(u32 *) &ecc[18]); + elm_write_reg(info, offset, val); + offset += 4; + val = cpu_to_be32(*(u32 *) &ecc[14]); + elm_write_reg(info, offset, val); + offset += 4; + val = cpu_to_be32(*(u32 *) &ecc[10]); + elm_write_reg(info, offset, val); + offset += 4; + val = cpu_to_be32(*(u32 *) &ecc[6]); + elm_write_reg(info, offset, val); + offset += 4; + val = cpu_to_be32(*(u32 *) &ecc[2]); + elm_write_reg(info, offset, val); + offset += 4; + val = cpu_to_be32(*(u32 *) &ecc[0]) >> 16; + elm_write_reg(info, offset, val); + break; default: pr_err("invalid config bch_type\n"); } @@ -436,6 +458,13 @@ static int elm_context_save(struct elm_info *info) for (i = 0; i < ERROR_VECTOR_MAX; i++) { offset = i * SYNDROME_FRAGMENT_REG_SIZE; switch (bch_type) { + case BCH16_ECC: + regs->elm_syndrome_fragment_6[i] = elm_read_reg(info, + ELM_SYNDROME_FRAGMENT_6 + offset); + regs->elm_syndrome_fragment_5[i] = elm_read_reg(info, + ELM_SYNDROME_FRAGMENT_5 + offset); + regs->elm_syndrome_fragment_4[i] = elm_read_reg(info, + ELM_SYNDROME_FRAGMENT_4 + offset); case BCH8_ECC: regs->elm_syndrome_fragment_3[i] = elm_read_reg(info, ELM_SYNDROME_FRAGMENT_3 + offset); @@ -474,6 +503,13 @@ static int elm_context_restore(struct elm_info *info) for (i = 0; i < ERROR_VECTOR_MAX; i++) { offset = i * SYNDROME_FRAGMENT_REG_SIZE; switch (bch_type) { + case BCH16_ECC: + elm_write_reg(info, ELM_SYNDROME_FRAGMENT_6 + offset, + regs->elm_syndrome_fragment_6[i]); + elm_write_reg(info, ELM_SYNDROME_FRAGMENT_5 + offset, + regs->elm_syndrome_fragment_5[i]); + elm_write_reg(info, ELM_SYNDROME_FRAGMENT_4 + offset, + regs->elm_syndrome_fragment_4[i]); case BCH8_ECC: elm_write_reg(info, ELM_SYNDROME_FRAGMENT_3 + offset, regs->elm_syndrome_fragment_3[i]); diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h index 4edb40676b3f..780d1e97f620 100644 --- a/include/linux/platform_data/elm.h +++ b/include/linux/platform_data/elm.h @@ -21,6 +21,7 @@ enum bch_ecc { BCH4_ECC = 0, BCH8_ECC, + BCH16_ECC, }; /* ELM support 8 error syndrome process */ @@ -38,7 +39,7 @@ struct elm_errorvec { bool error_reported; bool error_uncorrectable; int error_count; - int error_loc[ERROR_VECTOR_MAX]; + int error_loc[16]; }; void elm_decode_bch_error_page(struct device *dev, u8 *ecc_calc, -- cgit v1.2.3-71-gd317 From 7d10d2610cc02d432168ca0c5d964cd9e85c1b06 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Mon, 19 May 2014 09:21:09 +0200 Subject: net: cdc_ncm: fix 64bit division build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The upper timer_interval limit is arbitrary and much higher than anything usable in the real world. Reducing it from 15s to ~4s to make the timer_interval fit in an u32 does not make much difference. The limit is still outside the practical bounds. This eliminates the need for a 64bit timer_interval, fixing a build error related to 64bit division: drivers/built-in.o: In function `cdc_ncm_get_coalesce': ak8975.c:(.text+0x1ac994): undefined reference to `__aeabi_uldivmod' Reported-by: Stephen Rothwell Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 4 ++-- include/linux/usb/cdc_ncm.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 2d0caf1eea25..ad2a386a6e92 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -138,7 +138,7 @@ static int cdc_ncm_get_coalesce(struct net_device *netdev, ec->tx_max_coalesced_frames = ctx->tx_max / ctx->max_datagram_size; /* the timer will fire CDC_NCM_TIMER_PENDING_CNT times in a row */ - ec->tx_coalesce_usecs = (ctx->timer_interval * CDC_NCM_TIMER_PENDING_CNT) / NSEC_PER_USEC; + ec->tx_coalesce_usecs = ctx->timer_interval / (NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT); return 0; } @@ -164,7 +164,7 @@ static int cdc_ncm_set_coalesce(struct net_device *netdev, return -EINVAL; spin_lock_bh(&ctx->mtx); - ctx->timer_interval = ec->tx_coalesce_usecs * NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT; + ctx->timer_interval = ec->tx_coalesce_usecs * (NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT); if (!ctx->timer_interval) ctx->tx_timer_pending = 0; spin_unlock_bh(&ctx->mtx); diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 8c5e38819828..7c9b484735c5 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -78,7 +78,7 @@ #define CDC_NCM_TIMER_PENDING_CNT 2 #define CDC_NCM_TIMER_INTERVAL_USEC 400UL #define CDC_NCM_TIMER_INTERVAL_MIN 5UL -#define CDC_NCM_TIMER_INTERVAL_MAX (15UL * USEC_PER_SEC) +#define CDC_NCM_TIMER_INTERVAL_MAX (U32_MAX / NSEC_PER_USEC) #define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) @@ -104,7 +104,7 @@ struct cdc_ncm_ctx { spinlock_t mtx; atomic_t stop; - u64 timer_interval; + u32 timer_interval; u32 max_ndp_size; u32 tx_timer_pending; -- cgit v1.2.3-71-gd317 From 3796ce1d4d4b330a75005c5eda105603ce9d4071 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 19 May 2014 22:42:32 +0200 Subject: pwm: add period and polarity to struct pwm_lookup Add period and polarity members to struct pwm_lookup so that platforms using the lookup table can be treated the same way as those using the device tree. Signed-off-by: Alexandre Belloni Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 8 +++++++- include/linux/pwm.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index a80471399c20..4b66bf09ee55 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -661,10 +661,16 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id) } } + mutex_unlock(&pwm_lookup_lock); + if (chip) pwm = pwm_request_from_chip(chip, index, con_id ?: dev_id); + if (IS_ERR(pwm)) + return pwm; + + pwm_set_period(pwm, p->period); + pwm_set_polarity(pwm, p->polarity); - mutex_unlock(&pwm_lookup_lock); return pwm; } diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 4717f54051cb..2f45e2fe5b93 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -274,6 +274,8 @@ struct pwm_lookup { unsigned int index; const char *dev_id; const char *con_id; + unsigned int period; + enum pwm_polarity polarity; }; #define PWM_LOOKUP(_provider, _index, _dev_id, _con_id) \ -- cgit v1.2.3-71-gd317 From cca674d47e59665630f3005291b61bb883015fc5 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 19 May 2014 21:53:20 +0200 Subject: mac80211: export the expected throughput Add get_expected_throughput() API to mac80211 so that each driver can implement its own version based on the RC algorithm they are using (might be using an HW RC algo). The API returns a value expressed in Kbps. Also, add the new get_expected_throughput() member to the rate_control_ops structure in order to be able to query the RC algorithm (this patch provides an implementation of this API for both minstrel and minstrel_ht). The related member in the station_info object is now filled accordingly when dumping a station. Cc: Felix Fietkau Signed-off-by: Antonio Quartulli Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 +++++++ net/mac80211/cfg.c | 13 +++++++++++++ net/mac80211/driver-ops.h | 13 +++++++++++++ net/mac80211/rc80211_minstrel.c | 12 ++++++++++++ net/mac80211/rc80211_minstrel_ht.c | 17 +++++++++++++++++ net/mac80211/trace.h | 32 ++++++++++++++++++++++++++++++++ 6 files changed, 94 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a34f26a4ed18..2c78997bc48d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2769,6 +2769,10 @@ enum ieee80211_roc_type { * information in bss_conf is set up and the beacon can be retrieved. A * channel context is bound before this is called. * @leave_ibss: Leave the IBSS again. + * + * @get_expected_throughput: extract the expected throughput towards the + * specified station. The returned value is expressed in Kbps. It returns 0 + * if the RC algorithm does not have proper data to provide. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -2962,6 +2966,7 @@ struct ieee80211_ops { int (*join_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); void (*leave_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); + u32 (*get_expected_throughput)(struct ieee80211_sta *sta); }; /** @@ -4535,6 +4540,8 @@ struct rate_control_ops { void (*add_sta_debugfs)(void *priv, void *priv_sta, struct dentry *dir); void (*remove_sta_debugfs)(void *priv, void *priv_sta); + + u32 (*get_expected_throughput)(void *priv_sta); }; static inline int rate_supported(struct ieee80211_sta *sta, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ac45304590d8..d7513a503be1 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -472,8 +472,10 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; + struct rate_control_ref *ref = local->rate_ctrl; struct timespec uptime; u64 packets = 0; + u32 thr = 0; int i, ac; sinfo->generation = sdata->local->sta_generation; @@ -587,6 +589,17 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); + + /* check if the driver has a SW RC implementation */ + if (ref && ref->ops->get_expected_throughput) + thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv); + else + thr = drv_get_expected_throughput(local, &sta->sta); + + if (thr != 0) { + sinfo->filled |= STATION_INFO_EXPECTED_THROUGHPUT; + sinfo->expected_throughput = thr; + } } static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = { diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index df1d50291344..696ef78b1fb7 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1156,4 +1156,17 @@ static inline void drv_leave_ibss(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline u32 drv_get_expected_throughput(struct ieee80211_local *local, + struct ieee80211_sta *sta) +{ + u32 ret = 0; + + trace_drv_get_expected_throughput(sta); + if (local->ops->get_expected_throughput) + ret = local->ops->get_expected_throughput(sta); + trace_drv_return_u32(local, ret); + + return ret; +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 26fd94fa0aed..1c1469c36dca 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -657,6 +657,17 @@ minstrel_free(void *priv) kfree(priv); } +static u32 minstrel_get_expected_throughput(void *priv_sta) +{ + struct minstrel_sta_info *mi = priv_sta; + int idx = mi->max_tp_rate[0]; + + /* convert pkt per sec in kbps (1200 is the average pkt size used for + * computing cur_tp + */ + return MINSTREL_TRUNC(mi->r[idx].cur_tp) * 1200 * 8 / 1024; +} + const struct rate_control_ops mac80211_minstrel = { .name = "minstrel", .tx_status = minstrel_tx_status, @@ -670,6 +681,7 @@ const struct rate_control_ops mac80211_minstrel = { .add_sta_debugfs = minstrel_add_sta_debugfs, .remove_sta_debugfs = minstrel_remove_sta_debugfs, #endif + .get_expected_throughput = minstrel_get_expected_throughput, }; int __init diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 4e916ad51a20..85c1e74b7714 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1032,6 +1032,22 @@ minstrel_ht_free(void *priv) mac80211_minstrel.free(priv); } +static u32 minstrel_ht_get_expected_throughput(void *priv_sta) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + struct minstrel_ht_sta *mi = &msp->ht; + int i, j; + + if (!msp->is_ht) + return mac80211_minstrel.get_expected_throughput(priv_sta); + + i = mi->max_tp_rate / MCS_GROUP_RATES; + j = mi->max_tp_rate % MCS_GROUP_RATES; + + /* convert cur_tp from pkt per second in kbps */ + return mi->groups[i].rates[j].cur_tp * AVG_PKT_SIZE * 8 / 1024; +} + static const struct rate_control_ops mac80211_minstrel_ht = { .name = "minstrel_ht", .tx_status = minstrel_ht_tx_status, @@ -1046,6 +1062,7 @@ static const struct rate_control_ops mac80211_minstrel_ht = { .add_sta_debugfs = minstrel_ht_add_sta_debugfs, .remove_sta_debugfs = minstrel_ht_remove_sta_debugfs, #endif + .get_expected_throughput = minstrel_ht_get_expected_throughput, }; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index a0b0aea76525..942f64b8ce0e 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -184,6 +184,20 @@ TRACE_EVENT(drv_return_bool, "true" : "false") ); +TRACE_EVENT(drv_return_u32, + TP_PROTO(struct ieee80211_local *local, u32 ret), + TP_ARGS(local, ret), + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, ret) + ), + TP_fast_assign( + LOCAL_ASSIGN; + __entry->ret = ret; + ), + TP_printk(LOCAL_PR_FMT " - %u", LOCAL_PR_ARG, __entry->ret) +); + TRACE_EVENT(drv_return_u64, TP_PROTO(struct ieee80211_local *local, u64 ret), TP_ARGS(local, ret), @@ -1499,6 +1513,24 @@ DEFINE_EVENT(local_sdata_evt, drv_leave_ibss, TP_ARGS(local, sdata) ); +TRACE_EVENT(drv_get_expected_throughput, + TP_PROTO(struct ieee80211_sta *sta), + + TP_ARGS(sta), + + TP_STRUCT__entry( + STA_ENTRY + ), + + TP_fast_assign( + STA_ASSIGN; + ), + + TP_printk( + STA_PR_FMT, STA_PR_ARG + ) +); + /* * Tracing for API calls that drivers call. */ -- cgit v1.2.3-71-gd317 From 7406353d43c8e2faf478721e87aeb6f2f9685de0 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 19 May 2014 21:53:21 +0200 Subject: cfg80211: implement cfg80211_get_station cfg80211 API Implement and export the new cfg80211_get_station() API. This utility can be used by other kernel modules to obtain detailed information about a given wireless station. It will be in particular useful to batman-adv which will implement a wireless rate based metric. Signed-off-by: Antonio Quartulli Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 13 +++++++++++++ net/wireless/rdev-ops.h | 2 +- net/wireless/util.c | 18 ++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 857d6476a128..a75fabd18502 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1074,6 +1074,19 @@ struct station_info { */ }; +/** + * cfg80211_get_station - retrieve information about a given station + * @dev: the device where the station is supposed to be connected to + * @mac_addr: the mac address of the station of interest + * @sinfo: pointer to the structure to fill with the information + * + * Returns 0 on success and sinfo is filled with the available information + * otherwise returns a negative error code and the content of sinfo has to be + * considered undefined. + */ +int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo); + /** * enum monitor_flags - monitor flags * diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 00cdf73ba6c4..d95bbe348138 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -199,7 +199,7 @@ static inline int rdev_change_station(struct cfg80211_registered_device *rdev, } static inline int rdev_get_station(struct cfg80211_registered_device *rdev, - struct net_device *dev, u8 *mac, + struct net_device *dev, const u8 *mac, struct station_info *sinfo) { int ret; diff --git a/net/wireless/util.c b/net/wireless/util.c index fa61ac9c9b26..728f1c0dc70d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1546,6 +1546,24 @@ unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy) } EXPORT_SYMBOL(ieee80211_get_num_supported_channels); +int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + + wdev = dev->ieee80211_ptr; + if (!wdev) + return -EOPNOTSUPP; + + rdev = wiphy_to_rdev(wdev->wiphy); + if (!rdev->ops->get_station) + return -EOPNOTSUPP; + + return rdev_get_station(rdev, dev, mac_addr, sinfo); +} +EXPORT_SYMBOL(cfg80211_get_station); + /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ /* Ethernet-II snap header (RFC1042 for most EtherTypes) */ const unsigned char rfc1042_header[] __aligned(2) = -- cgit v1.2.3-71-gd317 From dc671157139918eaf61f73db1bd6dd02960b66e2 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 19 May 2014 22:42:34 +0200 Subject: pwm: renesas-tpu: remove unused struct tpu_pwm_platform_data The struct is not used anymore and the polarity initialization will be done using the PWM lookup table (or device tree). Signed-off-by: Alexandre Belloni Acked-by: Laurent Pinchart Acked-by: Simon Horman Signed-off-by: Thierry Reding --- drivers/pwm/pwm-renesas-tpu.c | 19 +++---------------- include/linux/platform_data/pwm-renesas-tpu.h | 16 ---------------- 2 files changed, 3 insertions(+), 32 deletions(-) delete mode 100644 include/linux/platform_data/pwm-renesas-tpu.h (limited to 'include') diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c index cc13ff4cfab0..3b71b42e89d5 100644 --- a/drivers/pwm/pwm-renesas-tpu.c +++ b/drivers/pwm/pwm-renesas-tpu.c @@ -21,13 +21,14 @@ #include #include #include -#include #include #include #include #include #include +#define TPU_CHANNEL_MAX 4 + #define TPU_TSTR 0x00 /* Timer start register (shared) */ #define TPU_TCRn 0x00 /* Timer control register */ @@ -87,7 +88,6 @@ struct tpu_pwm_device { struct tpu_device { struct platform_device *pdev; - enum pwm_polarity polarities[TPU_CHANNEL_MAX]; struct pwm_chip chip; spinlock_t lock; @@ -229,7 +229,7 @@ static int tpu_pwm_request(struct pwm_chip *chip, struct pwm_device *_pwm) pwm->tpu = tpu; pwm->channel = _pwm->hwpwm; - pwm->polarity = tpu->polarities[pwm->channel]; + pwm->polarity = PWM_POLARITY_NORMAL; pwm->prescaler = 0; pwm->period = 0; pwm->duty = 0; @@ -388,16 +388,6 @@ static const struct pwm_ops tpu_pwm_ops = { * Probe and remove */ -static void tpu_parse_pdata(struct tpu_device *tpu) -{ - struct tpu_pwm_platform_data *pdata = tpu->pdev->dev.platform_data; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(tpu->polarities); ++i) - tpu->polarities[i] = pdata ? pdata->channels[i].polarity - : PWM_POLARITY_NORMAL; -} - static int tpu_probe(struct platform_device *pdev) { struct tpu_device *tpu; @@ -411,9 +401,6 @@ static int tpu_probe(struct platform_device *pdev) spin_lock_init(&tpu->lock); tpu->pdev = pdev; - /* Initialize device configuration from platform data. */ - tpu_parse_pdata(tpu); - /* Map memory, get clock and pin control. */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); tpu->base = devm_ioremap_resource(&pdev->dev, res); diff --git a/include/linux/platform_data/pwm-renesas-tpu.h b/include/linux/platform_data/pwm-renesas-tpu.h deleted file mode 100644 index a7220b10ddab..000000000000 --- a/include/linux/platform_data/pwm-renesas-tpu.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef __PWM_RENESAS_TPU_H__ -#define __PWM_RENESAS_TPU_H__ - -#include - -#define TPU_CHANNEL_MAX 4 - -struct tpu_pwm_channel_data { - enum pwm_polarity polarity; -}; - -struct tpu_pwm_platform_data { - struct tpu_pwm_channel_data channels[TPU_CHANNEL_MAX]; -}; - -#endif /* __PWM_RENESAS_TPU_H__ */ -- cgit v1.2.3-71-gd317 From 4284402924cc55e182008ca7e9d4fb1e891ff5ae Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 19 May 2014 22:42:37 +0200 Subject: pwm: modify PWM_LOOKUP to initialize all struct pwm_lookup members Now that PWM_LOOKUP is not used anymore, modify it to initialize all the members of struct pwm_lookup. Signed-off-by: Alexandre Belloni Signed-off-by: Thierry Reding --- Documentation/pwm.txt | 3 ++- include/linux/pwm.h | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/pwm.txt b/Documentation/pwm.txt index 0527f615b115..ca895fd211e4 100644 --- a/Documentation/pwm.txt +++ b/Documentation/pwm.txt @@ -19,7 +19,8 @@ should instead register a static mapping that can be used to match PWM consumers to providers, as given in the following example: static struct pwm_lookup board_pwm_lookup[] = { - PWM_LOOKUP("tegra-pwm", 0, "pwm-backlight", NULL), + PWM_LOOKUP("tegra-pwm", 0, "pwm-backlight", NULL, + 50000, PWM_POLARITY_NORMAL), }; static void __init board_init(void) diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 2f45e2fe5b93..e90628cac8fa 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -278,12 +278,14 @@ struct pwm_lookup { enum pwm_polarity polarity; }; -#define PWM_LOOKUP(_provider, _index, _dev_id, _con_id) \ +#define PWM_LOOKUP(_provider, _index, _dev_id, _con_id, _period, _polarity) \ { \ .provider = _provider, \ .index = _index, \ .dev_id = _dev_id, \ .con_id = _con_id, \ + .period = _period, \ + .polarity = _polarity \ } #if IS_ENABLED(CONFIG_PWM) -- cgit v1.2.3-71-gd317 From 89dd6a4b34c5a7658a4017c93b2fe66c964399d0 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 15 May 2014 15:32:12 +0200 Subject: drm/irq: Add kms-native crtc interface functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to start somewhere ... With this the only places left in i915 where we use pipe integers is in the interrupt handling code. And there it actually makes some amount of sense. v2: - Polish kerneldoc a bit (Thierry). - Drop "dev" parameter since it's unecessary. - Split out i915 changes (Thierry). Cc: Thierry Reding Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++ include/drm/drmP.h | 5 +++ 2 files changed, 82 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index f3dafccca456..5ba9a614e7ad 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -916,6 +916,8 @@ static int drm_vblank_enable(struct drm_device *dev, int crtc) * Acquire a reference count on vblank events to avoid having them disabled * while in use. * + * This is the legacy version of drm_crtc_vblank_get(). + * * Returns: * Zero on success, nonzero on failure. */ @@ -940,6 +942,24 @@ int drm_vblank_get(struct drm_device *dev, int crtc) } EXPORT_SYMBOL(drm_vblank_get); +/** + * drm_crtc_vblank_get - get a reference count on vblank events + * @crtc: which CRTC to own + * + * Acquire a reference count on vblank events to avoid having them disabled + * while in use. + * + * This is the native kms version of drm_vblank_off(). + * + * Returns: + * Zero on success, nonzero on failure. + */ +int drm_crtc_vblank_get(struct drm_crtc *crtc) +{ + return drm_vblank_get(crtc->dev, drm_crtc_index(crtc)); +} +EXPORT_SYMBOL(drm_crtc_vblank_get); + /** * drm_vblank_put - give up ownership of vblank events * @dev: DRM device @@ -947,6 +967,8 @@ EXPORT_SYMBOL(drm_vblank_get); * * Release ownership of a given vblank counter, turning off interrupts * if possible. Disable interrupts after drm_vblank_offdelay milliseconds. + * + * This is the legacy version of drm_crtc_vblank_put(). */ void drm_vblank_put(struct drm_device *dev, int crtc) { @@ -960,6 +982,21 @@ void drm_vblank_put(struct drm_device *dev, int crtc) } EXPORT_SYMBOL(drm_vblank_put); +/** + * drm_crtc_vblank_put - give up ownership of vblank events + * @crtc: which counter to give up + * + * Release ownership of a given vblank counter, turning off interrupts + * if possible. Disable interrupts after drm_vblank_offdelay milliseconds. + * + * This is the native kms version of drm_vblank_put(). + */ +void drm_crtc_vblank_put(struct drm_crtc *crtc) +{ + drm_vblank_put(crtc->dev, drm_crtc_index(crtc)); +} +EXPORT_SYMBOL(drm_crtc_vblank_put); + /** * drm_vblank_off - disable vblank events on a CRTC * @dev: DRM device @@ -971,6 +1008,8 @@ EXPORT_SYMBOL(drm_vblank_put); * * Drivers must use this function when the hardware vblank counter can get * reset, e.g. when suspending. + * + * This is the legacy version of drm_crtc_vblank_off(). */ void drm_vblank_off(struct drm_device *dev, int crtc) { @@ -1003,6 +1042,25 @@ void drm_vblank_off(struct drm_device *dev, int crtc) } EXPORT_SYMBOL(drm_vblank_off); +/** + * drm_crtc_vblank_off - disable vblank events on a CRTC + * @crtc: CRTC in question + * + * Drivers can use this function to shut down the vblank interrupt handling when + * disabling a crtc. This function ensures that the latest vblank frame count is + * stored so that drm_vblank_on can restore it again. + * + * Drivers must use this function when the hardware vblank counter can get + * reset, e.g. when suspending. + * + * This is the native kms version of drm_vblank_off(). + */ +void drm_crtc_vblank_off(struct drm_crtc *crtc) +{ + drm_vblank_off(crtc->dev, drm_crtc_index(crtc)); +} +EXPORT_SYMBOL(drm_crtc_vblank_off); + /** * drm_vblank_on - enable vblank events on a CRTC * @dev: DRM device @@ -1012,6 +1070,8 @@ EXPORT_SYMBOL(drm_vblank_off); * drm_vblank_off() again. Note that calls to drm_vblank_on() and * drm_vblank_off() can be unbalanced and so can also be unconditionaly called * in driver load code to reflect the current hardware state of the crtc. + * + * This is the legacy version of drm_crtc_vblank_on(). */ void drm_vblank_on(struct drm_device *dev, int crtc) { @@ -1025,6 +1085,23 @@ void drm_vblank_on(struct drm_device *dev, int crtc) } EXPORT_SYMBOL(drm_vblank_on); +/** + * drm_crtc_vblank_on - enable vblank events on a CRTC + * @crtc: CRTC in question + * + * This functions restores the vblank interrupt state captured with + * drm_vblank_off() again. Note that calls to drm_vblank_on() and + * drm_vblank_off() can be unbalanced and so can also be unconditionaly called + * in driver load code to reflect the current hardware state of the crtc. + * + * This is the native kms version of drm_vblank_on(). + */ +void drm_crtc_vblank_on(struct drm_crtc *crtc) +{ + drm_vblank_on(crtc->dev, drm_crtc_index(crtc)); +} +EXPORT_SYMBOL(drm_crtc_vblank_on); + /** * drm_vblank_pre_modeset - account for vblanks across mode sets * @dev: DRM device diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 7339b2b00724..76ccaabd0418 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1359,9 +1359,14 @@ extern void drm_send_vblank_event(struct drm_device *dev, int crtc, extern bool drm_handle_vblank(struct drm_device *dev, int crtc); extern int drm_vblank_get(struct drm_device *dev, int crtc); extern void drm_vblank_put(struct drm_device *dev, int crtc); +extern int drm_crtc_vblank_get(struct drm_crtc *crtc); +extern void drm_crtc_vblank_put(struct drm_crtc *crtc); extern void drm_vblank_off(struct drm_device *dev, int crtc); extern void drm_vblank_on(struct drm_device *dev, int crtc); +extern void drm_crtc_vblank_off(struct drm_crtc *crtc); +extern void drm_crtc_vblank_on(struct drm_crtc *crtc); extern void drm_vblank_cleanup(struct drm_device *dev); + extern u32 drm_get_last_vbltimestamp(struct drm_device *dev, int crtc, struct timeval *tvblank, unsigned flags); extern int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, -- cgit v1.2.3-71-gd317 From 5b3e507820c6e120bc2680c0d35f9d9d81fcb98d Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Wed, 14 May 2014 14:58:08 -0300 Subject: mtd: nand: pxa3xx: Use ECC strength and step size devicetree binding This commit adds support for the user to specify the ECC strength and step size through the devicetree. We keep the previous behavior, when there is no DT parameter provided. Signed-off-by: Ezequiel Garcia Signed-off-by: Brian Norris --- drivers/mtd/nand/pxa3xx_nand.c | 17 +++++++++++++++-- include/linux/platform_data/mtd-nand-pxa3xx.h | 3 +++ 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index 3b66a6460d67..2a9add06c2d5 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -1519,8 +1519,13 @@ KEEP_CONFIG: } } - ecc_strength = chip->ecc_strength_ds; - ecc_step = chip->ecc_step_ds; + if (pdata->ecc_strength && pdata->ecc_step_size) { + ecc_strength = pdata->ecc_strength; + ecc_step = pdata->ecc_step_size; + } else { + ecc_strength = chip->ecc_strength_ds; + ecc_step = chip->ecc_step_ds; + } /* Set default ECC strength requirements on non-ONFI devices */ if (ecc_strength < 1 && ecc_step < 1) { @@ -1729,6 +1734,14 @@ static int pxa3xx_nand_probe_dt(struct platform_device *pdev) of_property_read_u32(np, "num-cs", &pdata->num_cs); pdata->flash_bbt = of_get_nand_on_flash_bbt(np); + pdata->ecc_strength = of_get_nand_ecc_strength(np); + if (pdata->ecc_strength < 0) + pdata->ecc_strength = 0; + + pdata->ecc_step_size = of_get_nand_ecc_step_size(np); + if (pdata->ecc_step_size < 0) + pdata->ecc_step_size = 0; + pdev->dev.platform_data = pdata; return 0; diff --git a/include/linux/platform_data/mtd-nand-pxa3xx.h b/include/linux/platform_data/mtd-nand-pxa3xx.h index a94147124929..ac4ea2e641c7 100644 --- a/include/linux/platform_data/mtd-nand-pxa3xx.h +++ b/include/linux/platform_data/mtd-nand-pxa3xx.h @@ -58,6 +58,9 @@ struct pxa3xx_nand_platform_data { /* use an flash-based bad block table */ bool flash_bbt; + /* requested ECC strength and ECC step size */ + int ecc_strength, ecc_step_size; + const struct mtd_partition *parts[NUM_CHIP_SELECT]; unsigned int nr_parts[NUM_CHIP_SELECT]; -- cgit v1.2.3-71-gd317 From 5fe821a9dee241fa450703ab7015d970ee0cfb8d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 19 May 2014 14:56:14 -0700 Subject: net: filter: cleanup invocation of internal BPF Kernel API for classic BPF socket filters is: sk_unattached_filter_create() - validate classic BPF, convert, JIT SK_RUN_FILTER() - run it sk_unattached_filter_destroy() - destroy socket filter Cleanup internal BPF kernel API as following: sk_filter_select_runtime() - final step of internal BPF creation. Try to JIT internal BPF program, if JIT is not available select interpreter SK_RUN_FILTER() - run it sk_filter_free() - free internal BPF program Disallow direct calls to BPF interpreter. Execution of the BPF program should be done with SK_RUN_FILTER() macro. Example of internal BPF create, run, destroy: struct sk_filter *fp; fp = kzalloc(sk_filter_size(prog_len), GFP_KERNEL); memcpy(fp->insni, prog, prog_len * sizeof(fp->insni[0])); fp->len = prog_len; sk_filter_select_runtime(fp); SK_RUN_FILTER(fp, ctx); sk_filter_free(fp); Sockets, seccomp, testsuite, tracing are using different ways to populate sk_filter, so first steps of program creation are not common. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 6 ++---- kernel/seccomp.c | 6 ++---- lib/test_bpf.c | 4 ++-- net/core/filter.c | 44 ++++++++++++++++++++++++++++---------------- 4 files changed, 34 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 9d5ae0a2c954..7977b3958e25 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -184,10 +184,8 @@ static inline unsigned int sk_filter_size(unsigned int proglen) int sk_filter(struct sock *sk, struct sk_buff *skb); -u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx, - const struct sock_filter_int *insni); -u32 sk_run_filter_int_skb(const struct sk_buff *ctx, - const struct sock_filter_int *insni); +void sk_filter_select_runtime(struct sk_filter *fp); +void sk_filter_free(struct sk_filter *fp); int sk_convert_filter(struct sock_filter *prog, int len, struct sock_filter_int *new_prog, int *new_len); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 7e02d624cc50..1036b6f2fded 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -273,10 +273,8 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) atomic_set(&filter->usage, 1); filter->prog->len = new_len; - filter->prog->bpf_func = (void *)sk_run_filter_int_seccomp; - /* JIT internal BPF into native HW instructions */ - bpf_int_jit_compile(filter->prog); + sk_filter_select_runtime(filter->prog); /* * If there is an existing filter, make it the prev and don't drop its @@ -340,7 +338,7 @@ void put_seccomp_filter(struct task_struct *tsk) while (orig && atomic_dec_and_test(&orig->usage)) { struct seccomp_filter *freeme = orig; orig = orig->prev; - bpf_jit_free(freeme->prog); + sk_filter_free(freeme->prog); kfree(freeme); } } diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 3603ebcd5d65..e160934430eb 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1489,7 +1489,7 @@ static __init int test_bpf(void) memcpy(fp_ext->insns, tests[i].insns_int, fprog.len * 8); fp->len = fprog.len; - fp->bpf_func = sk_run_filter_int_skb; + sk_filter_select_runtime(fp); } else { err = sk_unattached_filter_create(&fp, &fprog); if (tests[i].data_type == EXPECTED_FAIL) { @@ -1516,7 +1516,7 @@ static __init int test_bpf(void) if (tests[i].data_type != SKB_INT) sk_unattached_filter_destroy(fp); else - kfree(fp); + sk_filter_free(fp); if (err) { pr_cont("FAIL %d\n", err); diff --git a/net/core/filter.c b/net/core/filter.c index 32c5b44c537e..7067cb240d3e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -153,7 +153,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) * keep, 0 for none. @ctx is the data we are operating on, @insn is the * array of filter instructions. */ -unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) +static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) { u64 stack[MAX_BPF_STACK / sizeof(u64)]; u64 regs[MAX_BPF_REG], tmp; @@ -571,15 +571,6 @@ load_byte: return 0; } -u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx, - const struct sock_filter_int *insni) - __attribute__ ((alias ("__sk_run_filter"))); - -u32 sk_run_filter_int_skb(const struct sk_buff *ctx, - const struct sock_filter_int *insni) - __attribute__ ((alias ("__sk_run_filter"))); -EXPORT_SYMBOL_GPL(sk_run_filter_int_skb); - /* Helper to find the offset of pkt_type in sk_buff structure. We want * to make sure its still a 3bit field starting at a byte boundary; * taken from arch/x86/net/bpf_jit_comp.c. @@ -1397,7 +1388,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu) struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); sk_release_orig_filter(fp); - bpf_jit_free(fp); + sk_filter_free(fp); } /** @@ -1497,7 +1488,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, goto out_err_free; } - fp->bpf_func = sk_run_filter_int_skb; fp->len = new_len; /* 2nd pass: remap sock_filter insns into sock_filter_int insns. */ @@ -1510,6 +1500,8 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, */ goto out_err_free; + sk_filter_select_runtime(fp); + kfree(old_prog); return fp; @@ -1528,6 +1520,29 @@ void __weak bpf_int_jit_compile(struct sk_filter *prog) { } +/** + * sk_filter_select_runtime - select execution runtime for BPF program + * @fp: sk_filter populated with internal BPF program + * + * try to JIT internal BPF program, if JIT is not available select interpreter + * BPF program will be executed via SK_RUN_FILTER() macro + */ +void sk_filter_select_runtime(struct sk_filter *fp) +{ + fp->bpf_func = (void *) __sk_run_filter; + + /* Probe if internal BPF can be JITed */ + bpf_int_jit_compile(fp); +} +EXPORT_SYMBOL_GPL(sk_filter_select_runtime); + +/* free internal BPF program */ +void sk_filter_free(struct sk_filter *fp) +{ + bpf_jit_free(fp); +} +EXPORT_SYMBOL_GPL(sk_filter_free); + static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, struct sock *sk) { @@ -1548,12 +1563,9 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, /* JIT compiler couldn't process this filter, so do the * internal BPF translation for the optimized interpreter. */ - if (!fp->jited) { + if (!fp->jited) fp = __sk_migrate_filter(fp, sk); - /* Probe if internal BPF can be jit-ed */ - bpf_int_jit_compile(fp); - } return fp; } -- cgit v1.2.3-71-gd317 From ba730340f96c01160b5f26f81e8fb38f8cb1821c Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Thu, 15 May 2014 18:15:31 +0400 Subject: dmaengine: fix comment typo Fix comment typo. Signed-off-by: Alexander Popov Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 8300fb87b84a..cbb168e04dc1 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -292,7 +292,7 @@ struct dma_chan_dev { }; /** - * enum dma_slave_buswidth - defines bus with of the DMA slave + * enum dma_slave_buswidth - defines bus width of the DMA slave * device, source or target buses */ enum dma_slave_buswidth { -- cgit v1.2.3-71-gd317 From 67af9811539be83dbdc0739215d29af23c870405 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 18 May 2014 10:15:24 +0300 Subject: cfg80211: allow RSSI compensation Channels in 2.4GHz band overlap, this means that if we send a probe request on channel 1 and then move to channel 2, we will hear the probe response on channel 2. In this case, the RSSI will be lower than if we had heard it on the channel on which it was sent (1 in this case). The firmware / low level driver can parse the channel in the DS IE or HT IE and compensate the RSSI so that it will still have a valid value even if we heard the frame on an adjacent channel. This can be done up to a certain offset. Add this offset as a configuration for the low level driver. A low level driver that can compensate the low RSSI in this case should assign the maximal offset for which the RSSI value is still valid. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 7 +++++++ net/wireless/scan.c | 12 ++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a75fabd18502..920ec8c1ce54 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2961,6 +2961,12 @@ struct wiphy_vendor_command { * and probe responses. This value should be set if the driver * wishes to limit the number of csa counters. Default (0) means * infinite. + * @max_adj_channel_rssi_comp: max offset of between the channel on which the + * frame was sent and the channel on which the frame was heard for which + * the reported rssi is still valid. If a driver is able to compensate the + * low rssi when a frame is heard on different channel, then it should set + * this variable to the maximal offset for which it can compensate. + * This value should be set in MHz. */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -3079,6 +3085,7 @@ struct wiphy { u16 max_ap_assoc_sta; u8 max_num_csa_counters; + u8 max_adj_channel_rssi_comp; char priv[0] __aligned(NETDEV_ALIGN); }; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 0f5da18cc619..77c56eef0574 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -883,6 +883,7 @@ cfg80211_inform_bss_width(struct wiphy *wiphy, struct cfg80211_bss_ies *ies; struct ieee80211_channel *channel; struct cfg80211_internal_bss tmp = {}, *res; + bool signal_valid; if (WARN_ON(!wiphy)) return NULL; @@ -919,8 +920,9 @@ cfg80211_inform_bss_width(struct wiphy *wiphy, rcu_assign_pointer(tmp.pub.beacon_ies, ies); rcu_assign_pointer(tmp.pub.ies, ies); - res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, - rx_channel == channel); + signal_valid = abs(rx_channel->center_freq - channel->center_freq) <= + wiphy->max_adj_channel_rssi_comp; + res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid); if (!res) return NULL; @@ -944,6 +946,7 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy, struct cfg80211_internal_bss tmp = {}, *res; struct cfg80211_bss_ies *ies; struct ieee80211_channel *channel; + bool signal_valid; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); @@ -991,8 +994,9 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy, tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); - res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, - rx_channel == channel); + signal_valid = abs(rx_channel->center_freq - channel->center_freq) <= + wiphy->max_adj_channel_rssi_comp; + res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid); if (!res) return NULL; -- cgit v1.2.3-71-gd317 From bf3b5ec66bd03d66e9ea729aaca013ea1047a797 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 25 Apr 2014 12:55:30 +0100 Subject: mmc: sdio_irq: rework sdio irq handling Rather than the SDIO support spawning it's own thread for handling card interrupts, use the generic IRQ infrastructure for this, triggering it from the host interface's interrupt handling directly. This avoids a race between the parent thread waiting to receive an interrupt response from the card, and the slow startup from the sdio irq thread, which can occur as a result of high system load (eg, while udev is running.) Signed-off-by: Russell King Tested-by: Markus Pargmann Tested-by: Stephen Warren [Ulf Hansson] Resolved conflict Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/core/sdio_irq.c | 41 +++++++++++++++++++++++++++++++---------- include/linux/mmc/host.h | 3 +++ 2 files changed, 34 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c index aaa90460ed23..5cc13c8d35bb 100644 --- a/drivers/mmc/core/sdio_irq.c +++ b/drivers/mmc/core/sdio_irq.c @@ -90,6 +90,15 @@ static int process_sdio_pending_irqs(struct mmc_host *host) return ret; } +void sdio_run_irqs(struct mmc_host *host) +{ + mmc_claim_host(host); + host->sdio_irq_pending = true; + process_sdio_pending_irqs(host); + mmc_release_host(host); +} +EXPORT_SYMBOL_GPL(sdio_run_irqs); + static int sdio_irq_thread(void *_host) { struct mmc_host *host = _host; @@ -189,14 +198,20 @@ static int sdio_card_irq_get(struct mmc_card *card) WARN_ON(!host->claimed); if (!host->sdio_irqs++) { - atomic_set(&host->sdio_irq_thread_abort, 0); - host->sdio_irq_thread = - kthread_run(sdio_irq_thread, host, "ksdioirqd/%s", - mmc_hostname(host)); - if (IS_ERR(host->sdio_irq_thread)) { - int err = PTR_ERR(host->sdio_irq_thread); - host->sdio_irqs--; - return err; + if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) { + atomic_set(&host->sdio_irq_thread_abort, 0); + host->sdio_irq_thread = + kthread_run(sdio_irq_thread, host, + "ksdioirqd/%s", mmc_hostname(host)); + if (IS_ERR(host->sdio_irq_thread)) { + int err = PTR_ERR(host->sdio_irq_thread); + host->sdio_irqs--; + return err; + } + } else { + mmc_host_clk_hold(host); + host->ops->enable_sdio_irq(host, 1); + mmc_host_clk_release(host); } } @@ -211,8 +226,14 @@ static int sdio_card_irq_put(struct mmc_card *card) BUG_ON(host->sdio_irqs < 1); if (!--host->sdio_irqs) { - atomic_set(&host->sdio_irq_thread_abort, 1); - kthread_stop(host->sdio_irq_thread); + if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) { + atomic_set(&host->sdio_irq_thread_abort, 1); + kthread_stop(host->sdio_irq_thread); + } else { + mmc_host_clk_hold(host); + host->ops->enable_sdio_irq(host, 0); + mmc_host_clk_release(host); + } } return 0; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index cd595275e118..7960424d0bc0 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -282,6 +282,7 @@ struct mmc_host { #define MMC_CAP2_HS400_1_2V (1 << 16) /* Can support HS400 1.2V */ #define MMC_CAP2_HS400 (MMC_CAP2_HS400_1_8V | \ MMC_CAP2_HS400_1_2V) +#define MMC_CAP2_SDIO_IRQ_NOTHREAD (1 << 17) mmc_pm_flag_t pm_caps; /* supported pm features */ @@ -397,6 +398,8 @@ static inline void mmc_signal_sdio_irq(struct mmc_host *host) wake_up_process(host->sdio_irq_thread); } +void sdio_run_irqs(struct mmc_host *host); + #ifdef CONFIG_REGULATOR int mmc_regulator_get_ocrmask(struct regulator *supply); int mmc_regulator_set_ocr(struct mmc_host *mmc, -- cgit v1.2.3-71-gd317 From 781e989cf593c71d26bdca74f5e77b3651fc060e Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 25 Apr 2014 12:55:46 +0100 Subject: mmc: sdhci: convert to new SDIO IRQ handling Use a generic threaded interrupt handler for SDIO interrupt handling, rather than allowing the SDIO core code to buggily spawn its own thread. This results in host drivers to be more in control of how SDIO interrupts are acknowledged in the hardware, rather than having the internals of the SDIO core placed upon them, possibly resulting in sub-standard handling. At least one SDHCI implementation specifies a very specific sequence to deal with a card interrupt. Signed-off-by: Russell King Tested-by: Markus Pargmann Tested-by: Stephen Warren Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 62 +++++++++++++++++++++++++++++------------------ include/linux/mmc/sdhci.h | 2 ++ 2 files changed, 41 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 4b6cca2130bc..4a0622d52ae5 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2428,10 +2428,10 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask) static irqreturn_t sdhci_irq(int irq, void *dev_id) { - irqreturn_t result; + irqreturn_t result = IRQ_NONE; struct sdhci_host *host = dev_id; u32 intmask, mask, unexpected = 0; - int cardint = 0, max_loops = 16; + int max_loops = 16; spin_lock(&host->lock); @@ -2490,8 +2490,11 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id) pr_err("%s: Card is consuming too much power!\n", mmc_hostname(host->mmc)); - if (intmask & SDHCI_INT_CARD_INT) - cardint = 1; + if (intmask & SDHCI_INT_CARD_INT) { + sdhci_enable_sdio_irq_nolock(host, false); + host->thread_isr |= SDHCI_INT_CARD_INT; + result = IRQ_WAKE_THREAD; + } intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE | SDHCI_INT_CMD_MASK | SDHCI_INT_DATA_MASK | @@ -2503,17 +2506,10 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id) sdhci_writel(host, intmask, SDHCI_INT_STATUS); } - result = IRQ_HANDLED; + if (result == IRQ_NONE) + result = IRQ_HANDLED; intmask = sdhci_readl(host, SDHCI_INT_STATUS); - - /* - * If we know we'll call the driver to signal SDIO IRQ, - * disregard further indications of Card Interrupt in - * the status to avoid a needless loop. - */ - if (cardint) - intmask &= ~SDHCI_INT_CARD_INT; } while (intmask && --max_loops); out: spin_unlock(&host->lock); @@ -2523,15 +2519,33 @@ out: mmc_hostname(host->mmc), unexpected); sdhci_dumpregs(host); } - /* - * We have to delay this as it calls back into the driver. - */ - if (cardint) - mmc_signal_sdio_irq(host->mmc); return result; } +static irqreturn_t sdhci_thread_irq(int irq, void *dev_id) +{ + struct sdhci_host *host = dev_id; + unsigned long flags; + u32 isr; + + spin_lock_irqsave(&host->lock, flags); + isr = host->thread_isr; + host->thread_isr = 0; + spin_unlock_irqrestore(&host->lock, flags); + + if (isr & SDHCI_INT_CARD_INT) { + sdio_run_irqs(host->mmc); + + spin_lock_irqsave(&host->lock, flags); + if (host->flags & SDHCI_SDIO_IRQ_ENABLED) + sdhci_enable_sdio_irq_nolock(host, true); + spin_unlock_irqrestore(&host->lock, flags); + } + + return isr ? IRQ_HANDLED : IRQ_NONE; +} + /*****************************************************************************\ * * * Suspend/resume * @@ -2601,8 +2615,9 @@ int sdhci_resume_host(struct sdhci_host *host) } if (!device_may_wakeup(mmc_dev(host->mmc))) { - ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED, - mmc_hostname(host->mmc), host); + ret = request_threaded_irq(host->irq, sdhci_irq, + sdhci_thread_irq, IRQF_SHARED, + mmc_hostname(host->mmc), host); if (ret) return ret; } else { @@ -2681,7 +2696,7 @@ int sdhci_runtime_suspend_host(struct sdhci_host *host) sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK); spin_unlock_irqrestore(&host->lock, flags); - synchronize_irq(host->irq); + synchronize_hardirq(host->irq); spin_lock_irqsave(&host->lock, flags); host->runtime_suspended = true; @@ -2937,6 +2952,7 @@ int sdhci_add_host(struct sdhci_host *host) mmc->max_busy_timeout = (1 << 27) / host->timeout_clk; mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23; + mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD; if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12) host->flags |= SDHCI_AUTO_CMD12; @@ -3226,8 +3242,8 @@ int sdhci_add_host(struct sdhci_host *host) sdhci_init(host, 0); - ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED, - mmc_hostname(mmc), host); + ret = request_threaded_irq(host->irq, sdhci_irq, sdhci_thread_irq, + IRQF_SHARED, mmc_hostname(mmc), host); if (ret) { pr_err("%s: Failed to request IRQ %d: %d\n", mmc_hostname(mmc), host->irq, ret); diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 7be12b883485..d1aa97b77dd9 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -177,6 +177,8 @@ struct sdhci_host { unsigned int ocr_avail_mmc; u32 ocr_mask; /* available voltages */ + u32 thread_isr; + wait_queue_head_t buf_ready_int; /* Waitqueue for Buffer Read Ready interrupt */ unsigned int tuning_done; /* Condition flag set when CMD19 succeeds */ -- cgit v1.2.3-71-gd317 From 3560db8e247aa35bc6b287ec7ec51cd41abd512e Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 25 Apr 2014 12:55:51 +0100 Subject: mmc: sdhci: push card_tasklet into threaded irq handler There's no requirement to have the card tasklet separate now that we have a threaded interrupt handler, so kill this and move the called code into the threaded part of the handler. Signed-off-by: Russell King Tested-by: Markus Pargmann Tested-by: Stephen Warren Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 23 +++++++++-------------- include/linux/mmc/sdhci.h | 3 +-- 2 files changed, 10 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 4a0622d52ae5..8def3919b32c 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2128,15 +2128,6 @@ static const struct mmc_host_ops sdhci_ops = { * * \*****************************************************************************/ -static void sdhci_tasklet_card(unsigned long param) -{ - struct sdhci_host *host = (struct sdhci_host*)param; - - sdhci_card_event(host->mmc); - - mmc_detect_change(host->mmc, msecs_to_jiffies(200)); -} - static void sdhci_tasklet_finish(unsigned long param) { struct sdhci_host *host; @@ -2477,7 +2468,10 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id) sdhci_writel(host, intmask & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS); - tasklet_schedule(&host->card_tasklet); + + host->thread_isr |= intmask & (SDHCI_INT_CARD_INSERT | + SDHCI_INT_CARD_REMOVE); + result = IRQ_WAKE_THREAD; } if (intmask & SDHCI_INT_CMD_MASK) @@ -2534,6 +2528,11 @@ static irqreturn_t sdhci_thread_irq(int irq, void *dev_id) host->thread_isr = 0; spin_unlock_irqrestore(&host->lock, flags); + if (isr & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) { + sdhci_card_event(host->mmc); + mmc_detect_change(host->mmc, msecs_to_jiffies(200)); + } + if (isr & SDHCI_INT_CARD_INT) { sdio_run_irqs(host->mmc); @@ -3224,8 +3223,6 @@ int sdhci_add_host(struct sdhci_host *host) /* * Init tasklets. */ - tasklet_init(&host->card_tasklet, - sdhci_tasklet_card, (unsigned long)host); tasklet_init(&host->finish_tasklet, sdhci_tasklet_finish, (unsigned long)host); @@ -3290,7 +3287,6 @@ reset: free_irq(host->irq, host); #endif untasklet: - tasklet_kill(&host->card_tasklet); tasklet_kill(&host->finish_tasklet); return ret; @@ -3334,7 +3330,6 @@ void sdhci_remove_host(struct sdhci_host *host, int dead) del_timer_sync(&host->timer); - tasklet_kill(&host->card_tasklet); tasklet_kill(&host->finish_tasklet); if (host->vmmc) { diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index d1aa97b77dd9..f1c8e14e8751 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -164,8 +164,7 @@ struct sdhci_host { dma_addr_t adma_addr; /* Mapped ADMA descr. table */ dma_addr_t align_addr; /* Mapped bounce buffer */ - struct tasklet_struct card_tasklet; /* Tasklet structures */ - struct tasklet_struct finish_tasklet; + struct tasklet_struct finish_tasklet; /* Tasklet structures */ struct timer_list timer; /* Timer for timeouts */ -- cgit v1.2.3-71-gd317 From b537f94ce19583de1882f539a5cc49aa99260aca Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 25 Apr 2014 12:56:01 +0100 Subject: mmc: sdhci: more efficient interrupt enable register handling Rather than wasting cycles read-modify-writing the interrupt enable registers, cache the value locally instead. Signed-off-by: Russell King Tested-by: Markus Pargmann Tested-by: Stephen Warren Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 98 +++++++++++++++++++++++------------------------ include/linux/mmc/sdhci.h | 3 ++ 2 files changed, 50 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 0ecbcc4c29d2..4a98ee29d136 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -131,27 +131,6 @@ static void sdhci_dumpregs(struct sdhci_host *host) * * \*****************************************************************************/ -static void sdhci_clear_set_irqs(struct sdhci_host *host, u32 clear, u32 set) -{ - u32 ier; - - ier = sdhci_readl(host, SDHCI_INT_ENABLE); - ier &= ~clear; - ier |= set; - sdhci_writel(host, ier, SDHCI_INT_ENABLE); - sdhci_writel(host, ier, SDHCI_SIGNAL_ENABLE); -} - -static void sdhci_unmask_irqs(struct sdhci_host *host, u32 irqs) -{ - sdhci_clear_set_irqs(host, 0, irqs); -} - -static void sdhci_mask_irqs(struct sdhci_host *host, u32 irqs) -{ - sdhci_clear_set_irqs(host, irqs, 0); -} - static void sdhci_set_card_detection(struct sdhci_host *host, bool enable) { u32 present, irqs; @@ -165,9 +144,12 @@ static void sdhci_set_card_detection(struct sdhci_host *host, bool enable) irqs = present ? SDHCI_INT_CARD_REMOVE : SDHCI_INT_CARD_INSERT; if (enable) - sdhci_unmask_irqs(host, irqs); + host->ier |= irqs; else - sdhci_mask_irqs(host, irqs); + host->ier &= ~irqs; + + sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); + sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); } static void sdhci_enable_card_detection(struct sdhci_host *host) @@ -183,17 +165,12 @@ static void sdhci_disable_card_detection(struct sdhci_host *host) static void sdhci_reset(struct sdhci_host *host, u8 mask) { unsigned long timeout; - u32 uninitialized_var(ier); - if (host->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) { if (!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT)) return; } - if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET) - ier = sdhci_readl(host, SDHCI_INT_ENABLE); - if (host->ops->platform_reset_enter) host->ops->platform_reset_enter(host, mask); @@ -224,8 +201,10 @@ static void sdhci_reset(struct sdhci_host *host, u8 mask) if (host->ops->platform_reset_exit) host->ops->platform_reset_exit(host, mask); - if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET) - sdhci_clear_set_irqs(host, SDHCI_INT_ALL_MASK, ier); + if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET) { + sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); + sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); + } if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) { if ((host->ops->enable_dma) && (mask & SDHCI_RESET_ALL)) @@ -242,11 +221,14 @@ static void sdhci_init(struct sdhci_host *host, int soft) else sdhci_reset(host, SDHCI_RESET_ALL); - sdhci_clear_set_irqs(host, SDHCI_INT_ALL_MASK, - SDHCI_INT_BUS_POWER | SDHCI_INT_DATA_END_BIT | - SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT | SDHCI_INT_INDEX | - SDHCI_INT_END_BIT | SDHCI_INT_CRC | SDHCI_INT_TIMEOUT | - SDHCI_INT_DATA_END | SDHCI_INT_RESPONSE); + host->ier = SDHCI_INT_BUS_POWER | SDHCI_INT_DATA_END_BIT | + SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT | + SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC | + SDHCI_INT_TIMEOUT | SDHCI_INT_DATA_END | + SDHCI_INT_RESPONSE; + + sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); + sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); if (soft) { /* force clock reconfiguration */ @@ -721,9 +703,12 @@ static void sdhci_set_transfer_irqs(struct sdhci_host *host) u32 dma_irqs = SDHCI_INT_DMA_END | SDHCI_INT_ADMA_ERROR; if (host->flags & SDHCI_REQ_USE_DMA) - sdhci_clear_set_irqs(host, pio_irqs, dma_irqs); + host->ier = (host->ier & ~pio_irqs) | dma_irqs; else - sdhci_clear_set_irqs(host, dma_irqs, pio_irqs); + host->ier = (host->ier & ~dma_irqs) | pio_irqs; + + sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); + sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); } static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd) @@ -1713,9 +1698,12 @@ static void sdhci_enable_sdio_irq_nolock(struct sdhci_host *host, int enable) { if (!(host->flags & SDHCI_DEVICE_DEAD)) { if (enable) - sdhci_unmask_irqs(host, SDHCI_INT_CARD_INT); + host->ier |= SDHCI_INT_CARD_INT; else - sdhci_mask_irqs(host, SDHCI_INT_CARD_INT); + host->ier &= ~SDHCI_INT_CARD_INT; + + sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); + sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); mmiowb(); } } @@ -1857,7 +1845,6 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode) { struct sdhci_host *host; u16 ctrl; - u32 ier; int tuning_loop_counter = MAX_TUNING_LOOP; unsigned long timeout; int err = 0; @@ -1911,8 +1898,8 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode) * to make sure we don't hit a controller bug, we _only_ * enable Buffer Read Ready interrupt here. */ - ier = sdhci_readl(host, SDHCI_INT_ENABLE); - sdhci_clear_set_irqs(host, ier, SDHCI_INT_DATA_AVAIL); + sdhci_writel(host, SDHCI_INT_DATA_AVAIL, SDHCI_INT_ENABLE); + sdhci_writel(host, SDHCI_INT_DATA_AVAIL, SDHCI_SIGNAL_ENABLE); /* * Issue CMD19 repeatedly till Execute Tuning is set to 0 or the number @@ -2047,7 +2034,8 @@ out: if (err && (host->flags & SDHCI_USING_RETUNING_TIMER)) err = 0; - sdhci_clear_set_irqs(host, SDHCI_INT_DATA_AVAIL, ier); + sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); + sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); spin_unlock_irqrestore(&host->lock, flags); sdhci_runtime_pm_put(host); @@ -2460,10 +2448,12 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id) * More testing are needed here to ensure it works * for other platforms though. */ - sdhci_mask_irqs(host, present ? SDHCI_INT_CARD_INSERT : - SDHCI_INT_CARD_REMOVE); - sdhci_unmask_irqs(host, present ? SDHCI_INT_CARD_REMOVE : - SDHCI_INT_CARD_INSERT); + host->ier &= ~(SDHCI_INT_CARD_INSERT | + SDHCI_INT_CARD_REMOVE); + host->ier |= present ? SDHCI_INT_CARD_REMOVE : + SDHCI_INT_CARD_INSERT; + sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); + sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); sdhci_writel(host, intmask & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS); @@ -2592,7 +2582,9 @@ int sdhci_suspend_host(struct sdhci_host *host) } if (!device_may_wakeup(mmc_dev(host->mmc))) { - sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK); + host->ier = 0; + sdhci_writel(host, 0, SDHCI_INT_ENABLE); + sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE); free_irq(host->irq, host); } else { sdhci_enable_irq_wakeups(host); @@ -2691,7 +2683,9 @@ int sdhci_runtime_suspend_host(struct sdhci_host *host) } spin_lock_irqsave(&host->lock, flags); - sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK & ~SDHCI_INT_CARD_INT); + host->ier &= SDHCI_INT_CARD_INT; + sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); + sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); spin_unlock_irqrestore(&host->lock, flags); synchronize_hardirq(host->irq); @@ -3282,7 +3276,8 @@ int sdhci_add_host(struct sdhci_host *host) #ifdef SDHCI_USE_LEDS_CLASS reset: sdhci_reset(host, SDHCI_RESET_ALL); - sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK); + sdhci_writel(host, 0, SDHCI_INT_ENABLE); + sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE); free_irq(host->irq, host); #endif untasklet: @@ -3324,7 +3319,8 @@ void sdhci_remove_host(struct sdhci_host *host, int dead) if (!dead) sdhci_reset(host, SDHCI_RESET_ALL); - sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK); + sdhci_writel(host, 0, SDHCI_INT_ENABLE); + sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE); free_irq(host->irq, host); del_timer_sync(&host->timer); diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index f1c8e14e8751..9361d8ef509d 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -178,6 +178,9 @@ struct sdhci_host { u32 thread_isr; + /* cached registers */ + u32 ier; + wait_queue_head_t buf_ready_int; /* Waitqueue for Buffer Read Ready interrupt */ unsigned int tuning_done; /* Condition flag set when CMD19 succeeds */ -- cgit v1.2.3-71-gd317 From 0718e59ae259f7c48155b4e852d8b0632d59028e Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 25 Apr 2014 12:57:18 +0100 Subject: mmc: sdhci: move FSL ESDHC reset handling quirk into esdhc code The Freescale esdhc driver is the only driver which needs the interrupt registers restored after a reset. Move this quirk to be part of the ESDHC driver implementation. Signed-off-by: Russell King Tested-by: Markus Pargmann Tested-by: Stephen Warren Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-esdhc-imx.c | 10 +++++++++- drivers/mmc/host/sdhci-esdhc.h | 3 +-- drivers/mmc/host/sdhci.c | 5 ----- include/linux/mmc/sdhci.h | 2 -- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index b1d74fa33c5f..812c5772d900 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -876,6 +876,14 @@ static int esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs) return esdhc_change_pinstate(host, uhs); } +static void esdhc_reset(struct sdhci_host *host, u8 mask) +{ + sdhci_reset(host, mask); + + sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); + sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); +} + static struct sdhci_ops sdhci_esdhc_ops = { .read_l = esdhc_readl_le, .read_w = esdhc_readw_le, @@ -888,7 +896,7 @@ static struct sdhci_ops sdhci_esdhc_ops = { .get_ro = esdhc_pltfm_get_ro, .set_bus_width = esdhc_pltfm_set_bus_width, .set_uhs_signaling = esdhc_set_uhs_signaling, - .reset = sdhci_reset, + .reset = esdhc_reset, }; static const struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = { diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h index a7d9f95a7b03..de69bddc3afc 100644 --- a/drivers/mmc/host/sdhci-esdhc.h +++ b/drivers/mmc/host/sdhci-esdhc.h @@ -22,8 +22,7 @@ SDHCI_QUIRK_NO_BUSY_IRQ | \ SDHCI_QUIRK_NONSTANDARD_CLOCK | \ SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | \ - SDHCI_QUIRK_PIO_NEEDS_DELAY | \ - SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET) + SDHCI_QUIRK_PIO_NEEDS_DELAY) #define ESDHC_SYSTEM_CONTROL 0x2c #define ESDHC_CLOCK_MASK 0x0000fff0 diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 5e25147e92f7..074157e8e73d 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -203,11 +203,6 @@ static void sdhci_do_reset(struct sdhci_host *host, u8 mask) host->ops->reset(host, mask); - if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET) { - sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); - sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); - } - if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) { if ((host->ops->enable_dma) && (mask & SDHCI_RESET_ALL)) host->ops->enable_dma(host); diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 9361d8ef509d..02919ef99419 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -61,8 +61,6 @@ struct sdhci_host { #define SDHCI_QUIRK_NONSTANDARD_CLOCK (1<<17) /* Controller does not like fast PIO transfers */ #define SDHCI_QUIRK_PIO_NEEDS_DELAY (1<<18) -/* Controller losing signal/interrupt enable states after reset */ -#define SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET (1<<19) /* Controller has to be forced to use block size of 2048 bytes */ #define SDHCI_QUIRK_FORCE_BLK_SZ_2048 (1<<20) /* Controller cannot do multi-block transfers */ -- cgit v1.2.3-71-gd317 From 1771059cf5f9c09e37ef6315df8acf120f2642fc Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 25 Apr 2014 12:58:55 +0100 Subject: mmc: sdhci: convert sdhci_set_clock() into a library function Signed-off-by: Russell King Tested-by: Markus Pargmann Tested-by: Stephen Warren Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-acpi.c | 2 ++ drivers/mmc/host/sdhci-bcm-kona.c | 1 + drivers/mmc/host/sdhci-bcm2835.c | 1 + drivers/mmc/host/sdhci-cns3xxx.c | 3 +-- drivers/mmc/host/sdhci-dove.c | 1 + drivers/mmc/host/sdhci-esdhc.h | 1 - drivers/mmc/host/sdhci-of-arasan.c | 1 + drivers/mmc/host/sdhci-of-hlwd.c | 1 + drivers/mmc/host/sdhci-pci.c | 1 + drivers/mmc/host/sdhci-pltfm.c | 1 + drivers/mmc/host/sdhci-pxav2.c | 1 + drivers/mmc/host/sdhci-pxav3.c | 1 + drivers/mmc/host/sdhci-s3c.c | 19 ++++++++++++++----- drivers/mmc/host/sdhci-sirf.c | 1 + drivers/mmc/host/sdhci-spear.c | 1 + drivers/mmc/host/sdhci-tegra.c | 1 + drivers/mmc/host/sdhci.c | 17 ++++++----------- drivers/mmc/host/sdhci.h | 1 + include/linux/mmc/sdhci.h | 2 -- 19 files changed, 36 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index aca84a682551..323e2a688563 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -102,12 +102,14 @@ static void sdhci_acpi_int_hw_reset(struct sdhci_host *host) } static const struct sdhci_ops sdhci_acpi_ops_dflt = { + .set_clock = sdhci_set_clock, .enable_dma = sdhci_acpi_enable_dma, .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, }; static const struct sdhci_ops sdhci_acpi_ops_int = { + .set_clock = sdhci_set_clock, .enable_dma = sdhci_acpi_enable_dma, .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, diff --git a/drivers/mmc/host/sdhci-bcm-kona.c b/drivers/mmc/host/sdhci-bcm-kona.c index 7b97bfab910d..e610811c09b0 100644 --- a/drivers/mmc/host/sdhci-bcm-kona.c +++ b/drivers/mmc/host/sdhci-bcm-kona.c @@ -206,6 +206,7 @@ static void sdhci_bcm_kona_init_74_clocks(struct sdhci_host *host, } static struct sdhci_ops sdhci_bcm_kona_ops = { + .set_clock = sdhci_set_clock, .get_max_clock = sdhci_bcm_kona_get_max_clk, .get_timeout_clock = sdhci_bcm_kona_get_timeout_clock, .platform_send_init_74_clocks = sdhci_bcm_kona_init_74_clocks, diff --git a/drivers/mmc/host/sdhci-bcm2835.c b/drivers/mmc/host/sdhci-bcm2835.c index 289b1c80d5fc..74906d6008e1 100644 --- a/drivers/mmc/host/sdhci-bcm2835.c +++ b/drivers/mmc/host/sdhci-bcm2835.c @@ -131,6 +131,7 @@ static const struct sdhci_ops bcm2835_sdhci_ops = { .read_l = bcm2835_sdhci_readl, .read_w = bcm2835_sdhci_readw, .read_b = bcm2835_sdhci_readb, + .set_clock = sdhci_set_clock, .get_max_clock = sdhci_pltfm_clk_get_max_clock, .get_min_clock = bcm2835_sdhci_get_min_clock, .set_bus_width = sdhci_set_bus_width, diff --git a/drivers/mmc/host/sdhci-cns3xxx.c b/drivers/mmc/host/sdhci-cns3xxx.c index 416f4a4c2e35..587d73ef33ff 100644 --- a/drivers/mmc/host/sdhci-cns3xxx.c +++ b/drivers/mmc/host/sdhci-cns3xxx.c @@ -89,8 +89,7 @@ static const struct sdhci_pltfm_data sdhci_cns3xxx_pdata = { SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | SDHCI_QUIRK_INVERTED_WRITE_PROTECT | SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN | - SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | - SDHCI_QUIRK_NONSTANDARD_CLOCK, + SDHCI_QUIRK_BROKEN_TIMEOUT_VAL, }; static int sdhci_cns3xxx_probe(struct platform_device *pdev) diff --git a/drivers/mmc/host/sdhci-dove.c b/drivers/mmc/host/sdhci-dove.c index 1408cc11d881..8ef4ab52f8e0 100644 --- a/drivers/mmc/host/sdhci-dove.c +++ b/drivers/mmc/host/sdhci-dove.c @@ -86,6 +86,7 @@ static u32 sdhci_dove_readl(struct sdhci_host *host, int reg) static const struct sdhci_ops sdhci_dove_ops = { .read_w = sdhci_dove_readw, .read_l = sdhci_dove_readl, + .set_clock = sdhci_set_clock, .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, }; diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h index de69bddc3afc..3497cfaf683c 100644 --- a/drivers/mmc/host/sdhci-esdhc.h +++ b/drivers/mmc/host/sdhci-esdhc.h @@ -20,7 +20,6 @@ #define ESDHC_DEFAULT_QUIRKS (SDHCI_QUIRK_FORCE_BLK_SZ_2048 | \ SDHCI_QUIRK_NO_BUSY_IRQ | \ - SDHCI_QUIRK_NONSTANDARD_CLOCK | \ SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | \ SDHCI_QUIRK_PIO_NEEDS_DELAY) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index faef21740584..f0ee594f25d1 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -52,6 +52,7 @@ static unsigned int sdhci_arasan_get_timeout_clock(struct sdhci_host *host) } static struct sdhci_ops sdhci_arasan_ops = { + .set_clock = sdhci_set_clock, .get_max_clock = sdhci_pltfm_clk_get_max_clock, .get_timeout_clock = sdhci_arasan_get_timeout_clock, .set_bus_width = sdhci_set_bus_width, diff --git a/drivers/mmc/host/sdhci-of-hlwd.c b/drivers/mmc/host/sdhci-of-hlwd.c index fb01958cb18e..a4a1f0f2c0a0 100644 --- a/drivers/mmc/host/sdhci-of-hlwd.c +++ b/drivers/mmc/host/sdhci-of-hlwd.c @@ -58,6 +58,7 @@ static const struct sdhci_ops sdhci_hlwd_ops = { .write_l = sdhci_hlwd_writel, .write_w = sdhci_hlwd_writew, .write_b = sdhci_hlwd_writeb, + .set_clock = sdhci_set_clock, .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, }; diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 87f9dd91f68c..b3a28f6b170e 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -1078,6 +1078,7 @@ static void sdhci_pci_hw_reset(struct sdhci_host *host) } static const struct sdhci_ops sdhci_pci_ops = { + .set_clock = sdhci_set_clock, .enable_dma = sdhci_pci_enable_dma, .set_bus_width = sdhci_pci_set_bus_width, .reset = sdhci_reset, diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c index bfbf467b61c7..1fb89f44bd58 100644 --- a/drivers/mmc/host/sdhci-pltfm.c +++ b/drivers/mmc/host/sdhci-pltfm.c @@ -45,6 +45,7 @@ unsigned int sdhci_pltfm_clk_get_max_clock(struct sdhci_host *host) EXPORT_SYMBOL_GPL(sdhci_pltfm_clk_get_max_clock); static const struct sdhci_ops sdhci_pltfm_ops = { + .set_clock = sdhci_set_clock, .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, }; diff --git a/drivers/mmc/host/sdhci-pxav2.c b/drivers/mmc/host/sdhci-pxav2.c index 2eee0c8b88eb..db5257bf032e 100644 --- a/drivers/mmc/host/sdhci-pxav2.c +++ b/drivers/mmc/host/sdhci-pxav2.c @@ -112,6 +112,7 @@ static void pxav2_mmc_set_bus_width(struct sdhci_host *host, int width) } static const struct sdhci_ops pxav2_sdhci_ops = { + .set_clock = sdhci_set_clock, .get_max_clock = sdhci_pltfm_clk_get_max_clock, .set_bus_width = pxav2_mmc_set_bus_width, .reset = pxav2_reset, diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c index 86564233ae93..8a40e079a57e 100644 --- a/drivers/mmc/host/sdhci-pxav3.c +++ b/drivers/mmc/host/sdhci-pxav3.c @@ -225,6 +225,7 @@ static int pxav3_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs) } static const struct sdhci_ops pxav3_sdhci_ops = { + .set_clock = sdhci_set_clock, .set_uhs_signaling = pxav3_set_uhs_signaling, .platform_send_init_74_clocks = pxav3_gen_init_74_clocks, .get_max_clock = sdhci_pltfm_clk_get_max_clock, diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c index 9d710b748b9c..9e6f1c52982c 100644 --- a/drivers/mmc/host/sdhci-s3c.c +++ b/drivers/mmc/host/sdhci-s3c.c @@ -55,6 +55,8 @@ struct sdhci_s3c { struct clk *clk_io; struct clk *clk_bus[MAX_BUS_CLK]; unsigned long clk_rates[MAX_BUS_CLK]; + + bool no_divider; }; /** @@ -67,6 +69,7 @@ struct sdhci_s3c { */ struct sdhci_s3c_drv_data { unsigned int sdhci_quirks; + bool no_divider; }; static inline struct sdhci_s3c *to_s3c(struct sdhci_host *host) @@ -116,7 +119,7 @@ static unsigned int sdhci_s3c_consider_clock(struct sdhci_s3c *ourhost, * If controller uses a non-standard clock division, find the best clock * speed possible with selected clock source and skip the division. */ - if (ourhost->host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK) { + if (ourhost->no_divider) { rate = clk_round_rate(clksrc, wanted); return wanted - rate; } @@ -161,8 +164,10 @@ static void sdhci_s3c_set_clock(struct sdhci_host *host, unsigned int clock) host->mmc->actual_clock = 0; /* don't bother if the clock is going off. */ - if (clock == 0) + if (clock == 0) { + sdhci_set_clock(host, clock); return; + } for (src = 0; src < MAX_BUS_CLK; src++) { delta = sdhci_s3c_consider_clock(ourhost, src, clock); @@ -214,6 +219,8 @@ static void sdhci_s3c_set_clock(struct sdhci_host *host, unsigned int clock) if (clock < 25 * 1000000) ctrl |= (S3C_SDHCI_CTRL3_FCSEL3 | S3C_SDHCI_CTRL3_FCSEL2); writel(ctrl, host->ioaddr + S3C_SDHCI_CONTROL3); + + sdhci_set_clock(host, clock); } /** @@ -603,8 +610,10 @@ static int sdhci_s3c_probe(struct platform_device *pdev) /* Setup quirks for the controller */ host->quirks |= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC; host->quirks |= SDHCI_QUIRK_NO_HISPD_BIT; - if (drv_data) + if (drv_data) { host->quirks |= drv_data->sdhci_quirks; + sc->no_divider = drv_data->no_divider; + } #ifndef CONFIG_MMC_SDHCI_S3C_DMA @@ -653,7 +662,7 @@ static int sdhci_s3c_probe(struct platform_device *pdev) * If controller does not have internal clock divider, * we can use overriding functions instead of default. */ - if (host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK) { + if (sc->no_divider) { sdhci_s3c_ops.set_clock = sdhci_cmu_set_clock; sdhci_s3c_ops.get_min_clock = sdhci_cmu_get_min_clock; sdhci_s3c_ops.get_max_clock = sdhci_cmu_get_max_clock; @@ -794,7 +803,7 @@ static const struct dev_pm_ops sdhci_s3c_pmops = { #if defined(CONFIG_CPU_EXYNOS4210) || defined(CONFIG_SOC_EXYNOS4212) static struct sdhci_s3c_drv_data exynos4_sdhci_drv_data = { - .sdhci_quirks = SDHCI_QUIRK_NONSTANDARD_CLOCK, + .no_divider = true, }; #define EXYNOS4_SDHCI_DRV_DATA ((kernel_ulong_t)&exynos4_sdhci_drv_data) #else diff --git a/drivers/mmc/host/sdhci-sirf.c b/drivers/mmc/host/sdhci-sirf.c index 5d79e10e1ba2..3b775348b470 100644 --- a/drivers/mmc/host/sdhci-sirf.c +++ b/drivers/mmc/host/sdhci-sirf.c @@ -28,6 +28,7 @@ static unsigned int sdhci_sirf_get_max_clk(struct sdhci_host *host) } static struct sdhci_ops sdhci_sirf_ops = { + .set_clock = sdhci_set_clock, .get_max_clock = sdhci_sirf_get_max_clk, .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c index c2a2bedc8813..8bf64ab36720 100644 --- a/drivers/mmc/host/sdhci-spear.c +++ b/drivers/mmc/host/sdhci-spear.c @@ -38,6 +38,7 @@ struct spear_sdhci { /* sdhci ops */ static const struct sdhci_ops sdhci_pltfm_ops = { + .set_clock = sdhci_set_clock, .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, }; diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index 7754c0319fda..a0a8b5cc3b0c 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -153,6 +153,7 @@ static const struct sdhci_ops tegra_sdhci_ops = { .read_l = tegra_sdhci_readl, .read_w = tegra_sdhci_readw, .write_l = tegra_sdhci_writel, + .set_clock = sdhci_set_clock, .set_bus_width = tegra_sdhci_set_bus_width, .reset = tegra_sdhci_reset, }; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index d9b91fc17bb0..69e58d071b33 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1112,19 +1112,13 @@ static u16 sdhci_get_preset_value(struct sdhci_host *host) return preset; } -static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) +void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) { int div = 0; /* Initialized for compiler warning */ int real_div = div, clk_mul = 1; u16 clk = 0; unsigned long timeout; - if (host->ops->set_clock) { - host->ops->set_clock(host, clock); - if (host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK) - return; - } - host->mmc->actual_clock = 0; sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL); @@ -1221,6 +1215,7 @@ clock_set: clk |= SDHCI_CLOCK_CARD_EN; sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL); } +EXPORT_SYMBOL_GPL(sdhci_set_clock); static int sdhci_set_power(struct sdhci_host *host, unsigned short power) { @@ -1439,7 +1434,7 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) sdhci_enable_preset_value(host, false); if (!ios->clock || ios->clock != host->clock) { - sdhci_set_clock(host, ios->clock); + host->ops->set_clock(host, ios->clock); host->clock = ios->clock; } @@ -1510,7 +1505,7 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL); /* Re-enable SD Clock */ - sdhci_set_clock(host, host->clock); + host->ops->set_clock(host, host->clock); } @@ -1555,7 +1550,7 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) } /* Re-enable SD Clock */ - sdhci_set_clock(host, host->clock); + host->ops->set_clock(host, host->clock); } else sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL); @@ -2129,7 +2124,7 @@ static void sdhci_tasklet_finish(unsigned long param) /* Some controllers need this kick or reset won't work here */ if (host->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET) /* This is to force an update */ - sdhci_set_clock(host, host->clock); + host->ops->set_clock(host, host->clock); /* Spec says we should do both at the same time, but Ricoh controllers do not like that. */ diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 7d84cb3b0e00..ac20195f667b 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -400,6 +400,7 @@ static inline bool sdhci_sdio_irq_enabled(struct sdhci_host *host) return !!(host->flags & SDHCI_SDIO_IRQ_ENABLED); } +void sdhci_set_clock(struct sdhci_host *host, unsigned int clock); void sdhci_set_bus_width(struct sdhci_host *host, int width); void sdhci_reset(struct sdhci_host *host, u8 mask); diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 02919ef99419..72a90baf111f 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -57,8 +57,6 @@ struct sdhci_host { #define SDHCI_QUIRK_BROKEN_CARD_DETECTION (1<<15) /* Controller reports inverted write-protect state */ #define SDHCI_QUIRK_INVERTED_WRITE_PROTECT (1<<16) -/* Controller has nonstandard clock management */ -#define SDHCI_QUIRK_NONSTANDARD_CLOCK (1<<17) /* Controller does not like fast PIO transfers */ #define SDHCI_QUIRK_PIO_NEEDS_DELAY (1<<18) /* Controller has to be forced to use block size of 2048 bytes */ -- cgit v1.2.3-71-gd317 From d975f121011a58223c7936ab483c3374a83236c3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 25 Apr 2014 12:59:31 +0100 Subject: mmc: sdhci: cache timing information locally Rather than reading back the timing information from the registers, cache it locally. This allows implementations to translate the UHS timing by overriding the set_uhs_signaling() method as required without also having to emulate the SDHCI_HOST_CONTROL2 register. Signed-off-by: Russell King Tested-by: Markus Pargmann Tested-by: Stephen Warren [Ulf Hansson] Resolved conflict Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 23 ++++++++++++----------- include/linux/mmc/sdhci.h | 2 ++ 2 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 0073aae0adcb..956799c75df2 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1083,24 +1083,23 @@ static void sdhci_finish_command(struct sdhci_host *host) static u16 sdhci_get_preset_value(struct sdhci_host *host) { - u16 ctrl, preset = 0; + u16 preset = 0; - ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); - - switch (ctrl & SDHCI_CTRL_UHS_MASK) { - case SDHCI_CTRL_UHS_SDR12: + switch (host->timing) { + case MMC_TIMING_UHS_SDR12: preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR12); break; - case SDHCI_CTRL_UHS_SDR25: + case MMC_TIMING_UHS_SDR25: preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR25); break; - case SDHCI_CTRL_UHS_SDR50: + case MMC_TIMING_UHS_SDR50: preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR50); break; - case SDHCI_CTRL_UHS_SDR104: + case MMC_TIMING_UHS_SDR104: + case MMC_TIMING_MMC_HS200: preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR104); break; - case SDHCI_CTRL_UHS_DDR50: + case MMC_TIMING_UHS_DDR50: preset = sdhci_readw(host, SDHCI_PRESET_FOR_DDR50); break; default: @@ -1538,6 +1537,7 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL); host->ops->set_uhs_signaling(host, ios->timing); + host->timing = ios->timing; if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && ((ios->timing == MMC_TIMING_UHS_SDR12) || @@ -1842,12 +1842,13 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode) * If the Host Controller supports the HS200 mode then the * tuning function has to be executed. */ - if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR50) && + if (host->timing == MMC_TIMING_UHS_SDR50 && (host->flags & SDHCI_SDR50_NEEDS_TUNING || host->flags & SDHCI_SDR104_NEEDS_TUNING)) requires_tuning_nonuhs = true; - if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR104) || + if (host->timing == MMC_TIMING_MMC_HS200 || + host->timing == MMC_TIMING_UHS_SDR104 || requires_tuning_nonuhs) ctrl |= SDHCI_CTRL_EXEC_TUNING; else { diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 72a90baf111f..7f3efbab8732 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -172,6 +172,8 @@ struct sdhci_host { unsigned int ocr_avail_mmc; u32 ocr_mask; /* available voltages */ + unsigned timing; /* Current timing */ + u32 thread_isr; /* cached registers */ -- cgit v1.2.3-71-gd317 From da91a8f9c0f56d75b35bfe2e2456187ab55b3639 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 25 Apr 2014 13:00:12 +0100 Subject: mmc: sdhci: track whether preset mode is currently enabled in hardware Track whether preset mode is currently enabled in hardware, and use that when making decisions elsewhere in the code rather than reading the register and checking the bit. Signed-off-by: Russell King Tested-by: Markus Pargmann Tested-by: Stephen Warren Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 44 ++++++++++++++++++++++++++------------------ include/linux/mmc/sdhci.h | 1 + 2 files changed, 27 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index effd9e5d1d81..447eef8217c7 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -205,9 +205,14 @@ static void sdhci_do_reset(struct sdhci_host *host, u8 mask) host->ops->reset(host, mask); - if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) { - if ((host->ops->enable_dma) && (mask & SDHCI_RESET_ALL)) - host->ops->enable_dma(host); + if (mask & SDHCI_RESET_ALL) { + if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) { + if (host->ops->enable_dma) + host->ops->enable_dma(host); + } + + /* Resetting the controller clears many */ + host->preset_enabled = false; } } @@ -1126,8 +1131,7 @@ void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) return; if (host->version >= SDHCI_SPEC_300) { - if (sdhci_readw(host, SDHCI_HOST_CONTROL2) & - SDHCI_CTRL_PRESET_VAL_ENABLE) { + if (host->preset_enabled) { u16 pre_val; clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); @@ -1493,13 +1497,13 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) (ios->timing == MMC_TIMING_UHS_SDR25)) ctrl |= SDHCI_CTRL_HISPD; - ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); - if (!(ctrl_2 & SDHCI_CTRL_PRESET_VAL_ENABLE)) { + if (!host->preset_enabled) { sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL); /* * We only need to set Driver Strength if the * preset value enable is not set. */ + ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); ctrl_2 &= ~SDHCI_CTRL_DRV_TYPE_MASK; if (ios->drv_type == MMC_SET_DRIVER_TYPE_A) ctrl_2 |= SDHCI_CTRL_DRV_TYPE_A; @@ -2018,26 +2022,30 @@ out: static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable) { - u16 ctrl; - /* Host Controller v3.00 defines preset value registers */ if (host->version < SDHCI_SPEC_300) return; - ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); - /* * We only enable or disable Preset Value if they are not already * enabled or disabled respectively. Otherwise, we bail out. */ - if (enable && !(ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) { - ctrl |= SDHCI_CTRL_PRESET_VAL_ENABLE; - sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); - host->flags |= SDHCI_PV_ENABLED; - } else if (!enable && (ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) { - ctrl &= ~SDHCI_CTRL_PRESET_VAL_ENABLE; + if (host->preset_enabled != enable) { + u16 ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + + if (enable) + ctrl |= SDHCI_CTRL_PRESET_VAL_ENABLE; + else + ctrl &= ~SDHCI_CTRL_PRESET_VAL_ENABLE; + sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); - host->flags &= ~SDHCI_PV_ENABLED; + + if (enable) + host->flags |= SDHCI_PV_ENABLED; + else + host->flags &= ~SDHCI_PV_ENABLED; + + host->preset_enabled = enable; } } diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 7f3efbab8732..08abe9941884 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -143,6 +143,7 @@ struct sdhci_host { bool runtime_suspended; /* Host is runtime suspended */ bool bus_on; /* Bus power prevents runtime suspend */ + bool preset_enabled; /* Preset is enabled */ struct mmc_request *mrq; /* Current request */ struct mmc_command *cmd; /* Current command */ -- cgit v1.2.3-71-gd317 From ee526d515ad12e9fee2d2dbfc7f626c0a5c7f417 Mon Sep 17 00:00:00 2001 From: Balaji T K Date: Fri, 9 May 2014 22:16:53 +0530 Subject: mmc: omap_hsmmc: split omap-dma header file moving dmaengine consumer specific function to omap-dmaengine.h to Resolve build failure seen with sh-allmodconfig: include/linux/omap-dma.h:171:8: error: expected identifier before numeric constant make[4]: *** [drivers/mmc/host/omap_hsmmc.o] Error 1 Cc: Russell King - ARM Linux Cc: Tony Lindgren Signed-off-by: Balaji T K Acked-by: Tony Lindgren Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/host/omap_hsmmc.c | 2 +- include/linux/omap-dma.h | 19 +------------------ include/linux/omap-dmaengine.h | 21 +++++++++++++++++++++ 3 files changed, 23 insertions(+), 19 deletions(-) create mode 100644 include/linux/omap-dmaengine.h (limited to 'include') diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index cba71d69a79c..6b7b75585926 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h index 41a13e70f41f..999f52d3d1e7 100644 --- a/include/linux/omap-dma.h +++ b/include/linux/omap-dma.h @@ -1,23 +1,6 @@ -/* - * OMAP DMA Engine support - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ #ifndef __LINUX_OMAP_DMA_H #define __LINUX_OMAP_DMA_H - -struct dma_chan; - -#if defined(CONFIG_DMA_OMAP) || defined(CONFIG_DMA_OMAP_MODULE) -bool omap_dma_filter_fn(struct dma_chan *, void *); -#else -static inline bool omap_dma_filter_fn(struct dma_chan *c, void *d) -{ - return false; -} -#endif +#include /* * Legacy OMAP DMA handling defines and functions diff --git a/include/linux/omap-dmaengine.h b/include/linux/omap-dmaengine.h new file mode 100644 index 000000000000..2b0b6aa01922 --- /dev/null +++ b/include/linux/omap-dmaengine.h @@ -0,0 +1,21 @@ +/* + * OMAP DMA Engine support + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __LINUX_OMAP_DMAENGINE_H +#define __LINUX_OMAP_DMAENGINE_H + +struct dma_chan; + +#if defined(CONFIG_DMA_OMAP) || defined(CONFIG_DMA_OMAP_MODULE) +bool omap_dma_filter_fn(struct dma_chan *, void *); +#else +static inline bool omap_dma_filter_fn(struct dma_chan *c, void *d) +{ + return false; +} +#endif +#endif /* __LINUX_OMAP_DMAENGINE_H */ -- cgit v1.2.3-71-gd317 From 73e4354444eef5251e5cdfd388ab02ef9f2e727e Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 22 May 2014 16:42:41 +0800 Subject: workqueue: declare system_highpri_wq system_highpri_wq is exported to modules via EXPORT_SYMBOL_GPL(), but it was forgotten to be declared in workqueue.h. So we add the declaration and a short description for it. tj: Minor comment tweak. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index d93d28b2ec73..b263b29bd98b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -340,6 +340,9 @@ enum { * short queue flush time. Don't queue works which can run for too * long. * + * system_highpri_wq is similar to system_wq but for work items which + * require WQ_HIGHPRI. + * * system_long_wq is similar to system_wq but may host long running * works. Queue flushing might take relatively long. * @@ -358,6 +361,7 @@ enum { * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info. */ extern struct workqueue_struct *system_wq; +extern struct workqueue_struct *system_highpri_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_freezable_wq; -- cgit v1.2.3-71-gd317 From 79bc251f0e0aea67bc230c530f7fa57f66f9cdf3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 22 May 2014 16:43:44 +0800 Subject: workqueue: remove unused WORK_CPU_END WORK_CPU_END is totally unused since 4e8b22bd1a37 ("workqueue: fix pool ID allocation leakage and remove BUILD_BUG_ON() in init_workqueues"). It should be removed. After it is removed, the comment "special cpu IDs" is not precise due to there is only one special CPU ID (WORK_CPU_UNBOUND) left, so we also change this comment to the description for WORK_CPU_UNBOUND. tj: Minor description and comment tweaks. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b263b29bd98b..b8aee9453f22 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -56,9 +56,8 @@ enum { WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS) - 1, WORK_NO_COLOR = WORK_NR_COLORS, - /* special cpu IDs */ + /* not bound to any CPU, prefer the local CPU */ WORK_CPU_UNBOUND = NR_CPUS, - WORK_CPU_END = NR_CPUS + 1, /* * Reserve 7 bits off of pwq pointer w/ debugobjects turned off. -- cgit v1.2.3-71-gd317 From cafebac153ae54fd0aba5d4ad28af995532c5375 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 22 May 2014 16:43:56 +0800 Subject: workqueue: remove unused work_clear_pending() In 8930caba3dbd ("workqueue: disable irq while manipulating PENDING"), setting last CPU and clearing PENDING got merged into a single operation (set_work_cpu_and_clear_pending()), which resulted that the internal routine work_clear_pending() is not used any more. tj: Minor description tweak. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b8aee9453f22..a0cc2e95ed1b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -273,13 +273,6 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define delayed_work_pending(w) \ work_pending(&(w)->work) -/** - * work_clear_pending - for internal use only, mark a work item as not pending - * @work: The work item in question - */ -#define work_clear_pending(work) \ - clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) - /* * Workqueue flags and constants. For details, please refer to * Documentation/workqueue.txt. -- cgit v1.2.3-71-gd317 From ca8a22634381537c92b5a10308652e1c38fd9edf Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 22 May 2014 10:41:08 -0400 Subject: tcp: make cwnd-limited checks measurement-based, and gentler Experience with the recent e114a710aa50 ("tcp: fix cwnd limited checking to improve congestion control") has shown that there are common cases where that commit can cause cwnd to be much larger than necessary. This leads to TSO autosizing cooking skbs that are too large, among other things. The main problems seemed to be: (1) That commit attempted to predict the future behavior of the connection by looking at the write queue (if TSO or TSQ limit sending). That prediction sometimes overestimated future outstanding packets. (2) That commit always allowed cwnd to grow to twice the number of outstanding packets (even in congestion avoidance, where this is not needed). This commit improves both of these, by: (1) Switching to a measurement-based approach where we explicitly track the largest number of packets in flight during the past window ("max_packets_out"), and remember whether we were cwnd-limited at the moment we finished sending that flight. (2) Only allowing cwnd to grow to twice the number of outstanding packets ("max_packets_out") in slow start. In congestion avoidance mode we now only allow cwnd to grow if it was fully utilized. Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ++++-- include/net/tcp.h | 11 ++++++++--- net/ipv4/tcp_output.c | 37 +++++++++++++++++++++++-------------- 3 files changed, 35 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index bc35e4709e8e..a0513210798f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -197,7 +197,8 @@ struct tcp_sock { u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ - syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ + syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ + is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ /* RTT measurement */ @@ -209,6 +210,8 @@ struct tcp_sock { u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ + u32 max_packets_out; /* max packets_out in last window */ + u32 max_packets_seq; /* right edge of max_packets_out flight */ u16 urg_data; /* Saved octet of OOB data and control flags */ u8 ecn_flags; /* ECN status bits. */ @@ -230,7 +233,6 @@ struct tcp_sock { u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; u32 snd_cwnd_stamp; - u32 lsnd_pending; /* packets inflight or unsent since last xmit */ u32 prior_cwnd; /* Congestion window at start of Recovery. */ u32 prr_delivered; /* Number of newly delivered packets to * receiver in Recovery. */ diff --git a/include/net/tcp.h b/include/net/tcp.h index f5d6ca4a9d28..e80abe4486cb 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -971,8 +971,9 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp) /* We follow the spirit of RFC2861 to validate cwnd but implement a more * flexible approach. The RFC suggests cwnd should not be raised unless - * it was fully used previously. But we allow cwnd to grow as long as the - * application has used half the cwnd. + * it was fully used previously. And that's exactly what we do in + * congestion avoidance mode. But in slow start we allow cwnd to grow + * as long as the application has used half the cwnd. * Example : * cwnd is 10 (IW10), but application sends 9 frames. * We allow cwnd to reach 18 when all frames are ACKed. @@ -985,7 +986,11 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - return tp->snd_cwnd < 2 * tp->lsnd_pending; + /* If in slow start, ensure cwnd grows to twice what was ACKed. */ + if (tp->snd_cwnd <= tp->snd_ssthresh) + return tp->snd_cwnd < 2 * tp->max_packets_out; + + return tp->is_cwnd_limited; } static inline void tcp_check_probe_timer(struct sock *sk) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3d61c52bdf79..d463c35db33d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1402,11 +1402,19 @@ static void tcp_cwnd_application_limited(struct sock *sk) tp->snd_cwnd_stamp = tcp_time_stamp; } -static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs) +static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) { struct tcp_sock *tp = tcp_sk(sk); - tp->lsnd_pending = tp->packets_out + unsent_segs; + /* Track the maximum number of outstanding packets in each + * window, and remember whether we were cwnd-limited then. + */ + if (!before(tp->snd_una, tp->max_packets_seq) || + tp->packets_out > tp->max_packets_out) { + tp->max_packets_out = tp->packets_out; + tp->max_packets_seq = tp->snd_nxt; + tp->is_cwnd_limited = is_cwnd_limited; + } if (tcp_is_cwnd_limited(sk)) { /* Network is feed fully. */ @@ -1660,7 +1668,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, * * This algorithm is from John Heffner. */ -static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) +static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, + bool *is_cwnd_limited) { struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1724,6 +1733,9 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) if (!tp->tso_deferred) tp->tso_deferred = 1 | (jiffies << 1); + if (cong_win < send_win && cong_win < skb->len) + *is_cwnd_limited = true; + return true; send_now: @@ -1881,9 +1893,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - unsigned int tso_segs, sent_pkts, unsent_segs = 0; + unsigned int tso_segs, sent_pkts; int cwnd_quota; int result; + bool is_cwnd_limited = false; sent_pkts = 0; @@ -1908,6 +1921,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, cwnd_quota = tcp_cwnd_test(tp, skb); if (!cwnd_quota) { + is_cwnd_limited = true; if (push_one == 2) /* Force out a loss probe pkt. */ cwnd_quota = 1; @@ -1924,8 +1938,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, nonagle : TCP_NAGLE_PUSH)))) break; } else { - if (!push_one && tcp_tso_should_defer(sk, skb)) - goto compute_unsent_segs; + if (!push_one && + tcp_tso_should_defer(sk, skb, &is_cwnd_limited)) + break; } /* TCP Small Queues : @@ -1950,14 +1965,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, * there is no smp_mb__after_set_bit() yet */ smp_mb__after_clear_bit(); - if (atomic_read(&sk->sk_wmem_alloc) > limit) { - u32 unsent_bytes; - -compute_unsent_segs: - unsent_bytes = tp->write_seq - tp->snd_nxt; - unsent_segs = DIV_ROUND_UP(unsent_bytes, mss_now); + if (atomic_read(&sk->sk_wmem_alloc) > limit) break; - } } limit = mss_now; @@ -1997,7 +2006,7 @@ repair: /* Send one loss probe per tail loss episode. */ if (push_one != 2) tcp_schedule_loss_probe(sk); - tcp_cwnd_validate(sk, unsent_segs); + tcp_cwnd_validate(sk, is_cwnd_limited); return false; } return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); -- cgit v1.2.3-71-gd317 From e876f208af18b074f800656e4d1b99da75b2135f Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Mon, 19 May 2014 13:59:52 -0300 Subject: net: Add a software TSO helper API Although the implementation probably needs a lot of work, this initial API allows to implement software TSO in mvneta and mv643xx_eth drivers in a not so intrusive way. Signed-off-by: Ezequiel Garcia Signed-off-by: David S. Miller --- include/net/tso.h | 20 ++++++++++++++++ net/core/Makefile | 2 +- net/core/tso.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 include/net/tso.h create mode 100644 net/core/tso.c (limited to 'include') diff --git a/include/net/tso.h b/include/net/tso.h new file mode 100644 index 000000000000..47e5444f7d15 --- /dev/null +++ b/include/net/tso.h @@ -0,0 +1,20 @@ +#ifndef _TSO_H +#define _TSO_H + +#include + +struct tso_t { + int next_frag_idx; + void *data; + size_t size; + u16 ip_id; + u32 tcp_seq; +}; + +int tso_count_descs(struct sk_buff *skb); +void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, + int size, bool is_last); +void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size); +void tso_start(struct sk_buff *skb, struct tso_t *tso); + +#endif /* _TSO_H */ diff --git a/net/core/Makefile b/net/core/Makefile index 826b925aa453..71093d94ad2b 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ - sock_diag.o dev_ioctl.o + sock_diag.o dev_ioctl.o tso.o obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o diff --git a/net/core/tso.c b/net/core/tso.c new file mode 100644 index 000000000000..097821dd3a8c --- /dev/null +++ b/net/core/tso.c @@ -0,0 +1,72 @@ +#include +#include + +/* Calculate expected number of TX descriptors */ +int tso_count_descs(struct sk_buff *skb) +{ + /* The Marvell Way */ + return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags; +} + +void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, + int size, bool is_last) +{ + struct iphdr *iph; + struct tcphdr *tcph; + int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + int mac_hdr_len = skb_network_offset(skb); + + memcpy(hdr, skb->data, hdr_len); + iph = (struct iphdr *)(hdr + mac_hdr_len); + iph->id = htons(tso->ip_id); + iph->tot_len = htons(size + hdr_len - mac_hdr_len); + tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb)); + tcph->seq = htonl(tso->tcp_seq); + tso->ip_id++; + + if (!is_last) { + /* Clear all special flags for not last packet */ + tcph->psh = 0; + tcph->fin = 0; + tcph->rst = 0; + } +} + +void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size) +{ + tso->tcp_seq += size; + tso->size -= size; + tso->data += size; + + if ((tso->size == 0) && + (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx]; + + /* Move to next segment */ + tso->size = frag->size; + tso->data = page_address(frag->page.p) + frag->page_offset; + tso->next_frag_idx++; + } +} + +void tso_start(struct sk_buff *skb, struct tso_t *tso) +{ + int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + + tso->ip_id = ntohs(ip_hdr(skb)->id); + tso->tcp_seq = ntohl(tcp_hdr(skb)->seq); + tso->next_frag_idx = 0; + + /* Build first data */ + tso->size = skb_headlen(skb) - hdr_len; + tso->data = skb->data + hdr_len; + if ((tso->size == 0) && + (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx]; + + /* Move to next segment */ + tso->size = frag->size; + tso->data = page_address(frag->page.p) + frag->page_offset; + tso->next_frag_idx++; + } +} -- cgit v1.2.3-71-gd317 From da08143b85203b581f4a6461b149186b0e9592df Mon Sep 17 00:00:00 2001 From: Michal Kubeček Date: Tue, 20 May 2014 08:29:25 +0200 Subject: vlan: more careful checksum features handling When combining real_dev's features and vlan_features, simple bitwise AND is used. This doesn't work well for checksum offloading features as if one set has NETIF_F_HW_CSUM and the other NETIF_F_IP_CSUM and/or NETIF_F_IPV6_CSUM, we end up with no checksum offloading. However, from the logical point of view (how can_checksum_protocol() works), NETIF_F_HW_CSUM contains the functionality of NETIF_F_IP_CSUM and NETIF_F_IPV6_CSUM so that the result should be IP/IPV6. Add helper function netdev_intersect_features() implementing this logic and use it in vlan_dev_fix_features(). Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 ++++++++++++++ net/8021q/vlan_dev.c | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2dea98cbbdba..f4ad247fd324 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3153,6 +3153,20 @@ const char *netdev_drivername(const struct net_device *dev); void linkwatch_run_queue(void); +static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, + netdev_features_t f2) +{ + if (f1 & NETIF_F_GEN_CSUM) + f1 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + if (f2 & NETIF_F_GEN_CSUM) + f2 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + f1 &= f2; + if (f1 & NETIF_F_GEN_CSUM) + f1 &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + + return f1; +} + static inline netdev_features_t netdev_get_wanted_features( struct net_device *dev) { diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 8f025afa29fd..4181fb71ba77 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -678,9 +678,9 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev, struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; netdev_features_t old_features = features; - features &= real_dev->vlan_features; + features = netdev_intersect_features(features, real_dev->vlan_features); features |= NETIF_F_RXCSUM; - features &= real_dev->features; + features = netdev_intersect_features(features, real_dev->features); features |= old_features & NETIF_F_SOFT_FEATURES; features |= NETIF_F_LLTX; -- cgit v1.2.3-71-gd317 From ea3429c77d4e34cb2983b90e49a5506fedf70b98 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 22 May 2014 09:47:50 -0700 Subject: of: mdio: remove of_phy_connect_fixed_link All in-tree drivers have been converted to use the new pair of functions: of_is_fixed_phy_link() plus of_phy_register_fixed_link(), we can now safely remove of_phy_connect_fixed_link. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/of/of_mdio.c | 38 -------------------------------------- include/linux/of_mdio.h | 10 ---------- 2 files changed, 48 deletions(-) (limited to 'include') diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 1def0bb5cb37..4c1e01ed16dc 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -245,44 +245,6 @@ struct phy_device *of_phy_connect(struct net_device *dev, } EXPORT_SYMBOL(of_phy_connect); -/** - * of_phy_connect_fixed_link - Parse fixed-link property and return a dummy phy - * @dev: pointer to net_device claiming the phy - * @hndlr: Link state callback for the network device - * @iface: PHY data interface type - * - * This function is a temporary stop-gap and will be removed soon. It is - * only to support the fs_enet, ucc_geth and gianfar Ethernet drivers. Do - * not call this function from new drivers. - */ -struct phy_device *of_phy_connect_fixed_link(struct net_device *dev, - void (*hndlr)(struct net_device *), - phy_interface_t iface) -{ - struct device_node *net_np; - char bus_id[MII_BUS_ID_SIZE + 3]; - struct phy_device *phy; - const __be32 *phy_id; - int sz; - - if (!dev->dev.parent) - return NULL; - - net_np = dev->dev.parent->of_node; - if (!net_np) - return NULL; - - phy_id = of_get_property(net_np, "fixed-link", &sz); - if (!phy_id || sz < sizeof(*phy_id)) - return NULL; - - sprintf(bus_id, PHY_ID_FMT, "fixed-0", be32_to_cpu(phy_id[0])); - - phy = phy_connect(dev, bus_id, hndlr, iface); - return IS_ERR(phy) ? NULL : phy; -} -EXPORT_SYMBOL(of_phy_connect_fixed_link); - /** * of_phy_attach - Attach to a PHY without starting the state machine * @dev: pointer to net_device claiming the phy diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 0aa367e316cb..d449018d0726 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -22,9 +22,6 @@ extern struct phy_device *of_phy_connect(struct net_device *dev, struct phy_device *of_phy_attach(struct net_device *dev, struct device_node *phy_np, u32 flags, phy_interface_t iface); -extern struct phy_device *of_phy_connect_fixed_link(struct net_device *dev, - void (*hndlr)(struct net_device *), - phy_interface_t iface); extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); @@ -59,13 +56,6 @@ static inline struct phy_device *of_phy_attach(struct net_device *dev, return NULL; } -static inline struct phy_device *of_phy_connect_fixed_link(struct net_device *dev, - void (*hndlr)(struct net_device *), - phy_interface_t iface) -{ - return NULL; -} - static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np) { return NULL; -- cgit v1.2.3-71-gd317 From 16e4d93f6de7063800f3f5e68f064b0ff8fae9b7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 May 2014 13:40:22 -0400 Subject: NFSD: Ignore client's source port on RDMA transports An NFS/RDMA client's source port is meaningless for RDMA transports. The transport layer typically sets the source port value on the connection to a random ephemeral port. Currently, NFS server administrators must specify the "insecure" export option to enable clients to access exports via RDMA. But this means NFS clients can access such an export via IP using an ephemeral port, which may not be desirable. This patch eliminates the need to specify the "insecure" export option to allow NFS/RDMA clients access to an export. BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=250 Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 2 +- net/sunrpc/svcsock.c | 9 +++++++++ net/sunrpc/xprtrdma/svc_rdma_transport.c | 7 +++++++ 4 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index b05963f09ebf..0cec1b94c670 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -24,6 +24,7 @@ struct svc_xprt_ops { void (*xpo_release_rqst)(struct svc_rqst *); void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); + int (*xpo_secure_port)(struct svc_rqst *); }; struct svc_xprt_class { diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 06c6ff0cb911..614956f1777e 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -793,7 +793,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) clear_bit(XPT_OLD, &xprt->xpt_flags); - rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); + rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp); rqstp->rq_chandle.defer = svc_defer; if (serv->sv_stats) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 43bcb4699d69..0cb34f5d58dc 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -400,6 +400,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, release_sock(sock->sk); #endif } + +static int svc_sock_secure_port(struct svc_rqst *rqstp) +{ + return svc_port_is_privileged(svc_addr(rqstp)); +} + /* * INET callback when data has been received on the socket. */ @@ -678,6 +684,7 @@ static struct svc_xprt_ops svc_udp_ops = { .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr, .xpo_has_wspace = svc_udp_has_wspace, .xpo_accept = svc_udp_accept, + .xpo_secure_port = svc_sock_secure_port, }; static struct svc_xprt_class svc_udp_class = { @@ -1234,6 +1241,7 @@ static struct svc_xprt_ops svc_tcp_bc_ops = { .xpo_detach = svc_bc_tcp_sock_detach, .xpo_free = svc_bc_sock_free, .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, + .xpo_secure_port = svc_sock_secure_port, }; static struct svc_xprt_class svc_tcp_bc_class = { @@ -1272,6 +1280,7 @@ static struct svc_xprt_ops svc_tcp_ops = { .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, .xpo_has_wspace = svc_tcp_has_wspace, .xpo_accept = svc_tcp_accept, + .xpo_secure_port = svc_sock_secure_port, }; static struct svc_xprt_class svc_tcp_class = { diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 25688fa2207f..02db8d9cc994 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -65,6 +65,7 @@ static void dto_tasklet_func(unsigned long data); static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt); static int svc_rdma_has_wspace(struct svc_xprt *xprt); +static int svc_rdma_secure_port(struct svc_rqst *); static void rq_cq_reap(struct svcxprt_rdma *xprt); static void sq_cq_reap(struct svcxprt_rdma *xprt); @@ -82,6 +83,7 @@ static struct svc_xprt_ops svc_rdma_ops = { .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr, .xpo_has_wspace = svc_rdma_has_wspace, .xpo_accept = svc_rdma_accept, + .xpo_secure_port = svc_rdma_secure_port, }; struct svc_xprt_class svc_rdma_class = { @@ -1207,6 +1209,11 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) return 1; } +static int svc_rdma_secure_port(struct svc_rqst *rqstp) +{ + return 1; +} + /* * Attempt to register the kvec representing the RPC memory with the * device. -- cgit v1.2.3-71-gd317 From ef11ce24875a8a540adc185e7bce3d7d49c8296f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 12 May 2014 11:22:47 +1000 Subject: SUNRPC: track whether a request is coming from a loop-back interface. If an incoming NFS request is coming from the local host, then nfsd will need to perform some special handling. So detect that possibility and make the source visible in rq_local. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 1 + include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/sunrpc.h | 13 +++++++++++++ net/sunrpc/svcsock.c | 5 +++++ 4 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 04e763221246..a0dbbd1e00e9 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -254,6 +254,7 @@ struct svc_rqst { u32 rq_prot; /* IP protocol */ unsigned short rq_secure : 1; /* secure port */ + unsigned short rq_local : 1; /* local request */ void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 0cec1b94c670..7235040a19b2 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -64,6 +64,7 @@ struct svc_xprt { #define XPT_DETACHED 10 /* detached from tempsocks list */ #define XPT_LISTENER 11 /* listening endpoint */ #define XPT_CACHE_AUTH 12 /* cache auth info */ +#define XPT_LOCAL 13 /* connection from loopback interface */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h index 14c9f6d1c5ff..f2b7cb540e61 100644 --- a/net/sunrpc/sunrpc.h +++ b/net/sunrpc/sunrpc.h @@ -43,6 +43,19 @@ static inline int rpc_reply_expected(struct rpc_task *task) (task->tk_msg.rpc_proc->p_decode != NULL); } +static inline int sock_is_loopback(struct sock *sk) +{ + struct dst_entry *dst; + int loopback = 0; + rcu_read_lock(); + dst = rcu_dereference(sk->sk_dst_cache); + if (dst && dst->dev && + (dst->dev->features & NETIF_F_LOOPBACK)) + loopback = 1; + rcu_read_unlock(); + return loopback; +} + int svc_send_common(struct socket *sock, struct xdr_buf *xdr, struct page *headpage, unsigned long headoffset, struct page *tailpage, unsigned long tailoffset); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 0cb34f5d58dc..f3b8eb309d01 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -874,6 +874,10 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) } svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen); + if (sock_is_loopback(newsock->sk)) + set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags); + else + clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags); if (serv->sv_stats) serv->sv_stats->nettcpconn++; @@ -1119,6 +1123,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; + rqstp->rq_local = !!test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags); p = (__be32 *)rqstp->rq_arg.head[0].iov_base; calldir = p[1]; -- cgit v1.2.3-71-gd317 From ecc8fb11cdb37d108d4597ba0f6bdff77c6019af Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 22 May 2014 15:55:39 +0300 Subject: net/mlx4_core: Deprecate use_prio module parameter use_prio was added as part of an infrastructure for running FCoE in A0 mode. FCoE didn't get into Mellanox Upstream driver, and when it will, it won't be using A0 steering mode. Therefore we can safely deprecate this module parameter without hurting any existing user. CC: Carol Soto Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 8 ++++---- include/linux/mlx4/device.h | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a56f6012258d..08ff5dd9298f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -132,8 +132,7 @@ MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)"); static bool use_prio; module_param_named(use_prio, use_prio, bool, 0444); -MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports " - "(0/1, default 0)"); +MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)"); int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG); module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); @@ -290,7 +289,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.log_num_macs = log_num_mac; dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; - dev->caps.log_num_prios = use_prio ? 3 : 0; for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE; @@ -358,7 +356,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] = (1 << dev->caps.log_num_macs) * (1 << dev->caps.log_num_vlans) * - (1 << dev->caps.log_num_prios) * dev->caps.num_ports; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH; @@ -2775,6 +2772,9 @@ static int __init mlx4_verify_params(void) pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n", MLX4_LOG_NUM_VLANS); + if (use_prio != 0) + pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n"); + if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) { pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg); return -1; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c0468e6f0442..ca38871a585c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -449,7 +449,6 @@ struct mlx4_caps { int reserved_qps_base[MLX4_NUM_QP_REGION]; int log_num_macs; int log_num_vlans; - int log_num_prios; enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; u8 supported_type[MLX4_MAX_PORTS + 1]; u8 suggested_type[MLX4_MAX_PORTS + 1]; -- cgit v1.2.3-71-gd317 From be52c9e96a6657d117bb0ec6e11438fb246af5c7 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 09:59:40 -0700 Subject: openvswitch: Avoid assigning a NULL pointer to flow actions. Flow SET can accept an empty set of actions, with the intended semantics of leaving existing actions unmodified. This seems to have been brokin after OVS 1.7, as we have assigned the flow's actions pointer to NULL in this case, but we never check for the NULL pointer later on. This patch restores the intended behavior and documents it in the include/linux/openvswitch.h. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- include/uapi/linux/openvswitch.h | 4 +++- net/openvswitch/datapath.c | 14 ++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 970553cbbc8e..0b979ee4bfc0 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -395,7 +395,9 @@ struct ovs_key_nd { * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying * the actions to take for packets that match the key. Always present in * notifications. Required for %OVS_FLOW_CMD_NEW requests, optional for - * %OVS_FLOW_CMD_SET requests. + * %OVS_FLOW_CMD_SET requests. An %OVS_FLOW_CMD_SET without + * %OVS_FLOW_ATTR_ACTIONS will not modify the actions. To clear the actions, + * an %OVS_FLOW_ATTR_ACTIONS without any nested attributes must be given. * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this * flow. Present in notifications if the stats would be nonzero. Ignored in * requests. diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 8867d7e2d65b..90a1e5e66287 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -810,6 +810,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_kfree; } } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { + /* OVS_FLOW_CMD_NEW must have actions. */ error = -EINVAL; goto error; } @@ -849,8 +850,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); } else { /* We found a matching flow. */ - struct sw_flow_actions *old_acts; - /* Bail out if we're not allowed to modify an existing flow. * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL * because Generic Netlink treats the latter as a dump @@ -866,11 +865,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (!ovs_flow_cmp_unmasked_key(flow, &match)) goto err_unlock_ovs; - /* Update actions. */ - old_acts = ovsl_dereference(flow->sf_acts); - rcu_assign_pointer(flow->sf_acts, acts); - ovs_nla_free_flow_actions(old_acts); + /* Update actions, if present. */ + if (acts) { + struct sw_flow_actions *old_acts; + old_acts = ovsl_dereference(flow->sf_acts); + rcu_assign_pointer(flow->sf_acts, acts); + ovs_nla_free_flow_actions(old_acts); + } reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); /* Clear stats. */ -- cgit v1.2.3-71-gd317 From 3c8a0922e0b774aa394a6925f11976ccfc852808 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 2 May 2014 11:05:21 +1000 Subject: drm/dp_helper: add defines for DP 1.2 and MST support. (v2) This just adds the defines from the DP 1.2 spec, which we will use later. fix some DP MST to 1.2 MST Signed-off-by: Dave Airlie Reviewed-by: Todd Previte Reviewed-by: Jingoo Han --- include/drm/drm_dp_helper.h | 78 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'include') diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index cfcacec5b89d..c8857e6159a5 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -37,6 +37,7 @@ * eDP: Embedded DisplayPort version 1 * DPI: DisplayPort Interoperability Guideline v1.1a * 1.2: DisplayPort 1.2 + * MST: Multistream Transport - part of DP 1.2a * * 1.2 formally includes both eDP and DPI definitions. */ @@ -103,9 +104,14 @@ #define DP_TRAINING_AUX_RD_INTERVAL 0x00e /* XXX 1.2? */ /* Multiple stream transport */ +#define DP_FAUX_CAP 0x020 /* 1.2 */ +# define DP_FAUX_CAP_1 (1 << 0) + #define DP_MSTM_CAP 0x021 /* 1.2 */ # define DP_MST_CAP (1 << 0) +#define DP_GUID 0x030 /* 1.2 */ + #define DP_PSR_SUPPORT 0x070 /* XXX 1.2? */ # define DP_PSR_IS_SUPPORTED 1 #define DP_PSR_CAPS 0x071 /* XXX 1.2? */ @@ -221,6 +227,16 @@ # define DP_PSR_CRC_VERIFICATION (1 << 2) # define DP_PSR_FRAME_CAPTURE (1 << 3) +#define DP_ADAPTER_CTRL 0x1a0 +# define DP_ADAPTER_CTRL_FORCE_LOAD_SENSE (1 << 0) + +#define DP_BRANCH_DEVICE_CTRL 0x1a1 +# define DP_BRANCH_DEVICE_IRQ_HPD (1 << 0) + +#define DP_PAYLOAD_ALLOCATE_SET 0x1c0 +#define DP_PAYLOAD_ALLOCATE_START_TIME_SLOT 0x1c1 +#define DP_PAYLOAD_ALLOCATE_TIME_SLOT_COUNT 0x1c2 + #define DP_SINK_COUNT 0x200 /* prior to 1.2 bit 7 was reserved mbz */ # define DP_GET_SINK_COUNT(x) ((((x) & 0x80) >> 1) | ((x) & 0x3f)) @@ -230,6 +246,9 @@ # define DP_REMOTE_CONTROL_COMMAND_PENDING (1 << 0) # define DP_AUTOMATED_TEST_REQUEST (1 << 1) # define DP_CP_IRQ (1 << 2) +# define DP_MCCS_IRQ (1 << 3) +# define DP_DOWN_REP_MSG_RDY (1 << 4) /* 1.2 MST */ +# define DP_UP_REQ_MSG_RDY (1 << 5) /* 1.2 MST */ # define DP_SINK_SPECIFIC_IRQ (1 << 6) #define DP_LANE0_1_STATUS 0x202 @@ -294,6 +313,13 @@ #define DP_TEST_SINK 0x270 #define DP_TEST_SINK_START (1 << 0) +#define DP_PAYLOAD_TABLE_UPDATE_STATUS 0x2c0 /* 1.2 MST */ +# define DP_PAYLOAD_TABLE_UPDATED (1 << 0) +# define DP_PAYLOAD_ACT_HANDLED (1 << 1) + +#define DP_VC_PAYLOAD_ID_SLOT_1 0x2c1 /* 1.2 MST */ +/* up to ID_SLOT_63 at 0x2ff */ + #define DP_SOURCE_OUI 0x300 #define DP_SINK_OUI 0x400 #define DP_BRANCH_OUI 0x500 @@ -303,6 +329,21 @@ # define DP_SET_POWER_D3 0x2 # define DP_SET_POWER_MASK 0x3 +#define DP_SIDEBAND_MSG_DOWN_REQ_BASE 0x1000 /* 1.2 MST */ +#define DP_SIDEBAND_MSG_UP_REP_BASE 0x1200 /* 1.2 MST */ +#define DP_SIDEBAND_MSG_DOWN_REP_BASE 0x1400 /* 1.2 MST */ +#define DP_SIDEBAND_MSG_UP_REQ_BASE 0x1600 /* 1.2 MST */ + +#define DP_SINK_COUNT_ESI 0x2002 /* 1.2 */ +/* 0-5 sink count */ +# define DP_SINK_COUNT_CP_READY (1 << 6) + +#define DP_DEVICE_SERVICE_IRQ_VECTOR_ESI0 0x2003 /* 1.2 */ + +#define DP_DEVICE_SERVICE_IRQ_VECTOR_ESI1 0x2004 /* 1.2 */ + +#define DP_LINK_SERVICE_IRQ_VECTOR_ESI0 0x2005 /* 1.2 */ + #define DP_PSR_ERROR_STATUS 0x2006 /* XXX 1.2? */ # define DP_PSR_LINK_CRC_ERROR (1 << 0) # define DP_PSR_RFB_STORAGE_ERROR (1 << 1) @@ -319,6 +360,43 @@ # define DP_PSR_SINK_INTERNAL_ERROR 7 # define DP_PSR_SINK_STATE_MASK 0x07 +/* DP 1.2 Sideband message defines */ +/* peer device type - DP 1.2a Table 2-92 */ +#define DP_PEER_DEVICE_NONE 0x0 +#define DP_PEER_DEVICE_SOURCE_OR_SST 0x1 +#define DP_PEER_DEVICE_MST_BRANCHING 0x2 +#define DP_PEER_DEVICE_SST_SINK 0x3 +#define DP_PEER_DEVICE_DP_LEGACY_CONV 0x4 + +/* DP 1.2 MST sideband request names DP 1.2a Table 2-80 */ +#define DP_LINK_ADDRESS 0x01 +#define DP_CONNECTION_STATUS_NOTIFY 0x02 +#define DP_ENUM_PATH_RESOURCES 0x10 +#define DP_ALLOCATE_PAYLOAD 0x11 +#define DP_QUERY_PAYLOAD 0x12 +#define DP_RESOURCE_STATUS_NOTIFY 0x13 +#define DP_CLEAR_PAYLOAD_ID_TABLE 0x14 +#define DP_REMOTE_DPCD_READ 0x20 +#define DP_REMOTE_DPCD_WRITE 0x21 +#define DP_REMOTE_I2C_READ 0x22 +#define DP_REMOTE_I2C_WRITE 0x23 +#define DP_POWER_UP_PHY 0x24 +#define DP_POWER_DOWN_PHY 0x25 +#define DP_SINK_EVENT_NOTIFY 0x30 +#define DP_QUERY_STREAM_ENC_STATUS 0x38 + +/* DP 1.2 MST sideband nak reasons - table 2.84 */ +#define DP_NAK_WRITE_FAILURE 0x01 +#define DP_NAK_INVALID_READ 0x02 +#define DP_NAK_CRC_FAILURE 0x03 +#define DP_NAK_BAD_PARAM 0x04 +#define DP_NAK_DEFER 0x05 +#define DP_NAK_LINK_FAILURE 0x06 +#define DP_NAK_NO_RESOURCES 0x07 +#define DP_NAK_DPCD_FAIL 0x08 +#define DP_NAK_I2C_NAK 0x09 +#define DP_NAK_ALLOCATE_FAIL 0x0a + #define MODE_I2C_START 1 #define MODE_I2C_WRITE 2 #define MODE_I2C_READ 4 -- cgit v1.2.3-71-gd317 From 8b2e62cc34feaaf1cac9440a93fb18ac0b1e81bc Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 21 May 2014 10:49:19 +0800 Subject: MIPS: Fix a typo error in AUDIT_ARCH definition Missing a "|" in AUDIT_ARCH_MIPSEL64N32 macro definition. Signed-off-by: Huacai Chen Reviewed-by: Markos Chandras Cc: John Crispin Cc: Steven J. Hill Cc: Aurelien Jarno Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Patchwork: https://patchwork.linux-mips.org/patch/6978/ Signed-off-by: Ralf Baechle --- include/uapi/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 1b1efddb91cd..4c31a366be16 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -357,7 +357,7 @@ enum { #define AUDIT_ARCH_MIPS64N32 (EM_MIPS|__AUDIT_ARCH_64BIT|\ __AUDIT_ARCH_CONVENTION_MIPS64_N32) #define AUDIT_ARCH_MIPSEL64 (EM_MIPS|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) -#define AUDIT_ARCH_MIPSEL64N32 (EM_MIPS|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE\ +#define AUDIT_ARCH_MIPSEL64N32 (EM_MIPS|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE|\ __AUDIT_ARCH_CONVENTION_MIPS64_N32) #define AUDIT_ARCH_OPENRISC (EM_OPENRISC) #define AUDIT_ARCH_PARISC (EM_PARISC) -- cgit v1.2.3-71-gd317 From d7b2545023ecfde94d3ea9c03c5480ac18da96c9 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 23 May 2014 13:19:53 +0300 Subject: Bluetooth: Clearly distinguish mgmt LTK type from authenticated property On the mgmt level we have a key type parameter which currently accepts two possible values: 0x00 for unauthenticated and 0x01 for authenticated. However, in the internal struct smp_ltk representation we have an explicit "authenticated" boolean value. To make this distinction clear, add defines for the possible mgmt values and do conversion to and from the internal authenticated value. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/mgmt.h | 3 +++ net/bluetooth/mgmt.c | 19 ++++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 226ae03cafe7..bcffc9ae0c89 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -181,6 +181,9 @@ struct mgmt_cp_load_link_keys { } __packed; #define MGMT_LOAD_LINK_KEYS_SIZE 3 +#define MGMT_LTK_UNAUTHENTICATED 0x00 +#define MGMT_LTK_AUTHENTICATED 0x01 + struct mgmt_ltk_info { struct mgmt_addr_info addr; __u8 type; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f8ca69dd1984..5e9c21a5525f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4534,7 +4534,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, for (i = 0; i < key_count; i++) { struct mgmt_ltk_info *key = &cp->keys[i]; - u8 type, addr_type; + u8 type, addr_type, authenticated; if (key->addr.type == BDADDR_LE_PUBLIC) addr_type = ADDR_LE_DEV_PUBLIC; @@ -4546,8 +4546,13 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, else type = HCI_SMP_LTK_SLAVE; + if (key->type == MGMT_LTK_UNAUTHENTICATED) + authenticated = 0x00; + else + authenticated = 0x01; + hci_add_ltk(hdev, &key->addr.bdaddr, addr_type, type, - key->type, key->val, key->enc_size, key->ediv, + authenticated, key->val, key->enc_size, key->ediv, key->rand); } @@ -5222,6 +5227,14 @@ void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, mgmt_event(MGMT_EV_NEW_LINK_KEY, hdev, &ev, sizeof(ev), NULL); } +static u8 mgmt_ltk_type(struct smp_ltk *ltk) +{ + if (ltk->authenticated) + return MGMT_LTK_AUTHENTICATED; + + return MGMT_LTK_UNAUTHENTICATED; +} + void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) { struct mgmt_ev_new_long_term_key ev; @@ -5247,7 +5260,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) bacpy(&ev.key.addr.bdaddr, &key->bdaddr); ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type); - ev.key.type = key->authenticated; + ev.key.type = mgmt_ltk_type(key); ev.key.enc_size = key->enc_size; ev.key.ediv = key->ediv; ev.key.rand = key->rand; -- cgit v1.2.3-71-gd317 From ed616689a3d95eb6c9bdbb1ef74b0f50cbdf276a Mon Sep 17 00:00:00 2001 From: Sucheta Chakraborty Date: Thu, 22 May 2014 09:59:05 -0400 Subject: net-next:v4: Add support to configure SR-IOV VF minimum and maximum Tx rate through ip tool. o min_tx_rate puts lower limit on the VF bandwidth. VF is guaranteed to have a bandwidth of at least this value. max_tx_rate puts cap on the VF bandwidth. VF can have a bandwidth of up to this value. o A new handler set_vf_rate for attr IFLA_VF_RATE has been introduced which takes 4 arguments: netdev, VF number, min_tx_rate, max_tx_rate o ndo_set_vf_rate replaces ndo_set_vf_tx_rate handler. o Drivers that currently implement ndo_set_vf_tx_rate should now call ndo_set_vf_rate instead and reject attempt to set a minimum bandwidth greater than 0 for IFLA_VF_TX_RATE when IFLA_VF_RATE is not yet implemented by driver. o If user enters only one of either min_tx_rate or max_tx_rate, then, userland should read back the other value from driver and set both for IFLA_VF_RATE. Drivers that have not yet implemented IFLA_VF_RATE should always return min_tx_rate as 0 when read from ip tool. o If both IFLA_VF_TX_RATE and IFLA_VF_RATE options are specified, then IFLA_VF_RATE should override. o Idea is to have consistent display of rate values to user. o Usage example: - ./ip link set p4p1 vf 0 rate 900 ./ip link show p4p1 32: p4p1: mtu 1500 qdisc noop state DOWN mode DEFAULT qlen 1000 link/ether 00:0e:1e:08:b0:f0 brd ff:ff:ff:ff:ff:ff vf 0 MAC 3e:a0:ca:bd:ae:5a, tx rate 900 (Mbps), max_tx_rate 900Mbps vf 1 MAC f6:c6:7c:3f:3d:6c vf 2 MAC 56:32:43:98:d7:71 vf 3 MAC d6:be:c3:b5:85:ff vf 4 MAC ee:a9:9a:1e:19:14 vf 5 MAC 4a:d0:4c:07:52:18 vf 6 MAC 3a:76:44:93:62:f9 vf 7 MAC 82:e9:e7:e3:15:1a ./ip link set p4p1 vf 0 max_tx_rate 300 min_tx_rate 200 ./ip link show p4p1 32: p4p1: mtu 1500 qdisc noop state DOWN mode DEFAULT qlen 1000 link/ether 00:0e:1e:08:b0:f0 brd ff:ff:ff:ff:ff:ff vf 0 MAC 3e:a0:ca:bd:ae:5a, tx rate 300 (Mbps), max_tx_rate 300Mbps, min_tx_rate 200Mbps vf 1 MAC f6:c6:7c:3f:3d:6c vf 2 MAC 56:32:43:98:d7:71 vf 3 MAC d6:be:c3:b5:85:ff vf 4 MAC ee:a9:9a:1e:19:14 vf 5 MAC 4a:d0:4c:07:52:18 vf 6 MAC 3a:76:44:93:62:f9 vf 7 MAC 82:e9:e7:e3:15:1a ./ip link set p4p1 vf 0 max_tx_rate 600 rate 300 ./ip link show p4p1 32: p4p1: mtu 1500 qdisc noop state DOWN mode DEFAULT qlen 1000 link/ether 00:0e:1e:08:b0:f brd ff:ff:ff:ff:ff:ff vf 0 MAC 3e:a0:ca:bd:ae:5, tx rate 600 (Mbps), max_tx_rate 600Mbps, min_tx_rate 200Mbps vf 1 MAC f6:c6:7c:3f:3d:6c vf 2 MAC 56:32:43:98:d7:71 vf 3 MAC d6:be:c3:b5:85:ff vf 4 MAC ee:a9:9a:1e:19:14 vf 5 MAC 4a:d0:4c:07:52:18 vf 6 MAC 3a:76:44:93:62:f9 vf 7 MAC 82:e9:e7:e3:15:1a Signed-off-by: Sucheta Chakraborty Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 3 +- drivers/net/ethernet/emulex/benet/be_main.c | 21 +++++---- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 26 +++++++---- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 3 +- drivers/net/ethernet/intel/igb/igb_main.c | 20 ++++++--- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 13 ++++-- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h | 3 +- drivers/net/ethernet/mellanox/mlx4/cmd.c | 11 ++--- drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 1 + drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 2 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h | 2 +- .../ethernet/qlogic/qlcnic/qlcnic_sriov_common.c | 1 + .../net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c | 52 +++++++++++++++------- drivers/net/ethernet/sfc/siena_sriov.c | 3 +- include/linux/if_link.h | 3 +- include/linux/netdevice.h | 8 ++-- include/uapi/linux/if_link.h | 9 +++- net/core/rtnetlink.c | 38 +++++++++++++--- 20 files changed, 156 insertions(+), 67 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 81cc2d9831c2..8d0479d5be8e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2576,7 +2576,8 @@ int bnx2x_get_vf_config(struct net_device *dev, int vfidx, ivi->vf = vfidx; ivi->qos = 0; - ivi->tx_rate = 10000; /* always 10G. TBA take from link struct */ + ivi->max_tx_rate = 10000; /* always 10G. TBA take from link struct */ + ivi->min_tx_rate = 0; ivi->spoofchk = 1; /*always enabled */ if (vf->state == VF_ENABLED) { /* mac and vlan are in vlan_mac objects */ diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index dcc5e5c69743..4693d004a223 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1302,7 +1302,8 @@ static int be_get_vf_config(struct net_device *netdev, int vf, return -EINVAL; vi->vf = vf; - vi->tx_rate = vf_cfg->tx_rate; + vi->max_tx_rate = vf_cfg->tx_rate; + vi->min_tx_rate = 0; vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK; vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT; memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN); @@ -1342,7 +1343,8 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) return status; } -static int be_set_vf_tx_rate(struct net_device *netdev, int vf, int rate) +static int be_set_vf_tx_rate(struct net_device *netdev, int vf, + int min_tx_rate, int max_tx_rate) { struct be_adapter *adapter = netdev_priv(netdev); int status = 0; @@ -1353,18 +1355,21 @@ static int be_set_vf_tx_rate(struct net_device *netdev, int vf, int rate) if (vf >= adapter->num_vfs) return -EINVAL; - if (rate < 100 || rate > 10000) { + if (min_tx_rate) + return -EINVAL; + + if (max_tx_rate < 100 || max_tx_rate > 10000) { dev_err(&adapter->pdev->dev, - "tx rate must be between 100 and 10000 Mbps\n"); + "max tx rate must be between 100 and 10000 Mbps\n"); return -EINVAL; } - status = be_cmd_config_qos(adapter, rate / 10, vf + 1); + status = be_cmd_config_qos(adapter, max_tx_rate / 10, vf + 1); if (status) dev_err(&adapter->pdev->dev, - "tx rate %d on VF %d failed\n", rate, vf); + "max tx rate %d on VF %d failed\n", max_tx_rate, vf); else - adapter->vf_cfg[vf].tx_rate = rate; + adapter->vf_cfg[vf].tx_rate = max_tx_rate; return status; } static int be_set_vf_link_state(struct net_device *netdev, int vf, @@ -4257,7 +4262,7 @@ static const struct net_device_ops be_netdev_ops = { .ndo_vlan_rx_kill_vid = be_vlan_rem_vid, .ndo_set_vf_mac = be_set_vf_mac, .ndo_set_vf_vlan = be_set_vf_vlan, - .ndo_set_vf_tx_rate = be_set_vf_tx_rate, + .ndo_set_vf_rate = be_set_vf_tx_rate, .ndo_get_vf_config = be_get_vf_config, .ndo_set_vf_link_state = be_set_vf_link_state, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e0e5c6a867b1..96f7fabd8758 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6724,7 +6724,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_set_features = i40e_set_features, .ndo_set_vf_mac = i40e_ndo_set_vf_mac, .ndo_set_vf_vlan = i40e_ndo_set_vf_port_vlan, - .ndo_set_vf_tx_rate = i40e_ndo_set_vf_bw, + .ndo_set_vf_rate = i40e_ndo_set_vf_bw, .ndo_get_vf_config = i40e_ndo_get_vf_config, .ndo_set_vf_link_state = i40e_ndo_set_vf_link_state, #ifdef CONFIG_I40E_VXLAN diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 4d219566a04d..8564b0939dc4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2205,7 +2205,8 @@ error_pvid: * * configure vf tx rate **/ -int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate) +int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, + int max_tx_rate) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_pf *pf = np->vsi->back; @@ -2221,6 +2222,12 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate) goto error; } + if (min_tx_rate) { + dev_err(&pf->pdev->dev, "Invalid min tx rate (%d) (greater than 0) specified for vf %d.\n", + min_tx_rate, vf_id); + return -EINVAL; + } + vf = &(pf->vf[vf_id]); vsi = pf->vsi[vf->lan_vsi_index]; if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) { @@ -2243,23 +2250,23 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate) break; } - if (tx_rate > speed) { - dev_err(&pf->pdev->dev, "Invalid tx rate %d specified for vf %d.", - tx_rate, vf->vf_id); + if (max_tx_rate > speed) { + dev_err(&pf->pdev->dev, "Invalid max tx rate %d specified for vf %d.", + max_tx_rate, vf->vf_id); ret = -EINVAL; goto error; } /* Tx rate credits are in values of 50Mbps, 0 is disabled*/ - ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid, tx_rate / 50, 0, - NULL); + ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid, max_tx_rate / 50, + 0, NULL); if (ret) { - dev_err(&pf->pdev->dev, "Unable to set tx rate, error code %d.\n", + dev_err(&pf->pdev->dev, "Unable to set max tx rate, error code %d.\n", ret); ret = -EIO; goto error; } - vf->tx_rate = tx_rate; + vf->tx_rate = max_tx_rate; error: return ret; } @@ -2301,7 +2308,8 @@ int i40e_ndo_get_vf_config(struct net_device *netdev, memcpy(&ivi->mac, vf->default_lan_addr.addr, ETH_ALEN); - ivi->tx_rate = vf->tx_rate; + ivi->max_tx_rate = vf->tx_rate; + ivi->min_tx_rate = 0; ivi->vlan = le16_to_cpu(vsi->info.pvid) & I40E_VLAN_MASK; ivi->qos = (le16_to_cpu(vsi->info.pvid) & I40E_PRIORITY_MASK) >> I40E_VLAN_PRIORITY_SHIFT; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index ba3d1f8414be..5a559be4ba2c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -116,7 +116,8 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf); int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac); int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos); -int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate); +int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, + int max_tx_rate); int i40e_ndo_get_vf_config(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi); int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index bfcda8a455f4..1075b3f8c415 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -169,7 +169,7 @@ static void igb_restore_vf_multicasts(struct igb_adapter *adapter); static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac); static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos); -static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); +static int igb_ndo_set_vf_bw(struct net_device *, int, int, int); static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, @@ -2084,7 +2084,7 @@ static const struct net_device_ops igb_netdev_ops = { .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid, .ndo_set_vf_mac = igb_ndo_set_vf_mac, .ndo_set_vf_vlan = igb_ndo_set_vf_vlan, - .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw, + .ndo_set_vf_rate = igb_ndo_set_vf_bw, .ndo_set_vf_spoofchk = igb_ndo_set_vf_spoofchk, .ndo_get_vf_config = igb_ndo_get_vf_config, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -7879,7 +7879,8 @@ static void igb_check_vf_rate_limit(struct igb_adapter *adapter) } } -static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) +static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, + int min_tx_rate, int max_tx_rate) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -7888,15 +7889,19 @@ static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) if (hw->mac.type != e1000_82576) return -EOPNOTSUPP; + if (min_tx_rate) + return -EINVAL; + actual_link_speed = igb_link_mbps(adapter->link_speed); if ((vf >= adapter->vfs_allocated_count) || (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) || - (tx_rate < 0) || (tx_rate > actual_link_speed)) + (max_tx_rate < 0) || + (max_tx_rate > actual_link_speed)) return -EINVAL; adapter->vf_rate_link_speed = actual_link_speed; - adapter->vf_data[vf].tx_rate = (u16)tx_rate; - igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed); + adapter->vf_data[vf].tx_rate = (u16)max_tx_rate; + igb_set_vf_rate_limit(hw, vf, max_tx_rate, actual_link_speed); return 0; } @@ -7936,7 +7941,8 @@ static int igb_ndo_get_vf_config(struct net_device *netdev, return -EINVAL; ivi->vf = vf; memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN); - ivi->tx_rate = adapter->vf_data[vf].tx_rate; + ivi->max_tx_rate = adapter->vf_data[vf].tx_rate; + ivi->min_tx_rate = 0; ivi->vlan = adapter->vf_data[vf].pf_vlan; ivi->qos = adapter->vf_data[vf].pf_qos; ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 8089ea9f2fba..a5332389620a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7926,7 +7926,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_do_ioctl = ixgbe_ioctl, .ndo_set_vf_mac = ixgbe_ndo_set_vf_mac, .ndo_set_vf_vlan = ixgbe_ndo_set_vf_vlan, - .ndo_set_vf_tx_rate = ixgbe_ndo_set_vf_bw, + .ndo_set_vf_rate = ixgbe_ndo_set_vf_bw, .ndo_set_vf_spoofchk = ixgbe_ndo_set_vf_spoofchk, .ndo_get_vf_config = ixgbe_ndo_get_vf_config, .ndo_get_stats64 = ixgbe_get_stats64, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index a01417c06620..3248e208c9dc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1222,7 +1222,8 @@ void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter) } } -int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) +int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate, + int max_tx_rate) { struct ixgbe_adapter *adapter = netdev_priv(netdev); int link_speed; @@ -1240,13 +1241,16 @@ int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) if (link_speed != 10000) return -EINVAL; + if (min_tx_rate) + return -EINVAL; + /* rate limit cannot be less than 10Mbs or greater than link speed */ - if (tx_rate && ((tx_rate <= 10) || (tx_rate > link_speed))) + if (max_tx_rate && ((max_tx_rate <= 10) || (max_tx_rate > link_speed))) return -EINVAL; /* store values */ adapter->vf_rate_link_speed = link_speed; - adapter->vfinfo[vf].tx_rate = tx_rate; + adapter->vfinfo[vf].tx_rate = max_tx_rate; /* update hardware configuration */ ixgbe_set_vf_rate_limit(adapter, vf); @@ -1288,7 +1292,8 @@ int ixgbe_ndo_get_vf_config(struct net_device *netdev, return -EINVAL; ivi->vf = vf; memcpy(&ivi->mac, adapter->vfinfo[vf].vf_mac_addresses, ETH_ALEN); - ivi->tx_rate = adapter->vfinfo[vf].tx_rate; + ivi->max_tx_rate = adapter->vfinfo[vf].tx_rate; + ivi->min_tx_rate = 0; ivi->vlan = adapter->vfinfo[vf].pf_vlan; ivi->qos = adapter->vfinfo[vf].pf_qos; ivi->spoofchk = adapter->vfinfo[vf].spoofchk_enabled; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h index cea640147604..32c26d586c01 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h @@ -44,7 +44,8 @@ void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter); int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac); int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan, u8 qos); -int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); +int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate, + int max_tx_rate); int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); int ixgbe_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi); diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 59c7fd406805..ca8e7cb5a8e4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2486,11 +2486,12 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in ivf->mac[4] = ((s_info->mac >> (1*8)) & 0xff); ivf->mac[5] = ((s_info->mac) & 0xff); - ivf->vlan = s_info->default_vlan; - ivf->qos = s_info->default_qos; - ivf->tx_rate = s_info->tx_rate; - ivf->spoofchk = s_info->spoofchk; - ivf->linkstate = s_info->link_state; + ivf->vlan = s_info->default_vlan; + ivf->qos = s_info->default_qos; + ivf->max_tx_rate = s_info->tx_rate; + ivf->min_tx_rate = 0; + ivf->spoofchk = s_info->spoofchk; + ivf->linkstate = s_info->link_state; return 0; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 6e7527e2b595..41abe6070466 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -1319,6 +1319,7 @@ struct qlcnic_eswitch { #define QL_STATUS_INVALID_PARAM -1 #define MAX_BW 100 /* % of link speed */ +#define MIN_BW 1 /* % of link speed */ #define MAX_VLAN_ID 4095 #define MIN_VLAN_ID 2 #define DEFAULT_MAC_LEARN 1 diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index f0a285359e66..f06ba90b4282 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -525,7 +525,7 @@ static const struct net_device_ops qlcnic_netdev_ops = { #endif #ifdef CONFIG_QLCNIC_SRIOV .ndo_set_vf_mac = qlcnic_sriov_set_vf_mac, - .ndo_set_vf_tx_rate = qlcnic_sriov_set_vf_tx_rate, + .ndo_set_vf_rate = qlcnic_sriov_set_vf_tx_rate, .ndo_get_vf_config = qlcnic_sriov_get_vf_config, .ndo_set_vf_vlan = qlcnic_sriov_set_vf_vlan, .ndo_set_vf_spoofchk = qlcnic_sriov_set_vf_spoofchk, diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h index 335b50f7bd3e..4677b2edccca 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h @@ -233,7 +233,7 @@ bool qlcnic_sriov_soft_flr_check(struct qlcnic_adapter *, void qlcnic_sriov_pf_reset(struct qlcnic_adapter *); int qlcnic_sriov_pf_reinit(struct qlcnic_adapter *); int qlcnic_sriov_set_vf_mac(struct net_device *, int, u8 *); -int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int); +int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int, int); int qlcnic_sriov_get_vf_config(struct net_device *, int , struct ifla_vf_info *); int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index 498fa6350c8d..2bdd9deffb38 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -201,6 +201,7 @@ int qlcnic_sriov_init(struct qlcnic_adapter *adapter, int num_vfs) sriov->vf_info[i].vp = vp; vp->vlan_mode = QLC_GUEST_VLAN_MODE; vp->max_tx_bw = MAX_BW; + vp->min_tx_bw = MIN_BW; vp->spoofchk = false; random_ether_addr(vp->mac); dev_info(&adapter->pdev->dev, diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index 6d2f72f114f2..a29538b86edf 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -1848,7 +1848,8 @@ int qlcnic_sriov_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) return 0; } -int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, int tx_rate) +int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, + int min_tx_rate, int max_tx_rate) { struct qlcnic_adapter *adapter = netdev_priv(netdev); struct qlcnic_sriov *sriov = adapter->ahw->sriov; @@ -1863,35 +1864,52 @@ int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, int tx_rate) if (vf >= sriov->num_vfs) return -EINVAL; - if (tx_rate >= 10000 || tx_rate < 100) { + vf_info = &sriov->vf_info[vf]; + vp = vf_info->vp; + vpid = vp->handle; + + if (!min_tx_rate) + min_tx_rate = QLC_VF_MIN_TX_RATE; + + if (max_tx_rate && + (max_tx_rate >= 10000 || max_tx_rate < min_tx_rate)) { netdev_err(netdev, - "Invalid Tx rate, allowed range is [%d - %d]", - QLC_VF_MIN_TX_RATE, QLC_VF_MAX_TX_RATE); + "Invalid max Tx rate, allowed range is [%d - %d]", + min_tx_rate, QLC_VF_MAX_TX_RATE); return -EINVAL; } - if (tx_rate == 0) - tx_rate = 10000; + if (!max_tx_rate) + max_tx_rate = 10000; - vf_info = &sriov->vf_info[vf]; - vp = vf_info->vp; - vpid = vp->handle; + if (min_tx_rate && + (min_tx_rate > max_tx_rate || min_tx_rate < QLC_VF_MIN_TX_RATE)) { + netdev_err(netdev, + "Invalid min Tx rate, allowed range is [%d - %d]", + QLC_VF_MIN_TX_RATE, max_tx_rate); + return -EINVAL; + } if (test_bit(QLC_BC_VF_STATE, &vf_info->state)) { if (qlcnic_sriov_get_vf_vport_info(adapter, &nic_info, vpid)) return -EIO; - nic_info.max_tx_bw = tx_rate / 100; + nic_info.max_tx_bw = max_tx_rate / 100; + nic_info.min_tx_bw = min_tx_rate / 100; nic_info.bit_offsets = BIT_0; if (qlcnic_sriov_pf_set_vport_info(adapter, &nic_info, vpid)) return -EIO; } - vp->max_tx_bw = tx_rate / 100; + vp->max_tx_bw = max_tx_rate / 100; + netdev_info(netdev, + "Setting Max Tx rate %d (Mbps), %d %% of PF bandwidth, for VF %d\n", + max_tx_rate, vp->max_tx_bw, vf); + vp->min_tx_bw = min_tx_rate / 100; netdev_info(netdev, - "Setting Tx rate %d (Mbps), %d %% of PF bandwidth, for VF %d\n", - tx_rate, vp->max_tx_bw, vf); + "Setting Min Tx rate %d (Mbps), %d %% of PF bandwidth, for VF %d\n", + min_tx_rate, vp->min_tx_bw, vf); return 0; } @@ -1990,9 +2008,13 @@ int qlcnic_sriov_get_vf_config(struct net_device *netdev, ivi->qos = vp->qos; ivi->spoofchk = vp->spoofchk; if (vp->max_tx_bw == MAX_BW) - ivi->tx_rate = 0; + ivi->max_tx_rate = 0; + else + ivi->max_tx_rate = vp->max_tx_bw * 100; + if (vp->min_tx_bw == MIN_BW) + ivi->min_tx_rate = 0; else - ivi->tx_rate = vp->max_tx_bw * 100; + ivi->min_tx_rate = vp->min_tx_bw * 100; ivi->vf = vf; return 0; diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c index 9a9205e77896..43d2e64546ed 100644 --- a/drivers/net/ethernet/sfc/siena_sriov.c +++ b/drivers/net/ethernet/sfc/siena_sriov.c @@ -1633,7 +1633,8 @@ int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i, ivi->vf = vf_i; ether_addr_copy(ivi->mac, vf->addr.mac_addr); - ivi->tx_rate = 0; + ivi->max_tx_rate = 0; + ivi->min_tx_rate = 0; tci = ntohs(vf->addr.tci); ivi->vlan = tci & VLAN_VID_MASK; ivi->qos = (tci >> VLAN_PRIO_SHIFT) & 0x7; diff --git a/include/linux/if_link.h b/include/linux/if_link.h index a86784dec3d3..119130e9298b 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -10,8 +10,9 @@ struct ifla_vf_info { __u8 mac[32]; __u32 vlan; __u32 qos; - __u32 tx_rate; __u32 spoofchk; __u32 linkstate; + __u32 min_tx_rate; + __u32 max_tx_rate; }; #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f4ad247fd324..9ec3a945caf2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -850,7 +850,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * SR-IOV management functions. * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos); - * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate); + * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, + * int max_tx_rate); * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_config)(struct net_device *dev, * int vf, struct ifla_vf_info *ivf); @@ -1044,8 +1045,9 @@ struct net_device_ops { int queue, u8 *mac); int (*ndo_set_vf_vlan)(struct net_device *dev, int queue, u16 vlan, u8 qos); - int (*ndo_set_vf_tx_rate)(struct net_device *dev, - int vf, int rate); + int (*ndo_set_vf_rate)(struct net_device *dev, + int vf, int min_tx_rate, + int max_tx_rate); int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); int (*ndo_get_vf_config)(struct net_device *dev, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 9a7f7ace6649..622e7910b8cc 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -399,9 +399,10 @@ enum { IFLA_VF_UNSPEC, IFLA_VF_MAC, /* Hardware queue specific attributes */ IFLA_VF_VLAN, - IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */ + IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */ IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */ + IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */ __IFLA_VF_MAX, }; @@ -423,6 +424,12 @@ struct ifla_vf_tx_rate { __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ }; +struct ifla_vf_rate { + __u32 vf; + __u32 min_tx_rate; /* Min Bandwidth in Mbps */ + __u32 max_tx_rate; /* Max Bandwidth in Mbps */ +}; + struct ifla_vf_spoofchk { __u32 vf; __u32 setting; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9837bebf93ce..d6417464dc66 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -767,8 +767,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, size += num_vfs * (nla_total_size(sizeof(struct ifla_vf_mac)) + nla_total_size(sizeof(struct ifla_vf_vlan)) + - nla_total_size(sizeof(struct ifla_vf_tx_rate)) + - nla_total_size(sizeof(struct ifla_vf_spoofchk))); + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + + nla_total_size(sizeof(struct ifla_vf_rate))); return size; } else return 0; @@ -1034,6 +1034,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_info ivi; struct ifla_vf_mac vf_mac; struct ifla_vf_vlan vf_vlan; + struct ifla_vf_rate vf_rate; struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_spoofchk vf_spoofchk; struct ifla_vf_link_state vf_linkstate; @@ -1054,6 +1055,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, break; vf_mac.vf = vf_vlan.vf = + vf_rate.vf = vf_tx_rate.vf = vf_spoofchk.vf = vf_linkstate.vf = ivi.vf; @@ -1061,7 +1063,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; - vf_tx_rate.rate = ivi.tx_rate; + vf_tx_rate.rate = ivi.max_tx_rate; + vf_rate.min_tx_rate = ivi.min_tx_rate; + vf_rate.max_tx_rate = ivi.max_tx_rate; vf_spoofchk.setting = ivi.spoofchk; vf_linkstate.link_state = ivi.linkstate; vf = nla_nest_start(skb, IFLA_VF_INFO); @@ -1071,6 +1075,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, } if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) || + nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate), + &vf_rate) || nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate) || nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), @@ -1177,6 +1183,8 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { .len = sizeof(struct ifla_vf_tx_rate) }, [IFLA_VF_SPOOFCHK] = { .type = NLA_BINARY, .len = sizeof(struct ifla_vf_spoofchk) }, + [IFLA_VF_RATE] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_rate) }, }; static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { @@ -1336,11 +1344,29 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) } case IFLA_VF_TX_RATE: { struct ifla_vf_tx_rate *ivt; + struct ifla_vf_info ivf; ivt = nla_data(vf); err = -EOPNOTSUPP; - if (ops->ndo_set_vf_tx_rate) - err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, - ivt->rate); + if (ops->ndo_get_vf_config) + err = ops->ndo_get_vf_config(dev, ivt->vf, + &ivf); + if (err) + break; + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivf.min_tx_rate, + ivt->rate); + break; + } + case IFLA_VF_RATE: { + struct ifla_vf_rate *ivt; + ivt = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivt->min_tx_rate, + ivt->max_tx_rate); break; } case IFLA_VF_SPOOFCHK: { -- cgit v1.2.3-71-gd317 From b26ba202e0500eb852e89499ece1b2deaa64c3a7 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 23 May 2014 08:47:09 -0700 Subject: net: Eliminate no_check from protosw It doesn't seem like an protocols are setting anything other than the default, and allowing to arbitrarily disable checksums for a whole protocol seems dangerous. This can be done on a per socket basis. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/protocol.h | 1 - net/dccp/ipv4.c | 1 - net/ipv4/af_inet.c | 7 ------- net/ipv4/udplite.c | 1 - net/ipv6/af_inet6.c | 3 --- net/ipv6/ping.c | 1 - net/ipv6/raw.c | 1 - net/ipv6/tcp_ipv6.c | 1 - net/ipv6/udp.c | 1 - net/ipv6/udplite.c | 1 - net/l2tp/l2tp_ip.c | 1 - net/l2tp/l2tp_ip6.c | 1 - net/sctp/ipv6.c | 2 -- net/sctp/protocol.c | 2 -- 14 files changed, 24 deletions(-) (limited to 'include') diff --git a/include/net/protocol.h b/include/net/protocol.h index a7e986b08147..d6fcc1fcdb5b 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -86,7 +86,6 @@ struct inet_protosw { struct proto *prot; const struct proto_ops *ops; - char no_check; /* checksum on rcv/xmit/none? */ unsigned char flags; /* See INET_PROTOSW_* below. */ }; #define INET_PROTOSW_REUSE 0x01 /* Are ports automatically reusable? */ diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 22b5d818b200..6ca645c4b48e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1024,7 +1024,6 @@ static struct inet_protosw dccp_v4_protosw = { .protocol = IPPROTO_DCCP, .prot = &dccp_v4_prot, .ops = &inet_dccp_ops, - .no_check = 0, .flags = INET_PROTOSW_ICSK, }; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 279132bcadd9..0e9bb08a91e4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -254,7 +254,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, struct inet_sock *inet; struct proto *answer_prot; unsigned char answer_flags; - char answer_no_check; int try_loading_module = 0; int err; @@ -312,7 +311,6 @@ lookup_protocol: sock->ops = answer->ops; answer_prot = answer->prot; - answer_no_check = answer->no_check; answer_flags = answer->flags; rcu_read_unlock(); @@ -324,7 +322,6 @@ lookup_protocol: goto out; err = 0; - sk->sk_no_check = answer_no_check; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; @@ -1002,7 +999,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_TCP, .prot = &tcp_prot, .ops = &inet_stream_ops, - .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, }, @@ -1012,7 +1008,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_UDP, .prot = &udp_prot, .ops = &inet_dgram_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }, @@ -1021,7 +1016,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_ICMP, .prot = &ping_prot, .ops = &inet_dgram_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }, @@ -1030,7 +1024,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_IP, /* wild card */ .prot = &raw_prot, .ops = &inet_sockraw_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, } }; diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 2c46acd4cc36..3b3efbda48e1 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -70,7 +70,6 @@ static struct inet_protosw udplite4_protosw = { .protocol = IPPROTO_UDPLITE, .prot = &udplite_prot, .ops = &inet_dgram_ops, - .no_check = 0, /* must checksum (RFC 3828) */ .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index dc47cc757b80..7cb4392690dd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -106,7 +106,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, struct inet_protosw *answer; struct proto *answer_prot; unsigned char answer_flags; - char answer_no_check; int try_loading_module = 0; int err; @@ -162,7 +161,6 @@ lookup_protocol: sock->ops = answer->ops; answer_prot = answer->prot; - answer_no_check = answer->no_check; answer_flags = answer->flags; rcu_read_unlock(); @@ -176,7 +174,6 @@ lookup_protocol: sock_init_data(sock, sk); err = 0; - sk->sk_no_check = answer_no_check; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index a2a1d80dfe0c..5b7a1ed2aba9 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -51,7 +51,6 @@ static struct inet_protosw pingv6_protosw = { .protocol = IPPROTO_ICMPV6, .prot = &pingv6_prot, .ops = &inet6_dgram_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index dddfb5fa2b7a..b2dc60b0c764 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1322,7 +1322,6 @@ static struct inet_protosw rawv6_protosw = { .protocol = IPPROTO_IP, /* wild card */ .prot = &rawv6_prot, .ops = &inet6_sockraw_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f07b2abba359..229239ad96b1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1992,7 +1992,6 @@ static struct inet_protosw tcpv6_protosw = { .protocol = IPPROTO_TCP, .prot = &tcpv6_prot, .ops = &inet6_stream_ops, - .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, }; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7edf096867c4..c7ed47bde54b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1507,7 +1507,6 @@ static struct inet_protosw udpv6_protosw = { .protocol = IPPROTO_UDP, .prot = &udpv6_prot, .ops = &inet6_dgram_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index dfcc4be46898..9cf097e206e9 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -64,7 +64,6 @@ static struct inet_protosw udplite6_protosw = { .protocol = IPPROTO_UDPLITE, .prot = &udplitev6_prot, .ops = &inet6_dgram_ops, - .no_check = 0, .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 3397fe6897c0..369a9822488c 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -606,7 +606,6 @@ static struct inet_protosw l2tp_ip_protosw = { .protocol = IPPROTO_L2TP, .prot = &l2tp_ip_prot, .ops = &l2tp_ip_ops, - .no_check = 0, }; static struct net_protocol l2tp_ip_protocol __read_mostly = { diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index e472d44a3b91..f3f98a156cee 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -755,7 +755,6 @@ static struct inet_protosw l2tp_ip6_protosw = { .protocol = IPPROTO_L2TP, .prot = &l2tp_ip6_prot, .ops = &l2tp_ip6_ops, - .no_check = 0, }; static struct inet6_protocol l2tp_ip6_protocol __read_mostly = { diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 4dc5d9e08311..1999592ba88c 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -943,7 +943,6 @@ static struct inet_protosw sctpv6_seqpacket_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, - .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; static struct inet_protosw sctpv6_stream_protosw = { @@ -951,7 +950,6 @@ static struct inet_protosw sctpv6_stream_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, - .no_check = 0, .flags = SCTP_PROTOSW_FLAG, }; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index af5afca4b85a..6789d785e698 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1017,7 +1017,6 @@ static struct inet_protosw sctp_seqpacket_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctp_prot, .ops = &inet_seqpacket_ops, - .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; static struct inet_protosw sctp_stream_protosw = { @@ -1025,7 +1024,6 @@ static struct inet_protosw sctp_stream_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctp_prot, .ops = &inet_seqpacket_ops, - .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; -- cgit v1.2.3-71-gd317 From 28448b80456feafe07e2d05b6363b00f61f6171e Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 23 May 2014 08:47:19 -0700 Subject: net: Split sk_no_check into sk_no_check_{rx,tx} Define separate fields in the sock structure for configuring disabling checksums in both TX and RX-- sk_no_check_tx and sk_no_check_rx. The SO_NO_CHECK socket option only affects sk_no_check_tx. Also, removed UDP_CSUM_* defines since they are no longer necessary. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/scsi/iscsi_tcp.c | 2 +- include/net/sock.h | 6 ++++-- include/net/udp.h | 9 --------- net/appletalk/ddp.c | 2 +- net/core/sock.c | 4 ++-- net/decnet/af_decnet.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/udp.c | 6 +++--- net/ipx/af_ipx.c | 2 +- net/ipx/ipx_route.c | 3 ++- net/l2tp/l2tp_core.c | 4 ++-- net/l2tp/l2tp_netlink.c | 3 +-- net/sctp/socket.c | 3 ++- 13 files changed, 21 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 11854845393b..a669f2d11c31 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -244,7 +244,7 @@ iscsi_sw_tcp_conn_restore_callbacks(struct iscsi_conn *conn) sk->sk_data_ready = tcp_sw_conn->old_data_ready; sk->sk_state_change = tcp_sw_conn->old_state_change; sk->sk_write_space = tcp_sw_conn->old_write_space; - sk->sk_no_check = 0; + sk->sk_no_check_tx = 0; write_unlock_bh(&sk->sk_callback_lock); } diff --git a/include/net/sock.h b/include/net/sock.h index 21569cf456ed..07b7fcd60d80 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -243,7 +243,8 @@ struct cg_proto; * @sk_sndbuf: size of send buffer in bytes * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings - * @sk_no_check: %SO_NO_CHECK setting, whether or not checkup packets + * @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets + * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK) * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) @@ -371,7 +372,8 @@ struct sock { struct sk_buff_head sk_write_queue; kmemcheck_bitfield_begin(flags); unsigned int sk_shutdown : 2, - sk_no_check : 2, + sk_no_check_tx : 1, + sk_no_check_rx : 1, sk_userlocks : 4, sk_protocol : 8, sk_type : 16; diff --git a/include/net/udp.h b/include/net/udp.h index a24f0f3e107f..5eb86874bcd6 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -95,15 +95,6 @@ static inline struct udp_hslot *udp_hashslot2(struct udp_table *table, return &table->hash2[hash & table->mask]; } -/* Note: this must match 'valbool' in sock_setsockopt */ -#define UDP_CSUM_NOXMIT 1 - -/* Used by SunRPC/xprt layer. */ -#define UDP_CSUM_NORCV 2 - -/* Default, as per the RFC, is to always do csums. */ -#define UDP_CSUM_DEFAULT 0 - extern struct proto udp_prot; extern atomic_long_t udp_memory_allocated; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 786ee2f83d5f..01a1082e02b3 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1669,7 +1669,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr goto out; } - if (sk->sk_no_check == 1) + if (sk->sk_no_check_tx) ddp->deh_sum = 0; else ddp->deh_sum = atalk_checksum(skb, len + sizeof(*ddp)); diff --git a/net/core/sock.c b/net/core/sock.c index 664ee4295b6f..026e01f70274 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -784,7 +784,7 @@ set_rcvbuf: break; case SO_NO_CHECK: - sk->sk_no_check = valbool; + sk->sk_no_check_tx = valbool; break; case SO_PRIORITY: @@ -1064,7 +1064,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_NO_CHECK: - v.val = sk->sk_no_check; + v.val = sk->sk_no_check_tx; break; case SO_PRIORITY: diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 4c04848953bd..ae011b46c071 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -481,7 +481,7 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf sk->sk_backlog_rcv = dn_nsp_backlog_rcv; sk->sk_destruct = dn_destruct; - sk->sk_no_check = 1; + sk->sk_no_check_tx = 1; sk->sk_family = PF_DECnet; sk->sk_protocol = 0; sk->sk_allocation = gfp; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 590532a7bd2d..12c6175b29cd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -785,7 +785,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) if (is_udplite) /* UDP-Lite */ csum = udplite_csum(skb); - else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ + else if (sk->sk_no_check_tx) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c7ed47bde54b..b8db453133aa 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -797,7 +797,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, /* If zero checksum and sk_no_check is not on for * the socket then skip it. */ - if (uh->check || sk->sk_no_check) + if (uh->check || sk->sk_no_check_rx) stack[count++] = sk; sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr, @@ -887,7 +887,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - if (!uh->check && !sk->sk_no_check) { + if (!uh->check && !sk->sk_no_check_rx) { sock_put(sk); udp6_csum_zero_error(skb); goto csum_error; @@ -1037,7 +1037,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) if (is_udplite) csum = udplite_csum_outgoing(sk, skb); - else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ + else if (sk->sk_no_check_tx) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 41e4e93cb3aa..91729b807c7d 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1353,7 +1353,7 @@ static int ipx_create(struct net *net, struct socket *sock, int protocol, sk_refcnt_debug_inc(sk); sock_init_data(sock, sk); - sk->sk_no_check = 1; /* Checksum off by default */ + sk->sk_no_check_tx = 1; /* Checksum off by default */ sock->ops = &ipx_dgram_ops; rc = 0; out: diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c index c1f03185c5e1..67e7ad3d46b1 100644 --- a/net/ipx/ipx_route.c +++ b/net/ipx/ipx_route.c @@ -236,7 +236,8 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, } /* Apply checksum. Not allowed on 802.3 links. */ - if (sk->sk_no_check || intrfc->if_dlink_type == htons(IPX_FRAME_8023)) + if (sk->sk_no_check_tx || + intrfc->if_dlink_type == htons(IPX_FRAME_8023)) ipx->ipx_checksum = htons(0xFFFF); else ipx->ipx_checksum = ipx_cksum(ipx, len + sizeof(struct ipxhdr)); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index ed0716a075ba..a1186105f537 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1188,7 +1188,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len l2tp_xmit_ipv6_csum(sk, skb, udp_len); else #endif - if (sk->sk_no_check == UDP_CSUM_NOXMIT) + if (sk->sk_no_check_tx) skb->ip_summed = CHECKSUM_NONE; else if ((skb_dst(skb) && skb_dst(skb)->dev) && (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) { @@ -1463,7 +1463,7 @@ static int l2tp_tunnel_sock_create(struct net *net, } if (!cfg->use_udp_checksums) - sock->sk->sk_no_check = UDP_CSUM_NOXMIT; + sock->sk->sk_no_check_tx = 1; break; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index bd7387adea9e..f3d331bdd706 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -297,8 +297,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla case L2TP_ENCAPTYPE_UDP: if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) || nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)) || - nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, - (sk->sk_no_check != UDP_CSUM_NOXMIT))) + nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx)) goto nla_put_failure; /* NOBREAK */ case L2TP_ENCAPTYPE_IP: diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2af76eaba8f7..429899689408 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6946,7 +6946,8 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newsk->sk_type = sk->sk_type; newsk->sk_bound_dev_if = sk->sk_bound_dev_if; newsk->sk_flags = sk->sk_flags; - newsk->sk_no_check = sk->sk_no_check; + newsk->sk_no_check_tx = sk->sk_no_check_tx; + newsk->sk_no_check_rx = sk->sk_no_check_rx; newsk->sk_reuse = sk->sk_reuse; newsk->sk_shutdown = sk->sk_shutdown; -- cgit v1.2.3-71-gd317 From 1c19448c9ba6545b80ded18488a64a7f3d8e6998 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 23 May 2014 08:47:32 -0700 Subject: net: Make enabling of zero UDP6 csums more restrictive RFC 6935 permits zero checksums to be used in IPv6 however this is recommended only for certain tunnel protocols, it does not make checksums completely optional like they are in IPv4. This patch restricts the use of IPv6 zero checksums that was previously intoduced. no_check6_tx and no_check6_rx have been added to control the use of checksums in UDP6 RX and TX path. The normal sk_no_check_{rx,tx} settings are not used (this avoids ambiguity when dealing with a dual stack socket). A helper function has been added (udp_set_no_check6) which can be called by tunnel impelmentations to all zero checksums (send on the socket, and accept them as valid). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/udp.h | 24 +++++++++++++++++++++++- include/uapi/linux/udp.h | 2 ++ net/ipv4/udp.c | 20 +++++++++++++++++++- net/ipv6/udp.c | 8 ++++---- 4 files changed, 48 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index 42278bbf7a88..247cfdcc4b08 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -47,7 +47,9 @@ struct udp_sock { #define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node int pending; /* Any pending frames ? */ unsigned int corkflag; /* Cork is required */ - __u16 encap_type; /* Is this an Encapsulation socket? */ + __u8 encap_type; /* Is this an Encapsulation socket? */ + unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ + no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -76,6 +78,26 @@ static inline struct udp_sock *udp_sk(const struct sock *sk) return (struct udp_sock *)sk; } +static inline void udp_set_no_check6_tx(struct sock *sk, bool val) +{ + udp_sk(sk)->no_check6_tx = val; +} + +static inline void udp_set_no_check6_rx(struct sock *sk, bool val) +{ + udp_sk(sk)->no_check6_rx = val; +} + +static inline bool udp_get_no_check6_tx(struct sock *sk) +{ + return udp_sk(sk)->no_check6_tx; +} + +static inline bool udp_get_no_check6_rx(struct sock *sk) +{ + return udp_sk(sk)->no_check6_rx; +} + #define udp_portaddr_for_each_entry(__sk, node, list) \ hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node) diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index e2bcfd75a30d..16574ea18f0c 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -29,6 +29,8 @@ struct udphdr { /* UDP socket options */ #define UDP_CORK 1 /* Never send partially complete segments */ #define UDP_ENCAP 100 /* Set the socket to accept encapsulated packets */ +#define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */ +#define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */ /* UDP encapsulation types */ #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 12c6175b29cd..e07d52b8617a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1968,7 +1968,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, int (*push_pending_frames)(struct sock *)) { struct udp_sock *up = udp_sk(sk); - int val; + int val, valbool; int err = 0; int is_udplite = IS_UDPLITE(sk); @@ -1978,6 +1978,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, if (get_user(val, (int __user *)optval)) return -EFAULT; + valbool = val ? 1 : 0; + switch (optname) { case UDP_CORK: if (val != 0) { @@ -2007,6 +2009,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, } break; + case UDP_NO_CHECK6_TX: + up->no_check6_tx = valbool; + break; + + case UDP_NO_CHECK6_RX: + up->no_check6_rx = valbool; + break; + /* * UDP-Lite's partial checksum coverage (RFC 3828). */ @@ -2089,6 +2099,14 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, val = up->encap_type; break; + case UDP_NO_CHECK6_TX: + val = up->no_check6_tx; + break; + + case UDP_NO_CHECK6_RX: + val = up->no_check6_rx; + break; + /* The following two cannot be changed on UDP sockets, the return is * always 0 (which corresponds to the full checksum coverage of UDP). */ case UDPLITE_SEND_CSCOV: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b8db453133aa..60325236446a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -794,10 +794,10 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, dif = inet6_iif(skb); sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); while (sk) { - /* If zero checksum and sk_no_check is not on for + /* If zero checksum and no_check is not on for * the socket then skip it. */ - if (uh->check || sk->sk_no_check_rx) + if (uh->check || udp_sk(sk)->no_check6_rx) stack[count++] = sk; sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr, @@ -887,7 +887,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - if (!uh->check && !sk->sk_no_check_rx) { + if (!uh->check && !udp_sk(sk)->no_check6_rx) { sock_put(sk); udp6_csum_zero_error(skb); goto csum_error; @@ -1037,7 +1037,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) if (is_udplite) csum = udplite_csum_outgoing(sk, skb); - else if (sk->sk_no_check_tx) { /* UDP csum disabled */ + else if (up->no_check6_tx) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ -- cgit v1.2.3-71-gd317 From 6b649feafe10b293f4bd5a74aca95faf625ae525 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 23 May 2014 08:47:40 -0700 Subject: l2tp: Add support for zero IPv6 checksums Added new L2TP configuration options to allow TX and RX of zero checksums in IPv6. Default is not to use them. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/uapi/linux/l2tp.h | 2 ++ net/l2tp/l2tp_core.c | 9 ++++++++- net/l2tp/l2tp_core.h | 4 +++- net/l2tp/l2tp_netlink.c | 7 +++++++ 4 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 8adb68160327..21caa2631c20 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -124,6 +124,8 @@ enum { L2TP_ATTR_STATS, /* nested */ L2TP_ATTR_IP6_SADDR, /* struct in6_addr */ L2TP_ATTR_IP6_DADDR, /* struct in6_addr */ + L2TP_ATTR_UDP_ZERO_CSUM6_TX, /* u8 */ + L2TP_ATTR_UDP_ZERO_CSUM6_RX, /* u8 */ __L2TP_ATTR_MAX, }; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index a1186105f537..379558014b60 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1102,7 +1102,9 @@ static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb, struct ipv6_pinfo *np = inet6_sk(sk); struct udphdr *uh = udp_hdr(skb); - if (!skb_dst(skb) || !skb_dst(skb)->dev || + if (udp_get_no_check6_tx(sk)) + skb->ip_summed = CHECKSUM_NONE; + else if (!skb_dst(skb) || !skb_dst(skb)->dev || !(skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { __wsum csum = skb_checksum(skb, 0, udp_len, 0); skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -1435,6 +1437,11 @@ static int l2tp_tunnel_sock_create(struct net *net, sizeof(udp6_addr), 0); if (err < 0) goto out; + + if (cfg->udp6_zero_tx_checksums) + udp_set_no_check6_tx(sock->sk, true); + if (cfg->udp6_zero_rx_checksums) + udp_set_no_check6_rx(sock->sk, true); } else #endif { diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 3f93ccd6ba97..68aa9ffd4ae4 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -162,7 +162,9 @@ struct l2tp_tunnel_cfg { #endif u16 local_udp_port; u16 peer_udp_port; - unsigned int use_udp_checksums:1; + unsigned int use_udp_checksums:1, + udp6_zero_tx_checksums:1, + udp6_zero_rx_checksums:1; }; struct l2tp_tunnel { diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index f3d331bdd706..0ac907adb2f4 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -161,6 +161,13 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]); if (info->attrs[L2TP_ATTR_UDP_CSUM]) cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]); + +#if IS_ENABLED(CONFIG_IPV6) + if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]) + cfg.udp6_zero_tx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]); + if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]) + cfg.udp6_zero_rx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]); +#endif } if (info->attrs[L2TP_ATTR_DEBUG]) -- cgit v1.2.3-71-gd317 From 8556ce79d5986a87fee4c29300b4efee07c0f15e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 May 2014 18:43:57 +0200 Subject: net: filter: remove DL macro Lets get rid of this macro. After commit 5bcfedf06f7f ("net: filter: simplify label names from jump-table"), labels have become more readable due to omission of BPF_ prefix but at the same time more generic, so that things like `git grep -n` would not find them. As a middle path, lets get rid of the DL macro as it's not strictly needed and would otherwise just hide the full name. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 3 - net/core/filter.c | 193 +++++++++++++++++++++++++------------------------ 2 files changed, 99 insertions(+), 97 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 7977b3958e25..2b0056afd1f7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -37,9 +37,6 @@ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ -/* Placeholder/dummy for 0 */ -#define BPF_0 0 - /* Register numbers */ enum { BPF_REG_0 = 0, diff --git a/net/core/filter.c b/net/core/filter.c index 7067cb240d3e..b3b0889fe089 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -160,95 +160,100 @@ static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *ins static const void *jumptable[256] = { [0 ... 255] = &&default_label, /* Now overwrite non-defaults ... */ -#define DL(A, B, C) [BPF_##A|BPF_##B|BPF_##C] = &&A##_##B##_##C - DL(ALU, ADD, X), - DL(ALU, ADD, K), - DL(ALU, SUB, X), - DL(ALU, SUB, K), - DL(ALU, AND, X), - DL(ALU, AND, K), - DL(ALU, OR, X), - DL(ALU, OR, K), - DL(ALU, LSH, X), - DL(ALU, LSH, K), - DL(ALU, RSH, X), - DL(ALU, RSH, K), - DL(ALU, XOR, X), - DL(ALU, XOR, K), - DL(ALU, MUL, X), - DL(ALU, MUL, K), - DL(ALU, MOV, X), - DL(ALU, MOV, K), - DL(ALU, DIV, X), - DL(ALU, DIV, K), - DL(ALU, MOD, X), - DL(ALU, MOD, K), - DL(ALU, NEG, 0), - DL(ALU, END, TO_BE), - DL(ALU, END, TO_LE), - DL(ALU64, ADD, X), - DL(ALU64, ADD, K), - DL(ALU64, SUB, X), - DL(ALU64, SUB, K), - DL(ALU64, AND, X), - DL(ALU64, AND, K), - DL(ALU64, OR, X), - DL(ALU64, OR, K), - DL(ALU64, LSH, X), - DL(ALU64, LSH, K), - DL(ALU64, RSH, X), - DL(ALU64, RSH, K), - DL(ALU64, XOR, X), - DL(ALU64, XOR, K), - DL(ALU64, MUL, X), - DL(ALU64, MUL, K), - DL(ALU64, MOV, X), - DL(ALU64, MOV, K), - DL(ALU64, ARSH, X), - DL(ALU64, ARSH, K), - DL(ALU64, DIV, X), - DL(ALU64, DIV, K), - DL(ALU64, MOD, X), - DL(ALU64, MOD, K), - DL(ALU64, NEG, 0), - DL(JMP, CALL, 0), - DL(JMP, JA, 0), - DL(JMP, JEQ, X), - DL(JMP, JEQ, K), - DL(JMP, JNE, X), - DL(JMP, JNE, K), - DL(JMP, JGT, X), - DL(JMP, JGT, K), - DL(JMP, JGE, X), - DL(JMP, JGE, K), - DL(JMP, JSGT, X), - DL(JMP, JSGT, K), - DL(JMP, JSGE, X), - DL(JMP, JSGE, K), - DL(JMP, JSET, X), - DL(JMP, JSET, K), - DL(JMP, EXIT, 0), - DL(STX, MEM, B), - DL(STX, MEM, H), - DL(STX, MEM, W), - DL(STX, MEM, DW), - DL(STX, XADD, W), - DL(STX, XADD, DW), - DL(ST, MEM, B), - DL(ST, MEM, H), - DL(ST, MEM, W), - DL(ST, MEM, DW), - DL(LDX, MEM, B), - DL(LDX, MEM, H), - DL(LDX, MEM, W), - DL(LDX, MEM, DW), - DL(LD, ABS, W), - DL(LD, ABS, H), - DL(LD, ABS, B), - DL(LD, IND, W), - DL(LD, IND, H), - DL(LD, IND, B), -#undef DL + /* 32 bit ALU operations */ + [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X, + [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K, + [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X, + [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K, + [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X, + [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K, + [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X, + [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K, + [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X, + [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K, + [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X, + [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K, + [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X, + [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K, + [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X, + [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K, + [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X, + [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K, + [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X, + [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K, + [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X, + [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K, + [BPF_ALU | BPF_NEG] = &&ALU_NEG, + [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE, + [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE, + /* 64 bit ALU operations */ + [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X, + [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K, + [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X, + [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K, + [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X, + [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K, + [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X, + [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K, + [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X, + [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K, + [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X, + [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K, + [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X, + [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K, + [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X, + [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K, + [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X, + [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K, + [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X, + [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K, + [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X, + [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K, + [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X, + [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K, + [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, + /* Call instruction */ + [BPF_JMP | BPF_CALL] = &&JMP_CALL, + /* Jumps */ + [BPF_JMP | BPF_JA] = &&JMP_JA, + [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X, + [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K, + [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X, + [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K, + [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X, + [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K, + [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X, + [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K, + [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X, + [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K, + [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X, + [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K, + [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X, + [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K, + /* Program return */ + [BPF_JMP | BPF_EXIT] = &&JMP_EXIT, + /* Store instructions */ + [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B, + [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H, + [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W, + [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW, + [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W, + [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW, + [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B, + [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H, + [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W, + [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW, + /* Load instructions */ + [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B, + [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H, + [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W, + [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW, + [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W, + [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H, + [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B, + [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W, + [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H, + [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, }; void *ptr; int off; @@ -290,10 +295,10 @@ select_insn: ALU(XOR, ^) ALU(MUL, *) #undef ALU - ALU_NEG_0: + ALU_NEG: A = (u32) -A; CONT; - ALU64_NEG_0: + ALU64_NEG: A = -A; CONT; ALU_MOV_X: @@ -382,7 +387,7 @@ select_insn: CONT; /* CALL */ - JMP_CALL_0: + JMP_CALL: /* Function call scratches BPF_R1-BPF_R5 registers, * preserves BPF_R6-BPF_R9, and stores return value * into BPF_R0. @@ -392,7 +397,7 @@ select_insn: CONT; /* JMP */ - JMP_JA_0: + JMP_JA: insn += insn->off; CONT; JMP_JEQ_X: @@ -479,7 +484,7 @@ select_insn: CONT_JMP; } CONT; - JMP_EXIT_0: + JMP_EXIT: return BPF_R0; /* STX and ST and LDX*/ -- cgit v1.2.3-71-gd317 From b1fcd35cf53553a0a3ef949b05106d921446abc3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 May 2014 18:43:58 +0200 Subject: net: filter: let unattached filters use sock_fprog_kern The sk_unattached_filter_create() API is used by BPF filters that are not directly attached or related to sockets, and are used in team, ptp, xt_bpf, cls_bpf, etc. As such all users do their own internal managment of obtaining filter blocks and thus already have them in kernel memory and set up before calling into sk_unattached_filter_create(). As a result, due to __user annotation in sock_fprog, sparse triggers false positives (incorrect type in assignment [different address space]) when filters are set up before passing them to sk_unattached_filter_create(). Therefore, let sk_unattached_filter_create() API use sock_fprog_kern to overcome this issue. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- drivers/isdn/i4l/isdn_ppp.c | 4 ++-- drivers/net/ppp/ppp_generic.c | 4 ++-- drivers/net/team/team_mode_loadbalance.c | 10 +++++----- include/linux/filter.h | 2 +- lib/test_bpf.c | 2 +- net/core/filter.c | 2 +- net/core/ptp_classifier.c | 2 +- net/netfilter/xt_bpf.c | 5 +++-- net/sched/cls_bpf.c | 4 ++-- 9 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index a5da511e3c9a..61ac63237446 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -634,7 +634,7 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) #ifdef CONFIG_IPPP_FILTER case PPPIOCSPASS: { - struct sock_fprog fprog; + struct sock_fprog_kern fprog; struct sock_filter *code; int err, len = get_filter(argp, &code); @@ -653,7 +653,7 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) } case PPPIOCSACTIVE: { - struct sock_fprog fprog; + struct sock_fprog_kern fprog; struct sock_filter *code; int err, len = get_filter(argp, &code); diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index e3923ebb693f..91d6c1272fcf 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -757,7 +757,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) err = get_filter(argp, &code); if (err >= 0) { - struct sock_fprog fprog = { + struct sock_fprog_kern fprog = { .len = err, .filter = code, }; @@ -778,7 +778,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) err = get_filter(argp, &code); if (err >= 0) { - struct sock_fprog fprog = { + struct sock_fprog_kern fprog = { .len = err, .filter = code, }; diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index dbde3412ee5e..0a6ee07bf0af 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -49,7 +49,7 @@ struct lb_port_mapping { struct lb_priv_ex { struct team *team; struct lb_port_mapping tx_hash_to_port_mapping[LB_TX_HASHTABLE_SIZE]; - struct sock_fprog *orig_fprog; + struct sock_fprog_kern *orig_fprog; struct { unsigned int refresh_interval; /* in tenths of second */ struct delayed_work refresh_dw; @@ -241,10 +241,10 @@ static int lb_bpf_func_get(struct team *team, struct team_gsetter_ctx *ctx) return 0; } -static int __fprog_create(struct sock_fprog **pfprog, u32 data_len, +static int __fprog_create(struct sock_fprog_kern **pfprog, u32 data_len, const void *data) { - struct sock_fprog *fprog; + struct sock_fprog_kern *fprog; struct sock_filter *filter = (struct sock_filter *) data; if (data_len % sizeof(struct sock_filter)) @@ -262,7 +262,7 @@ static int __fprog_create(struct sock_fprog **pfprog, u32 data_len, return 0; } -static void __fprog_destroy(struct sock_fprog *fprog) +static void __fprog_destroy(struct sock_fprog_kern *fprog) { kfree(fprog->filter); kfree(fprog); @@ -273,7 +273,7 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) struct lb_priv *lb_priv = get_lb_priv(team); struct sk_filter *fp = NULL; struct sk_filter *orig_fp; - struct sock_fprog *fprog = NULL; + struct sock_fprog_kern *fprog = NULL; int err; if (ctx->data.bin_val.len) { diff --git a/include/linux/filter.h b/include/linux/filter.h index 2b0056afd1f7..625f4de9bdf2 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -188,7 +188,7 @@ int sk_convert_filter(struct sock_filter *prog, int len, struct sock_filter_int *new_prog, int *new_len); int sk_unattached_filter_create(struct sk_filter **pfp, - struct sock_fprog *fprog); + struct sock_fprog_kern *fprog); void sk_unattached_filter_destroy(struct sk_filter *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 3d80adbdb559..e03991ea8cc2 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1472,7 +1472,7 @@ static int run_one(struct sk_filter *fp, struct bpf_test *t) static __init int test_bpf(void) { struct sk_filter *fp, *fp_ext = NULL; - struct sock_fprog fprog; + struct sock_fprog_kern fprog; int err, i, err_cnt = 0; for (i = 0; i < ARRAY_SIZE(tests); i++) { diff --git a/net/core/filter.c b/net/core/filter.c index b3b0889fe089..2c2d35d9d101 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1585,7 +1585,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, * a negative errno code is returned. On success the return is zero. */ int sk_unattached_filter_create(struct sk_filter **pfp, - struct sock_fprog *fprog) + struct sock_fprog_kern *fprog) { unsigned int fsize = sk_filter_proglen(fprog); struct sk_filter *fp; diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index 37d86157b76e..d3027a73fd4b 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -133,7 +133,7 @@ void __init ptp_classifier_init(void) { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, }; - struct sock_fprog ptp_prog = { + struct sock_fprog_kern ptp_prog = { .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, }; diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 12d4da8e6c77..bbffdbdaf603 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -23,10 +23,11 @@ MODULE_ALIAS("ip6t_bpf"); static int bpf_mt_check(const struct xt_mtchk_param *par) { struct xt_bpf_info *info = par->matchinfo; - struct sock_fprog program; + struct sock_fprog_kern program; program.len = info->bpf_program_num_elem; - program.filter = (struct sock_filter __user *) info->bpf_program; + program.filter = info->bpf_program; + if (sk_unattached_filter_create(&info->filter, &program)) { pr_info("bpf: check failed: parse error\n"); return -EINVAL; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 16186965af97..13f64df2c710 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -160,7 +160,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, { struct sock_filter *bpf_ops, *bpf_old; struct tcf_exts exts; - struct sock_fprog tmp; + struct sock_fprog_kern tmp; struct sk_filter *fp, *fp_old; u16 bpf_size, bpf_len; u32 classid; @@ -191,7 +191,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size); tmp.len = bpf_len; - tmp.filter = (struct sock_filter __user *) bpf_ops; + tmp.filter = bpf_ops; ret = sk_unattached_filter_create(&fp, &tmp); if (ret) -- cgit v1.2.3-71-gd317 From 34ea4d4417bb726245fdaeb2f8951eaa0c18fc4c Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 9 Mar 2014 21:42:52 -0300 Subject: [media] v4l: vb2: Add a function to discard all DONE buffers When suspending a device while a video stream is active all buffers marked as done but not dequeued yet will be kept across suspend and given back to userspace after resume. This will result in outdated buffers being dequeued. Introduce a new vb2 function to mark all done buffers as erroneous instead, to be used by drivers at resume time. Signed-off-by: Laurent Pinchart Acked-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/videobuf2-core.c | 24 ++++++++++++++++++++++++ include/media/videobuf2-core.h | 1 + 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index 349e659d75fb..7c4489c42365 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -1199,6 +1199,30 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state) } EXPORT_SYMBOL_GPL(vb2_buffer_done); +/** + * vb2_discard_done() - discard all buffers marked as DONE + * @q: videobuf2 queue + * + * This function is intended to be used with suspend/resume operations. It + * discards all 'done' buffers as they would be too old to be requested after + * resume. + * + * Drivers must stop the hardware and synchronize with interrupt handlers and/or + * delayed works before calling this function to make sure no buffer will be + * touched by the driver and/or hardware. + */ +void vb2_discard_done(struct vb2_queue *q) +{ + struct vb2_buffer *vb; + unsigned long flags; + + spin_lock_irqsave(&q->done_lock, flags); + list_for_each_entry(vb, &q->done_list, done_entry) + vb->state = VB2_BUF_STATE_ERROR; + spin_unlock_irqrestore(&q->done_lock, flags); +} +EXPORT_SYMBOL_GPL(vb2_discard_done); + /** * __fill_vb2_buffer() - fill a vb2_buffer with information provided in a * v4l2_buffer by the userspace. The caller has already verified that struct diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index bca25dc53f9d..8fab6fa0dbfb 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -432,6 +432,7 @@ void *vb2_plane_vaddr(struct vb2_buffer *vb, unsigned int plane_no); void *vb2_plane_cookie(struct vb2_buffer *vb, unsigned int plane_no); void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state); +void vb2_discard_done(struct vb2_queue *q); int vb2_wait_for_all_buffers(struct vb2_queue *q); int vb2_querybuf(struct vb2_queue *q, struct v4l2_buffer *b); -- cgit v1.2.3-71-gd317 From 59201052dff17d0cc8bc60bc9e89ad4924906ee8 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 23 May 2014 12:58:06 +0900 Subject: drm/ttm: fix kerneldoc of ttm_bo_create The kerneldoc header of ttm_bo_create() was referring to another (nonexisting) function and had a few obsolete or incorrect arguments. Signed-off-by: Alexandre Courbot Reviewed-by: David Herrmann Signed-off-by: Dave Airlie --- include/drm/ttm/ttm_bo_api.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index ee127ec33c60..7526c5bf5610 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -485,13 +485,12 @@ extern int ttm_bo_init(struct ttm_bo_device *bdev, void (*destroy) (struct ttm_buffer_object *)); /** - * ttm_bo_synccpu_object_init + * ttm_bo_create * * @bdev: Pointer to a ttm_bo_device struct. - * @bo: Pointer to a ttm_buffer_object to be initialized. * @size: Requested size of buffer object. * @type: Requested type of buffer object. - * @flags: Initial placement flags. + * @placement: Initial placement. * @page_alignment: Data alignment in pages. * @interruptible: If needing to sleep while waiting for GPU resources, * sleep interruptible. -- cgit v1.2.3-71-gd317 From 1a5f0c13d1a8808c2bdd00630818ed491e1719f5 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 23 May 2014 14:33:12 +0300 Subject: mac80211: add a single-transaction driver op to switch contexts In some cases, when the driver is already using all the channel contexts it can handle at once, we have to do an in-place switch (ie. we cannot afford using an extra context temporarily for the transaction). But some drivers may not support switching the channel context assigned to a vif on the fly (ie. without unassigning and assigning it) while others may only work if the context is changed on the fly, without unassigning it first. To allow these different scenarios, add a new driver operation that let's the driver decide how to handle an in-place switch. Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 46 +++++++++++++++++++++++++ net/mac80211/driver-ops.h | 53 +++++++++++++++++++++++++++++ net/mac80211/trace.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 184 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2c78997bc48d..421b6ecb4b2c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -188,6 +188,43 @@ struct ieee80211_chanctx_conf { u8 drv_priv[0] __aligned(sizeof(void *)); }; +/** + * enum ieee80211_chanctx_switch_mode - channel context switch mode + * @CHANCTX_SWMODE_REASSIGN_VIF: Both old and new contexts already + * exist (and will continue to exist), but the virtual interface + * needs to be switched from one to the other. + * @CHANCTX_SWMODE_SWAP_CONTEXTS: The old context exists but will stop + * to exist with this call, the new context doesn't exist but + * will be active after this call, the virtual interface switches + * from the old to the new (note that the driver may of course + * implement this as an on-the-fly chandef switch of the existing + * hardware context, but the mac80211 pointer for the old context + * will cease to exist and only the new one will later be used + * for changes/removal.) + */ +enum ieee80211_chanctx_switch_mode { + CHANCTX_SWMODE_REASSIGN_VIF, + CHANCTX_SWMODE_SWAP_CONTEXTS, +}; + +/** + * struct ieee80211_vif_chanctx_switch - vif chanctx switch information + * + * This is structure is used to pass information about a vif that + * needs to switch from one chanctx to another. The + * &ieee80211_chanctx_switch_mode defines how the switch should be + * done. + * + * @vif: the vif that should be switched from old_ctx to new_ctx + * @old_ctx: the old context to which the vif was assigned + * @new_ctx: the new context to which the vif must be assigned + */ +struct ieee80211_vif_chanctx_switch { + struct ieee80211_vif *vif; + struct ieee80211_chanctx_conf *old_ctx; + struct ieee80211_chanctx_conf *new_ctx; +}; + /** * enum ieee80211_bss_change - BSS change notification flags * @@ -2736,6 +2773,11 @@ enum ieee80211_roc_type { * to vif. Possible use is for hw queue remapping. * @unassign_vif_chanctx: Notifies device driver about channel context being * unbound from vif. + * @switch_vif_chanctx: switch a number of vifs from one chanctx to + * another, as specified in the list of + * @ieee80211_vif_chanctx_switch passed to the driver, according + * to the mode defined in &ieee80211_chanctx_switch_mode. + * * @start_ap: Start operation on the AP interface, this is called after all the * information in bss_conf is set and beacon can be retrieved. A channel * context is bound before this is called. Note that if the driver uses @@ -2952,6 +2994,10 @@ struct ieee80211_ops { void (*unassign_vif_chanctx)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_chanctx_conf *ctx); + int (*switch_vif_chanctx)(struct ieee80211_hw *hw, + struct ieee80211_vif_chanctx_switch *vifs, + int n_vifs, + enum ieee80211_chanctx_switch_mode mode); void (*restart_complete)(struct ieee80211_hw *hw); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 696ef78b1fb7..bd782dcffcc7 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1048,6 +1048,59 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline int +drv_switch_vif_chanctx(struct ieee80211_local *local, + struct ieee80211_vif_chanctx_switch *vifs, + int n_vifs, + enum ieee80211_chanctx_switch_mode mode) +{ + int ret = 0; + int i; + + if (!local->ops->switch_vif_chanctx) + return -EOPNOTSUPP; + + for (i = 0; i < n_vifs; i++) { + struct ieee80211_chanctx *new_ctx = + container_of(vifs[i].new_ctx, + struct ieee80211_chanctx, + conf); + struct ieee80211_chanctx *old_ctx = + container_of(vifs[i].old_ctx, + struct ieee80211_chanctx, + conf); + + WARN_ON_ONCE(!old_ctx->driver_present); + WARN_ON_ONCE((mode == CHANCTX_SWMODE_SWAP_CONTEXTS && + new_ctx->driver_present) || + (mode == CHANCTX_SWMODE_REASSIGN_VIF && + !new_ctx->driver_present)); + } + + trace_drv_switch_vif_chanctx(local, vifs, n_vifs, mode); + ret = local->ops->switch_vif_chanctx(&local->hw, + vifs, n_vifs, mode); + trace_drv_return_int(local, ret); + + if (!ret && mode == CHANCTX_SWMODE_SWAP_CONTEXTS) { + for (i = 0; i < n_vifs; i++) { + struct ieee80211_chanctx *new_ctx = + container_of(vifs[i].new_ctx, + struct ieee80211_chanctx, + conf); + struct ieee80211_chanctx *old_ctx = + container_of(vifs[i].old_ctx, + struct ieee80211_chanctx, + conf); + + new_ctx->driver_present = true; + old_ctx->driver_present = false; + } + } + + return ret; +} + static inline int drv_start_ap(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 942f64b8ce0e..b22d6969cde9 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1389,6 +1389,91 @@ TRACE_EVENT(drv_change_chanctx, ) ); +#if !defined(__TRACE_VIF_ENTRY) +#define __TRACE_VIF_ENTRY +struct trace_vif_entry { + enum nl80211_iftype vif_type; + bool p2p; + char vif_name[IFNAMSIZ]; +} __packed; + +struct trace_chandef_entry { + u32 control_freq; + u32 chan_width; + u32 center_freq1; + u32 center_freq2; +} __packed; + +struct trace_switch_entry { + struct trace_vif_entry vif; + struct trace_chandef_entry old_chandef; + struct trace_chandef_entry new_chandef; +} __packed; + +#define SWITCH_ENTRY_ASSIGN(to, from) local_vifs[i].to = vifs[i].from +#endif + +TRACE_EVENT(drv_switch_vif_chanctx, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_vif_chanctx_switch *vifs, + int n_vifs, enum ieee80211_chanctx_switch_mode mode), + TP_ARGS(local, vifs, n_vifs, mode), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(int, n_vifs) + __field(u32, mode) + __dynamic_array(u8, vifs, + sizeof(struct trace_switch_entry) * n_vifs) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->n_vifs = n_vifs; + __entry->mode = mode; + { + struct trace_switch_entry *local_vifs = + __get_dynamic_array(vifs); + int i; + + for (i = 0; i < n_vifs; i++) { + struct ieee80211_sub_if_data *sdata; + + sdata = container_of(vifs[i].vif, + struct ieee80211_sub_if_data, + vif); + + SWITCH_ENTRY_ASSIGN(vif.vif_type, vif->type); + SWITCH_ENTRY_ASSIGN(vif.p2p, vif->p2p); + strncpy(local_vifs[i].vif.vif_name, + sdata->name, + sizeof(local_vifs[i].vif.vif_name)); + SWITCH_ENTRY_ASSIGN(old_chandef.control_freq, + old_ctx->def.chan->center_freq); + SWITCH_ENTRY_ASSIGN(old_chandef.chan_width, + old_ctx->def.width); + SWITCH_ENTRY_ASSIGN(old_chandef.center_freq1, + old_ctx->def.center_freq1); + SWITCH_ENTRY_ASSIGN(old_chandef.center_freq2, + old_ctx->def.center_freq2); + SWITCH_ENTRY_ASSIGN(new_chandef.control_freq, + new_ctx->def.chan->center_freq); + SWITCH_ENTRY_ASSIGN(new_chandef.chan_width, + new_ctx->def.width); + SWITCH_ENTRY_ASSIGN(new_chandef.center_freq1, + new_ctx->def.center_freq1); + SWITCH_ENTRY_ASSIGN(new_chandef.center_freq2, + new_ctx->def.center_freq2); + } + } + ), + + TP_printk( + LOCAL_PR_FMT " n_vifs:%d mode:%d", + LOCAL_PR_ARG, __entry->n_vifs, __entry->mode + ) +); + DECLARE_EVENT_CLASS(local_sdata_chanctx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, -- cgit v1.2.3-71-gd317 From 61dc1abae64854c7cef543598b9e6f04886c4ebd Mon Sep 17 00:00:00 2001 From: Nick Dyer Date: Sun, 18 May 2014 23:16:49 -0700 Subject: Input: atmel_mxt_ts - read screen config from chip By reading the touchscreen configuration from the settings that the maXTouch chip is actually using, we can remove some platform data. The matrix size is not used for anything, and results in some rather confusing code to re-read it because it may change when configuration is downloaded, so don't print it out. Signed-off-by: Nick Dyer Acked-by: Benson Leung Acked-by: Yufeng Shen Signed-off-by: Dmitry Torokhov --- arch/arm/mach-s5pv210/mach-goni.c | 3 - drivers/input/touchscreen/atmel_mxt_ts.c | 136 ++++++++++++++---------------- drivers/platform/chrome/chromeos_laptop.c | 6 -- include/linux/i2c/atmel_mxt_ts.h | 15 ---- 4 files changed, 65 insertions(+), 95 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c index e549ecf0e5dc..bb9354f45e27 100644 --- a/arch/arm/mach-s5pv210/mach-goni.c +++ b/arch/arm/mach-s5pv210/mach-goni.c @@ -239,9 +239,6 @@ static void __init goni_radio_init(void) /* TSP */ static struct mxt_platform_data qt602240_platform_data = { - .x_size = 800, - .y_size = 480, - .orient = MXT_DIAGONAL, .irqflags = IRQF_TRIGGER_FALLING, }; diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 7a9197a19f67..75493ca8e784 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -100,33 +100,16 @@ /* MXT_TOUCH_MULTI_T9 field */ #define MXT_TOUCH_CTRL 0 -#define MXT_TOUCH_XORIGIN 1 -#define MXT_TOUCH_YORIGIN 2 -#define MXT_TOUCH_XSIZE 3 -#define MXT_TOUCH_YSIZE 4 -#define MXT_TOUCH_BLEN 6 -#define MXT_TOUCH_TCHTHR 7 -#define MXT_TOUCH_TCHDI 8 -#define MXT_TOUCH_ORIENT 9 -#define MXT_TOUCH_MOVHYSTI 11 -#define MXT_TOUCH_MOVHYSTN 12 -#define MXT_TOUCH_NUMTOUCH 14 -#define MXT_TOUCH_MRGHYST 15 -#define MXT_TOUCH_MRGTHR 16 -#define MXT_TOUCH_AMPHYST 17 -#define MXT_TOUCH_XRANGE_LSB 18 -#define MXT_TOUCH_XRANGE_MSB 19 -#define MXT_TOUCH_YRANGE_LSB 20 -#define MXT_TOUCH_YRANGE_MSB 21 -#define MXT_TOUCH_XLOCLIP 22 -#define MXT_TOUCH_XHICLIP 23 -#define MXT_TOUCH_YLOCLIP 24 -#define MXT_TOUCH_YHICLIP 25 -#define MXT_TOUCH_XEDGECTRL 26 -#define MXT_TOUCH_XEDGEDIST 27 -#define MXT_TOUCH_YEDGECTRL 28 -#define MXT_TOUCH_YEDGEDIST 29 -#define MXT_TOUCH_JUMPLIMIT 30 +#define MXT_T9_ORIENT 9 +#define MXT_T9_RANGE 18 + +struct t9_range { + u16 x; + u16 y; +} __packed; + +/* Touch orient bits */ +#define MXT_XY_SWITCH (1 << 0) /* MXT_PROCI_GRIPFACE_T20 field */ #define MXT_GRIPFACE_CTRL 0 @@ -211,11 +194,6 @@ #define MXT_PRESS (1 << 6) #define MXT_DETECT (1 << 7) -/* Touch orient bits */ -#define MXT_XY_SWITCH (1 << 0) -#define MXT_X_INVERT (1 << 1) -#define MXT_Y_INVERT (1 << 2) - /* Touchscreen absolute values */ #define MXT_MAX_AREA 0xff @@ -580,11 +558,6 @@ static int __mxt_read_reg(struct i2c_client *client, return ret; } -static int mxt_read_reg(struct i2c_client *client, u16 reg, u8 *val) -{ - return __mxt_read_reg(client, reg, 1, val); -} - static int __mxt_write_reg(struct i2c_client *client, u16 reg, u16 len, const void *val) { @@ -1029,12 +1002,59 @@ static void mxt_free_object_table(struct mxt_data *data) data->T19_reportid = 0; } +static int mxt_read_t9_resolution(struct mxt_data *data) +{ + struct i2c_client *client = data->client; + int error; + struct t9_range range; + unsigned char orient; + struct mxt_object *object; + + object = mxt_get_object(data, MXT_TOUCH_MULTI_T9); + if (!object) + return -EINVAL; + + error = __mxt_read_reg(client, + object->start_address + MXT_T9_RANGE, + sizeof(range), &range); + if (error) + return error; + + le16_to_cpus(&range.x); + le16_to_cpus(&range.y); + + error = __mxt_read_reg(client, + object->start_address + MXT_T9_ORIENT, + 1, &orient); + if (error) + return error; + + /* Handle default values */ + if (range.x == 0) + range.x = 1023; + + if (range.y == 0) + range.y = 1023; + + if (orient & MXT_XY_SWITCH) { + data->max_x = range.y; + data->max_y = range.x; + } else { + data->max_x = range.x; + data->max_y = range.y; + } + + dev_dbg(&client->dev, + "Touchscreen size X%uY%u\n", data->max_x, data->max_y); + + return 0; +} + static int mxt_initialize(struct mxt_data *data) { struct i2c_client *client = data->client; struct mxt_info *info = &data->info; int error; - u8 val; error = mxt_get_info(data); if (error) @@ -1063,26 +1083,16 @@ static int mxt_initialize(struct mxt_data *data) goto err_free_object_table; } - /* Update matrix size at info struct */ - error = mxt_read_reg(client, MXT_MATRIX_X_SIZE, &val); - if (error) - goto err_free_object_table; - info->matrix_xsize = val; - - error = mxt_read_reg(client, MXT_MATRIX_Y_SIZE, &val); - if (error) + error = mxt_read_t9_resolution(data); + if (error) { + dev_err(&client->dev, "Failed to initialize T9 resolution\n"); goto err_free_object_table; - info->matrix_ysize = val; - - dev_info(&client->dev, - "Family: %u Variant: %u Firmware V%u.%u.%02X\n", - info->family_id, info->variant_id, info->version >> 4, - info->version & 0xf, info->build); + } dev_info(&client->dev, - "Matrix X Size: %u Matrix Y Size: %u Objects: %u\n", - info->matrix_xsize, info->matrix_ysize, - info->object_num); + "Family: %u Variant: %u Firmware V%u.%u.%02X Objects: %u\n", + info->family_id, info->variant_id, info->version >> 4, + info->version & 0xf, info->build, info->object_num); return 0; @@ -1091,20 +1101,6 @@ err_free_object_table: return error; } -static void mxt_calc_resolution(struct mxt_data *data) -{ - unsigned int max_x = data->pdata->x_size - 1; - unsigned int max_y = data->pdata->y_size - 1; - - if (data->pdata->orient & MXT_XY_SWITCH) { - data->max_x = max_y; - data->max_y = max_x; - } else { - data->max_x = max_x; - data->max_y = max_y; - } -} - /* Firmware Version is returned as Major.Minor.Build */ static ssize_t mxt_fw_version_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1430,8 +1426,6 @@ static int mxt_probe(struct i2c_client *client, init_completion(&data->reset_completion); init_completion(&data->crc_completion); - mxt_calc_resolution(data); - error = mxt_initialize(data); if (error) goto err_free_mem; diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c index 8b7523ab62e5..7f1a2e2711bd 100644 --- a/drivers/platform/chrome/chromeos_laptop.c +++ b/drivers/platform/chrome/chromeos_laptop.c @@ -94,9 +94,6 @@ static int mxt_t19_keys[] = { }; static struct mxt_platform_data atmel_224s_tp_platform_data = { - .x_size = 102*20, - .y_size = 68*20, - .orient = MXT_VERTICAL_FLIP, .irqflags = IRQF_TRIGGER_FALLING, .t19_num_keys = ARRAY_SIZE(mxt_t19_keys), .t19_keymap = mxt_t19_keys, @@ -111,9 +108,6 @@ static struct i2c_board_info atmel_224s_tp_device = { }; static struct mxt_platform_data atmel_1664s_platform_data = { - .x_size = 1700, - .y_size = 2560, - .orient = MXT_ROTATED_90_COUNTER, .irqflags = IRQF_TRIGGER_FALLING, .config = NULL, .config_length = 0, diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h index 9f92135b6620..3891dc1de21c 100644 --- a/include/linux/i2c/atmel_mxt_ts.h +++ b/include/linux/i2c/atmel_mxt_ts.h @@ -15,26 +15,11 @@ #include -/* Orient */ -#define MXT_NORMAL 0x0 -#define MXT_DIAGONAL 0x1 -#define MXT_HORIZONTAL_FLIP 0x2 -#define MXT_ROTATED_90_COUNTER 0x3 -#define MXT_VERTICAL_FLIP 0x4 -#define MXT_ROTATED_90 0x5 -#define MXT_ROTATED_180 0x6 -#define MXT_DIAGONAL_COUNTER 0x7 - /* The platform data for the Atmel maXTouch touchscreen driver */ struct mxt_platform_data { const u8 *config; size_t config_length; u32 config_crc; - - unsigned int x_size; - unsigned int y_size; - unsigned char orient; - unsigned long irqflags; u8 t19_num_keys; const unsigned int *t19_keymap; -- cgit v1.2.3-71-gd317 From b8380580836bf81d032da6099879da9f6b515325 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 23 Apr 2014 15:49:04 +0200 Subject: drm/dp: Add missing kernel-doc Commit 9dc4056026e0 (drm/dp: let drivers specify the name of the I2C- over-AUX adapter) introduced a new field but didn't add the proper kernel-doc for it. Signed-off-by: Thierry Reding Reviewed-by: Jani Nikula Signed-off-by: Dave Airlie --- include/drm/drm_dp_helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index c8857e6159a5..efcde2c38cc1 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -509,6 +509,7 @@ struct drm_dp_aux_msg { /** * struct drm_dp_aux - DisplayPort AUX channel + * @name: user-visible name of this AUX channel and the I2C-over-AUX adapter * @ddc: I2C adapter that can be used for I2C-over-AUX communication * @dev: pointer to struct device that is the parent for this AUX channel * @transfer: transfers a message representing a single AUX transaction -- cgit v1.2.3-71-gd317 From b475598aec63f2efbc78f0ff1895d917d2370846 Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Thu, 22 May 2014 14:50:10 +0300 Subject: mlx5_core: Store MR attributes in mlx5_mr_core during creation and after UMR The patch stores iova, pd and size during mr creation and after UMRs that modify them. It removes the unused access flags field. Signed-off-by: Haggai Eran Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/mr.c | 4 ++++ drivers/net/ethernet/mellanox/mlx5/core/mr.c | 4 ++++ include/linux/mlx5/driver.h | 1 - 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9d932a2aa9f4..f472ab246d94 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -794,6 +794,10 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, err = -EFAULT; } + mr->mmr.iova = virt_addr; + mr->mmr.size = len; + mr->mmr.pd = to_mpd(pd)->pdn; + unmap_dma: up(&umrc->sem); dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 4cc927649404..ac52a0fe2d3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -82,7 +82,11 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, return mlx5_cmd_status_to_err(&lout.hdr); } + mr->iova = be64_to_cpu(in->seg.start_addr); + mr->size = be64_to_cpu(in->seg.len); mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; + mr->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff; + mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", be32_to_cpu(lout.mkey), key, mr->key); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 93cef6313e72..2bce4aad2570 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -427,7 +427,6 @@ struct mlx5_core_mr { u64 size; u32 key; u32 pd; - u32 access; }; struct mlx5_core_srq { -- cgit v1.2.3-71-gd317 From 29fe5732bef2ba3c785fcb4aa5ba7160ad8faba5 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 6 Mar 2014 09:44:18 -0800 Subject: mtd: pfow: remove unused variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes warning: In file included from drivers/mtd/lpddr/qinfo_probe.c:31:0: include/linux/mtd/pfow.h: In function ‘send_pfow_command’: include/linux/mtd/pfow.h:104:6: warning: variable ‘chipnum’ set but not used [-Wunused-but-set-variable] int chipnum; ^ Signed-off-by: Brian Norris --- include/linux/mtd/pfow.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h index b730d4f84655..42ff7ff09bf5 100644 --- a/include/linux/mtd/pfow.h +++ b/include/linux/mtd/pfow.h @@ -101,9 +101,6 @@ static inline void send_pfow_command(struct map_info *map, unsigned long len, map_word *datum) { int bits_per_chip = map_bankwidth(map) * 8; - int chipnum; - struct lpddr_private *lpddr = map->fldrv_priv; - chipnum = adr >> lpddr->chipshift; map_write(map, CMD(cmd_code), map->pfow_base + PFOW_COMMAND_CODE); map_write(map, CMD(adr & ((1< Date: Tue, 20 May 2014 11:17:54 -0700 Subject: mfd: twl4030-power: Add generic reset configuration The twl4030 PMIC needs to be configured properly for things like warm reset and deeper idle states so the PMIC manages the regulators properly based on the hardware triggers from the SoC. Earlier we have configured twl4030 using platform data, but we want to do it for device tree based booting also. In some cases configuring twl4030 is needed for things to work. For example, when rebooting an OMAP3530 at 125 MHz, it hangs. With this patch, TWL4030 will be reset when a warm reset occures, and OMAP3530 does not hang on reboot. Let's add device tree support and configure things for warm reset as the default when compatible = "ti,twl4030-power". More complicated configurations can be added to the driver based on other compatible flags. Note we now also make the pdata const like it should be. This allows use it for match->data with the device tree related functions. Based on earlier patch by Matthias Brugger and Lesly A M . Signed-off-by: Tony Lindgren Signed-off-by: Lee Jones --- .../devicetree/bindings/mfd/twl4030-power.txt | 7 +- drivers/mfd/twl4030-power.c | 109 ++++++++++++++++++--- include/linux/i2c/twl.h | 3 + 3 files changed, 105 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/Documentation/devicetree/bindings/mfd/twl4030-power.txt b/Documentation/devicetree/bindings/mfd/twl4030-power.txt index 8e15ec35ac99..b90611650cd9 100644 --- a/Documentation/devicetree/bindings/mfd/twl4030-power.txt +++ b/Documentation/devicetree/bindings/mfd/twl4030-power.txt @@ -5,7 +5,12 @@ to control the power resources, including power scripts. For now, the binding only supports the complete shutdown of the system after poweroff. Required properties: -- compatible : must be "ti,twl4030-power" +- compatible : must be one of the following + "ti,twl4030-power" + "ti,twl4030-power-reset" + +The use of ti,twl4030-power-reset is recommended at least on +3530 that needs a special configuration for warm reset to work. Optional properties: - ti,use_poweroff: With this flag, the chip will initiates an ACTIVE-to-OFF or diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c index 0b037dca46a8..cb5b0cb8f933 100644 --- a/drivers/mfd/twl4030-power.c +++ b/drivers/mfd/twl4030-power.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -128,6 +129,40 @@ static u8 res_config_addrs[] = { [RES_MAIN_REF] = 0x94, }; +/* + * Usable values for .remap_sleep and .remap_off + * Based on table "5.3.3 Resource Operating modes" + */ +enum { + TWL_REMAP_OFF = 0, + TWL_REMAP_SLEEP = 8, + TWL_REMAP_ACTIVE = 9, +}; + +/* + * Macros to configure the PM register states for various resources. + * Note that we can make MSG_SINGULAR etc private to this driver once + * omap3 has been made DT only. + */ +#define TWL_DFLT_DELAY 2 /* typically 2 32 KiHz cycles */ +#define TWL_RESOURCE_SET(res, state) \ + { MSG_SINGULAR(DEV_GRP_NULL, (res), (state)), TWL_DFLT_DELAY } +#define TWL_RESOURCE_ON(res) TWL_RESOURCE_SET(res, RES_STATE_ACTIVE) +#define TWL_RESOURCE_OFF(res) TWL_RESOURCE_SET(res, RES_STATE_OFF) +#define TWL_RESOURCE_RESET(res) TWL_RESOURCE_SET(res, RES_STATE_WRST) +/* + * It seems that type1 and type2 is just the resource init order + * number for the type1 and type2 group. + */ +#define TWL_RESOURCE_GROUP_RESET(group, type1, type2) \ + { MSG_BROADCAST(DEV_GRP_NULL, (group), (type1), (type2), \ + RES_STATE_WRST), TWL_DFLT_DELAY } +#define TWL_REMAP_SLEEP(res, devgrp, typ, typ2) \ + { .resource = (res), .devgroup = (devgrp), \ + .type = (typ), .type2 = (typ2), \ + .remap_off = TWL_REMAP_OFF, \ + .remap_sleep = TWL_REMAP_SLEEP, } + static int twl4030_write_script_byte(u8 address, u8 byte) { int err; @@ -502,7 +537,8 @@ int twl4030_remove_script(u8 flags) return err; } -static int twl4030_power_configure_scripts(struct twl4030_power_data *pdata) +static int +twl4030_power_configure_scripts(const struct twl4030_power_data *pdata) { int err; int i; @@ -518,7 +554,8 @@ static int twl4030_power_configure_scripts(struct twl4030_power_data *pdata) return 0; } -static int twl4030_power_configure_resources(struct twl4030_power_data *pdata) +static int +twl4030_power_configure_resources(const struct twl4030_power_data *pdata) { struct twl4030_resconfig *resconfig = pdata->resource_config; int err; @@ -550,7 +587,7 @@ void twl4030_power_off(void) pr_err("TWL4030 Unable to power off\n"); } -static bool twl4030_power_use_poweroff(struct twl4030_power_data *pdata, +static bool twl4030_power_use_poweroff(const struct twl4030_power_data *pdata, struct device_node *node) { if (pdata && pdata->use_poweroff) @@ -562,10 +599,60 @@ static bool twl4030_power_use_poweroff(struct twl4030_power_data *pdata, return false; } +#ifdef CONFIG_OF + +/* Generic warm reset configuration for omap3 */ + +static struct twl4030_ins omap3_wrst_seq[] = { + TWL_RESOURCE_OFF(RES_NRES_PWRON), + TWL_RESOURCE_OFF(RES_RESET), + TWL_RESOURCE_RESET(RES_MAIN_REF), + TWL_RESOURCE_GROUP_RESET(RES_GRP_ALL, RES_TYPE_R0, RES_TYPE2_R2), + TWL_RESOURCE_RESET(RES_VUSB_3V1), + TWL_RESOURCE_GROUP_RESET(RES_GRP_ALL, RES_TYPE_R0, RES_TYPE2_R1), + TWL_RESOURCE_GROUP_RESET(RES_GRP_RC, RES_TYPE_ALL, RES_TYPE2_R0), + TWL_RESOURCE_ON(RES_RESET), + TWL_RESOURCE_ON(RES_NRES_PWRON), +}; + +static struct twl4030_script omap3_wrst_script = { + .script = omap3_wrst_seq, + .size = ARRAY_SIZE(omap3_wrst_seq), + .flags = TWL4030_WRST_SCRIPT, +}; + +static struct twl4030_script *omap3_reset_scripts[] = { + &omap3_wrst_script, +}; + +static struct twl4030_resconfig omap3_rconfig[] = { + TWL_REMAP_SLEEP(RES_HFCLKOUT, DEV_GRP_P3, -1, -1), + TWL_REMAP_SLEEP(RES_VDD1, DEV_GRP_P1, -1, -1), + TWL_REMAP_SLEEP(RES_VDD2, DEV_GRP_P1, -1, -1), + { 0, 0 }, +}; + +static struct twl4030_power_data omap3_reset = { + .scripts = omap3_reset_scripts, + .num = ARRAY_SIZE(omap3_reset_scripts), + .resource_config = omap3_rconfig, +}; + +static struct of_device_id twl4030_power_of_match[] = { + { + .compatible = "ti,twl4030-power-reset", + .data = &omap3_reset, + }, + { }, +}; +MODULE_DEVICE_TABLE(of, twl4030_power_of_match); +#endif /* CONFIG_OF */ + static int twl4030_power_probe(struct platform_device *pdev) { - struct twl4030_power_data *pdata = dev_get_platdata(&pdev->dev); + const struct twl4030_power_data *pdata = dev_get_platdata(&pdev->dev); struct device_node *node = pdev->dev.of_node; + const struct of_device_id *match; int err = 0; int err2 = 0; u8 val; @@ -586,8 +673,12 @@ static int twl4030_power_probe(struct platform_device *pdev) return err; } + match = of_match_device(of_match_ptr(twl4030_power_of_match), + &pdev->dev); + if (match && match->data) + pdata = match->data; + if (pdata) { - /* TODO: convert to device tree */ err = twl4030_power_configure_scripts(pdata); if (err) { pr_err("TWL4030 failed to load scripts\n"); @@ -637,14 +728,6 @@ static int twl4030_power_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_OF -static const struct of_device_id twl4030_power_of_match[] = { - {.compatible = "ti,twl4030-power", }, - { }, -}; -MODULE_DEVICE_TABLE(of, twl4030_power_of_match); -#endif - static struct platform_driver twl4030_power_driver = { .driver = { .name = "twl4030_power", diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index ade1c06d4ceb..5fe031375ed4 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -486,7 +486,10 @@ static inline int twl6030_mmc_card_detect(struct device *dev, int slot) #define RES_GRP_ALL 0x7 /* All resource groups */ #define RES_TYPE2_R0 0x0 +#define RES_TYPE2_R1 0x1 +#define RES_TYPE2_R2 0x2 +#define RES_TYPE_R0 0x0 #define RES_TYPE_ALL 0x7 /* Resource states */ -- cgit v1.2.3-71-gd317 From 482e7db160df713a2d1d4c7ee9fffad92008283f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 20 May 2014 11:17:54 -0700 Subject: mfd: twl4030-power: Add support for board specific configuration With the recommended twl4030 configuration added, we can now add board specific changes as modifications to the recommended configuration. Note that the data is private to this driver, and the data must always have a NULL resource in the sentinel. Signed-off-by: Tony Lindgren Signed-off-by: Lee Jones --- drivers/mfd/twl4030-power.c | 21 +++++++++++++++++++++ include/linux/i2c/twl.h | 1 + 2 files changed, 22 insertions(+) (limited to 'include') diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c index 2bfbb40ca9d2..4846c7b48ebb 100644 --- a/drivers/mfd/twl4030-power.c +++ b/drivers/mfd/twl4030-power.c @@ -567,13 +567,34 @@ twl4030_power_configure_scripts(const struct twl4030_power_data *pdata) return 0; } +static void twl4030_patch_rconfig(struct twl4030_resconfig *common, + struct twl4030_resconfig *board) +{ + while (common->resource) { + struct twl4030_resconfig *b = board; + + while (b->resource) { + if (b->resource == common->resource) { + *common = *b; + break; + } + b++; + } + common++; + } +} + static int twl4030_power_configure_resources(const struct twl4030_power_data *pdata) { struct twl4030_resconfig *resconfig = pdata->resource_config; + struct twl4030_resconfig *boardconf = pdata->board_config; int err; if (resconfig) { + if (boardconf) + twl4030_patch_rconfig(resconfig, boardconf); + while (resconfig->resource) { err = twl4030_configure_resource(resconfig); if (err) diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index 5fe031375ed4..57fe782bf031 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -662,6 +662,7 @@ struct twl4030_power_data { struct twl4030_script **scripts; unsigned num; struct twl4030_resconfig *resource_config; + struct twl4030_resconfig *board_config; #define TWL4030_RESCONFIG_UNDEF ((u8)-1) bool use_poweroff; /* Board is wired for TWL poweroff */ }; -- cgit v1.2.3-71-gd317 From aa76fcf473f6bfa839f37f77b6fdb71f0fb88d8f Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 21 Feb 2014 17:36:21 +0200 Subject: CLK: TI: DPLL: add support for omap2 core dpll OMAP2 has slightly different DPLL compared to later OMAP generations. This patch adds support for the ti,omap2-dpll-core-clock and also adds the bindings documentation. Signed-off-by: Tero Kristo --- .../devicetree/bindings/clock/ti/dpll.txt | 9 +++ arch/arm/mach-omap2/clock.h | 1 - arch/arm/mach-omap2/clock2xxx.h | 4 -- drivers/clk/ti/dpll.c | 78 +++++++++++++++++++--- include/linux/clk/ti.h | 6 ++ 5 files changed, 82 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/ti/dpll.txt b/Documentation/devicetree/bindings/clock/ti/dpll.txt index 30bfdb7c9f18..50a1a427608f 100644 --- a/Documentation/devicetree/bindings/clock/ti/dpll.txt +++ b/Documentation/devicetree/bindings/clock/ti/dpll.txt @@ -30,6 +30,7 @@ Required properties: "ti,am3-dpll-clock", "ti,am3-dpll-core-clock", "ti,am3-dpll-x2-clock", + "ti,omap2-dpll-core-clock", - #clock-cells : from common clock binding; shall be set to 0. - clocks : link phandles of parent clocks, first entry lists reference clock @@ -41,6 +42,7 @@ Required properties: "mult-div1" - contains the multiplier / divider register base address "autoidle" - contains the autoidle register base address (optional) ti,am3-* dpll types do not have autoidle register + ti,omap2-* dpll type does not support idlest / autoidle registers Optional properties: - DPLL mode setting - defining any one or more of the following overrides @@ -73,3 +75,10 @@ Examples: clocks = <&sys_clkin_ck>, <&sys_clkin_ck>; reg = <0x90>, <0x5c>, <0x68>; }; + + dpll_ck: dpll_ck { + #clock-cells = <0>; + compatible = "ti,omap2-dpll-core-clock"; + clocks = <&sys_ck>, <&sys_ck>; + reg = <0x0500>, <0x0540>; + }; diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index bda767a9dea8..f6e9904d7a75 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -279,7 +279,6 @@ extern const struct clk_hw_omap_ops clkhwops_omap3430es2_hsotgusb_wait; extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait; extern const struct clk_hw_omap_ops clkhwops_apll54; extern const struct clk_hw_omap_ops clkhwops_apll96; -extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll; extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait; /* clksel_rate blocks shared between OMAP44xx and AM33xx */ diff --git a/arch/arm/mach-omap2/clock2xxx.h b/arch/arm/mach-omap2/clock2xxx.h index 539dc08afbba..45f41a411603 100644 --- a/arch/arm/mach-omap2/clock2xxx.h +++ b/arch/arm/mach-omap2/clock2xxx.h @@ -21,10 +21,6 @@ unsigned long omap2xxx_sys_clk_recalc(struct clk_hw *clk, unsigned long parent_rate); unsigned long omap2_osc_clk_recalc(struct clk_hw *clk, unsigned long parent_rate); -unsigned long omap2_dpllcore_recalc(struct clk_hw *hw, - unsigned long parent_rate); -int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate, - unsigned long parent_rate); void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw); unsigned long omap2_clk_apll54_recalc(struct clk_hw *hw, unsigned long parent_rate); diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c index dda262db42ea..34e233990212 100644 --- a/drivers/clk/ti/dpll.c +++ b/drivers/clk/ti/dpll.c @@ -35,21 +35,18 @@ static const struct clk_ops dpll_m4xen_ck_ops = { .set_rate = &omap3_noncore_dpll_set_rate, .get_parent = &omap2_init_dpll_parent, }; +#else +static const struct clk_ops dpll_m4xen_ck_ops = {}; #endif +#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4) || \ + defined(CONFIG_SOC_OMAP5) || defined(CONFIG_SOC_DRA7XX) || \ + defined(CONFIG_SOC_AM33XX) || defined(CONFIG_SOC_AM43XX) static const struct clk_ops dpll_core_ck_ops = { .recalc_rate = &omap3_dpll_recalc, .get_parent = &omap2_init_dpll_parent, }; -#ifdef CONFIG_ARCH_OMAP3 -static const struct clk_ops omap3_dpll_core_ck_ops = { - .get_parent = &omap2_init_dpll_parent, - .recalc_rate = &omap3_dpll_recalc, - .round_rate = &omap2_dpll_round_rate, -}; -#endif - static const struct clk_ops dpll_ck_ops = { .enable = &omap3_noncore_dpll_enable, .disable = &omap3_noncore_dpll_disable, @@ -65,6 +62,33 @@ static const struct clk_ops dpll_no_gate_ck_ops = { .round_rate = &omap2_dpll_round_rate, .set_rate = &omap3_noncore_dpll_set_rate, }; +#else +static const struct clk_ops dpll_core_ck_ops = {}; +static const struct clk_ops dpll_ck_ops = {}; +static const struct clk_ops dpll_no_gate_ck_ops = {}; +const struct clk_hw_omap_ops clkhwops_omap3_dpll = {}; +#endif + +#ifdef CONFIG_ARCH_OMAP2 +static const struct clk_ops omap2_dpll_core_ck_ops = { + .get_parent = &omap2_init_dpll_parent, + .recalc_rate = &omap2_dpllcore_recalc, + .round_rate = &omap2_dpll_round_rate, + .set_rate = &omap2_reprogram_dpllcore, +}; +#else +static const struct clk_ops omap2_dpll_core_ck_ops = {}; +#endif + +#ifdef CONFIG_ARCH_OMAP3 +static const struct clk_ops omap3_dpll_core_ck_ops = { + .get_parent = &omap2_init_dpll_parent, + .recalc_rate = &omap3_dpll_recalc, + .round_rate = &omap2_dpll_round_rate, +}; +#else +static const struct clk_ops omap3_dpll_core_ck_ops = {}; +#endif #ifdef CONFIG_ARCH_OMAP3 static const struct clk_ops omap3_dpll_ck_ops = { @@ -237,10 +261,27 @@ static void __init of_ti_dpll_setup(struct device_node *node, init->parent_names = parent_names; dd->control_reg = ti_clk_get_reg_addr(node, 0); - dd->idlest_reg = ti_clk_get_reg_addr(node, 1); - dd->mult_div1_reg = ti_clk_get_reg_addr(node, 2); - if (!dd->control_reg || !dd->idlest_reg || !dd->mult_div1_reg) + /* + * Special case for OMAP2 DPLL, register order is different due to + * missing idlest_reg, also clkhwops is different. Detected from + * missing idlest_mask. + */ + if (!dd->idlest_mask) { + dd->mult_div1_reg = ti_clk_get_reg_addr(node, 1); +#ifdef CONFIG_ARCH_OMAP2 + clk_hw->ops = &clkhwops_omap2xxx_dpll; + omap2xxx_clkt_dpllcore_init(&clk_hw->hw); +#endif + } else { + dd->idlest_reg = ti_clk_get_reg_addr(node, 1); + if (!dd->idlest_reg) + goto cleanup; + + dd->mult_div1_reg = ti_clk_get_reg_addr(node, 2); + } + + if (!dd->control_reg || !dd->mult_div1_reg) goto cleanup; if (dd->autoidle_mask) { @@ -547,3 +588,18 @@ static void __init of_ti_am3_core_dpll_setup(struct device_node *node) } CLK_OF_DECLARE(ti_am3_core_dpll_clock, "ti,am3-dpll-core-clock", of_ti_am3_core_dpll_setup); + +static void __init of_ti_omap2_core_dpll_setup(struct device_node *node) +{ + const struct dpll_data dd = { + .enable_mask = 0x3, + .mult_mask = 0x3ff << 12, + .div1_mask = 0xf << 8, + .max_divider = 16, + .min_divider = 1, + }; + + of_ti_dpll_setup(node, &omap2_dpll_core_ck_ops, &dd); +} +CLK_OF_DECLARE(ti_omap2_core_dpll_clock, "ti,omap2-dpll-core-clock", + of_ti_omap2_core_dpll_setup); diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 4a21a872dbbd..753878c6fa52 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -259,6 +259,11 @@ int omap2_dflt_clk_enable(struct clk_hw *hw); void omap2_dflt_clk_disable(struct clk_hw *hw); int omap2_dflt_clk_is_enabled(struct clk_hw *hw); void omap3_clk_lock_dpll5(void); +unsigned long omap2_dpllcore_recalc(struct clk_hw *hw, + unsigned long parent_rate); +int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate, + unsigned long parent_rate); +void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw); void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index); void ti_dt_clocks_register(struct ti_dt_clk *oclks); @@ -287,6 +292,7 @@ static inline void of_ti_clk_allow_autoidle_all(void) { } static inline void of_ti_clk_deny_autoidle_all(void) { } #endif +extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll; extern const struct clk_hw_omap_ops clkhwops_omap3_dpll; extern const struct clk_hw_omap_ops clkhwops_omap4_dpllmx; extern const struct clk_hw_omap_ops clkhwops_wait; -- cgit v1.2.3-71-gd317 From 4d008589e271e28eae728eef7f5fb1f658f12b9f Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 24 Feb 2014 16:06:34 +0200 Subject: CLK: TI: APLL: add support for omap2 aplls This patch adds support for omap2 type aplls, which have gating and autoidle functionality. Signed-off-by: Tero Kristo --- .../devicetree/bindings/clock/ti/apll.txt | 24 ++- arch/arm/mach-omap2/clock.h | 11 -- drivers/clk/ti/apll.c | 181 +++++++++++++++++++++ include/linux/clk/ti.h | 21 ++- 4 files changed, 220 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/ti/apll.txt b/Documentation/devicetree/bindings/clock/ti/apll.txt index 7faf5a68b3be..ade4dd4c30f0 100644 --- a/Documentation/devicetree/bindings/clock/ti/apll.txt +++ b/Documentation/devicetree/bindings/clock/ti/apll.txt @@ -14,18 +14,32 @@ a subtype of a DPLL [2], although a simplified one at that. [2] Documentation/devicetree/bindings/clock/ti/dpll.txt Required properties: -- compatible : shall be "ti,dra7-apll-clock" +- compatible : shall be "ti,dra7-apll-clock" or "ti,omap2-apll-clock" - #clock-cells : from common clock binding; shall be set to 0. - clocks : link phandles of parent clocks (clk-ref and clk-bypass) - reg : address and length of the register set for controlling the APLL. It contains the information of registers in the following order: - "control" - contains the control register base address - "idlest" - contains the idlest register base address + "control" - contains the control register offset + "idlest" - contains the idlest register offset + "autoidle" - contains the autoidle register offset (OMAP2 only) +- ti,clock-frequency : static clock frequency for the clock (OMAP2 only) +- ti,idlest-shift : bit-shift for the idlest field (OMAP2 only) +- ti,bit-shift : bit-shift for enable and autoidle fields (OMAP2 only) Examples: - apll_pcie_ck: apll_pcie_ck@4a008200 { + apll_pcie_ck: apll_pcie_ck { #clock-cells = <0>; clocks = <&apll_pcie_in_clk_mux>, <&dpll_pcie_ref_ck>; - reg = <0x4a00821c 0x4>, <0x4a008220 0x4>; + reg = <0x021c>, <0x0220>; compatible = "ti,dra7-apll-clock"; }; + + apll96_ck: apll96_ck { + #clock-cells = <0>; + compatible = "ti,omap2-apll-clock"; + clocks = <&sys_ck>; + ti,bit-shift = <2>; + ti,idlest-shift = <8>; + ti,clock-frequency = <96000000>; + reg = <0x0500>, <0x0530>, <0x0520>; + }; diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index f6e9904d7a75..eb441d137843 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -178,17 +178,6 @@ struct clksel { const struct clksel_rate *rates; }; -struct clk_hw_omap_ops { - void (*find_idlest)(struct clk_hw_omap *oclk, - void __iomem **idlest_reg, - u8 *idlest_bit, u8 *idlest_val); - void (*find_companion)(struct clk_hw_omap *oclk, - void __iomem **other_reg, - u8 *other_bit); - void (*allow_idle)(struct clk_hw_omap *oclk); - void (*deny_idle)(struct clk_hw_omap *oclk); -}; - unsigned long omap_fixed_divisor_recalc(struct clk_hw *hw, unsigned long parent_rate); diff --git a/drivers/clk/ti/apll.c b/drivers/clk/ti/apll.c index b986f61f5a77..5428c9c547cd 100644 --- a/drivers/clk/ti/apll.c +++ b/drivers/clk/ti/apll.c @@ -221,3 +221,184 @@ cleanup: kfree(init); } CLK_OF_DECLARE(dra7_apll_clock, "ti,dra7-apll-clock", of_dra7_apll_setup); + +#define OMAP2_EN_APLL_LOCKED 0x3 +#define OMAP2_EN_APLL_STOPPED 0x0 + +static int omap2_apll_is_enabled(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *ad = clk->dpll_data; + u32 v; + + v = ti_clk_ll_ops->clk_readl(ad->control_reg); + v &= ad->enable_mask; + + v >>= __ffs(ad->enable_mask); + + return v == OMAP2_EN_APLL_LOCKED ? 1 : 0; +} + +static unsigned long omap2_apll_recalc(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + + if (omap2_apll_is_enabled(hw)) + return clk->fixed_rate; + + return 0; +} + +static int omap2_apll_enable(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *ad = clk->dpll_data; + u32 v; + int i = 0; + + v = ti_clk_ll_ops->clk_readl(ad->control_reg); + v &= ~ad->enable_mask; + v |= OMAP2_EN_APLL_LOCKED << __ffs(ad->enable_mask); + ti_clk_ll_ops->clk_writel(v, ad->control_reg); + + while (1) { + v = ti_clk_ll_ops->clk_readl(ad->idlest_reg); + if (v & ad->idlest_mask) + break; + if (i > MAX_APLL_WAIT_TRIES) + break; + i++; + udelay(1); + } + + if (i == MAX_APLL_WAIT_TRIES) { + pr_warn("%s failed to transition to locked\n", + __clk_get_name(clk->hw.clk)); + return -EBUSY; + } + + return 0; +} + +static void omap2_apll_disable(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *ad = clk->dpll_data; + u32 v; + + v = ti_clk_ll_ops->clk_readl(ad->control_reg); + v &= ~ad->enable_mask; + v |= OMAP2_EN_APLL_STOPPED << __ffs(ad->enable_mask); + ti_clk_ll_ops->clk_writel(v, ad->control_reg); +} + +static struct clk_ops omap2_apll_ops = { + .enable = &omap2_apll_enable, + .disable = &omap2_apll_disable, + .is_enabled = &omap2_apll_is_enabled, + .recalc_rate = &omap2_apll_recalc, +}; + +static void omap2_apll_set_autoidle(struct clk_hw_omap *clk, u32 val) +{ + struct dpll_data *ad = clk->dpll_data; + u32 v; + + v = ti_clk_ll_ops->clk_readl(ad->autoidle_reg); + v &= ~ad->autoidle_mask; + v |= val << __ffs(ad->autoidle_mask); + ti_clk_ll_ops->clk_writel(v, ad->control_reg); +} + +#define OMAP2_APLL_AUTOIDLE_LOW_POWER_STOP 0x3 +#define OMAP2_APLL_AUTOIDLE_DISABLE 0x0 + +static void omap2_apll_allow_idle(struct clk_hw_omap *clk) +{ + omap2_apll_set_autoidle(clk, OMAP2_APLL_AUTOIDLE_LOW_POWER_STOP); +} + +static void omap2_apll_deny_idle(struct clk_hw_omap *clk) +{ + omap2_apll_set_autoidle(clk, OMAP2_APLL_AUTOIDLE_DISABLE); +} + +static struct clk_hw_omap_ops omap2_apll_hwops = { + .allow_idle = &omap2_apll_allow_idle, + .deny_idle = &omap2_apll_deny_idle, +}; + +static void __init of_omap2_apll_setup(struct device_node *node) +{ + struct dpll_data *ad = NULL; + struct clk_hw_omap *clk_hw = NULL; + struct clk_init_data *init = NULL; + struct clk *clk; + const char *parent_name; + u32 val; + + ad = kzalloc(sizeof(*clk_hw), GFP_KERNEL); + clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL); + init = kzalloc(sizeof(*init), GFP_KERNEL); + + if (!ad || !clk_hw || !init) + goto cleanup; + + clk_hw->dpll_data = ad; + clk_hw->hw.init = init; + init->ops = &omap2_apll_ops; + init->name = node->name; + clk_hw->ops = &omap2_apll_hwops; + + init->num_parents = of_clk_get_parent_count(node); + if (init->num_parents != 1) { + pr_err("%s must have one parent\n", node->name); + goto cleanup; + } + + parent_name = of_clk_get_parent_name(node, 0); + init->parent_names = &parent_name; + + if (of_property_read_u32(node, "ti,clock-frequency", &val)) { + pr_err("%s missing clock-frequency\n", node->name); + goto cleanup; + } + clk_hw->fixed_rate = val; + + if (of_property_read_u32(node, "ti,bit-shift", &val)) { + pr_err("%s missing bit-shift\n", node->name); + goto cleanup; + } + + clk_hw->enable_bit = val; + ad->enable_mask = 0x3 << val; + ad->autoidle_mask = 0x3 << val; + + if (of_property_read_u32(node, "ti,idlest-shift", &val)) { + pr_err("%s missing idlest-shift\n", node->name); + goto cleanup; + } + + ad->idlest_mask = 1 << val; + + ad->control_reg = ti_clk_get_reg_addr(node, 0); + ad->autoidle_reg = ti_clk_get_reg_addr(node, 1); + ad->idlest_reg = ti_clk_get_reg_addr(node, 2); + + if (!ad->control_reg || !ad->autoidle_reg || !ad->idlest_reg) + goto cleanup; + + clk = clk_register(NULL, &clk_hw->hw); + if (!IS_ERR(clk)) { + of_clk_add_provider(node, of_clk_src_simple_get, clk); + kfree(init); + return; + } +cleanup: + kfree(ad); + kfree(clk_hw); + kfree(init); +} +CLK_OF_DECLARE(omap2_apll_clock, "ti,omap2-apll-clock", + of_omap2_apll_setup); diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 753878c6fa52..44bf84002a34 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -94,7 +94,26 @@ struct dpll_data { u8 flags; }; -struct clk_hw_omap_ops; +struct clk_hw_omap; + +/** + * struct clk_hw_omap_ops - OMAP clk ops + * @find_idlest: find idlest register information for a clock + * @find_companion: find companion clock register information for a clock, + * basically converts CM_ICLKEN* <-> CM_FCLKEN* + * @allow_idle: enables autoidle hardware functionality for a clock + * @deny_idle: prevent autoidle hardware functionality for a clock + */ +struct clk_hw_omap_ops { + void (*find_idlest)(struct clk_hw_omap *oclk, + void __iomem **idlest_reg, + u8 *idlest_bit, u8 *idlest_val); + void (*find_companion)(struct clk_hw_omap *oclk, + void __iomem **other_reg, + u8 *other_bit); + void (*allow_idle)(struct clk_hw_omap *oclk); + void (*deny_idle)(struct clk_hw_omap *oclk); +}; /** * struct clk_hw_omap - OMAP struct clk -- cgit v1.2.3-71-gd317 From de742570745e12b53c70130ace958f2a60044000 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 25 Feb 2014 19:16:07 +0200 Subject: CLK: TI: interface: add support for omap2430 specific interface clock OMAP2430 I2CHS modules require specific hardware ops to be used, so added a new compatible string for this. Signed-off-by: Tero Kristo --- Documentation/devicetree/bindings/clock/ti/interface.txt | 2 ++ arch/arm/mach-omap2/clock.h | 1 - drivers/clk/ti/interface.c | 11 +++++++++++ include/linux/clk/ti.h | 1 + 4 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/ti/interface.txt b/Documentation/devicetree/bindings/clock/ti/interface.txt index 064e8caccac3..3111a409fea6 100644 --- a/Documentation/devicetree/bindings/clock/ti/interface.txt +++ b/Documentation/devicetree/bindings/clock/ti/interface.txt @@ -21,6 +21,8 @@ Required properties: "ti,omap3-dss-interface-clock" - interface clock with DSS specific HW handling "ti,omap3-ssi-interface-clock" - interface clock with SSI specific HW handling "ti,am35xx-interface-clock" - interface clock with AM35xx specific HW handling + "ti,omap2430-interface-clock" - interface clock with OMAP2430 specific HW + handling - #clock-cells : from common clock binding; shall be set to 0 - clocks : link to phandle of parent clock - reg : base address for the control register diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index eb441d137843..12f54d428d7c 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -268,7 +268,6 @@ extern const struct clk_hw_omap_ops clkhwops_omap3430es2_hsotgusb_wait; extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait; extern const struct clk_hw_omap_ops clkhwops_apll54; extern const struct clk_hw_omap_ops clkhwops_apll96; -extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait; /* clksel_rate blocks shared between OMAP44xx and AM33xx */ extern const struct clksel_rate div_1_0_rates[]; diff --git a/drivers/clk/ti/interface.c b/drivers/clk/ti/interface.c index 320a2b168bb2..9c3e8c4aaa40 100644 --- a/drivers/clk/ti/interface.c +++ b/drivers/clk/ti/interface.c @@ -94,6 +94,7 @@ static void __init of_ti_no_wait_interface_clk_setup(struct device_node *node) CLK_OF_DECLARE(ti_no_wait_interface_clk, "ti,omap3-no-wait-interface-clock", of_ti_no_wait_interface_clk_setup); +#ifdef CONFIG_ARCH_OMAP3 static void __init of_ti_hsotgusb_interface_clk_setup(struct device_node *node) { _of_ti_interface_clk_setup(node, @@ -123,3 +124,13 @@ static void __init of_ti_am35xx_interface_clk_setup(struct device_node *node) } CLK_OF_DECLARE(ti_am35xx_interface_clk, "ti,am35xx-interface-clock", of_ti_am35xx_interface_clk_setup); +#endif + +#ifdef CONFIG_SOC_OMAP2430 +static void __init of_ti_omap2430_interface_clk_setup(struct device_node *node) +{ + _of_ti_interface_clk_setup(node, &clkhwops_omap2430_i2chs_wait); +} +CLK_OF_DECLARE(ti_omap2430_interface_clk, "ti,omap2430-interface-clock", + of_ti_omap2430_interface_clk_setup); +#endif diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 44bf84002a34..a8390d478528 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -312,6 +312,7 @@ static inline void of_ti_clk_deny_autoidle_all(void) { } #endif extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll; +extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait; extern const struct clk_hw_omap_ops clkhwops_omap3_dpll; extern const struct clk_hw_omap_ops clkhwops_omap4_dpllmx; extern const struct clk_hw_omap_ops clkhwops_wait; -- cgit v1.2.3-71-gd317 From be67c3bf382c591d8267e0ef12d80041854731d9 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 24 Feb 2014 17:52:57 +0200 Subject: CLK: TI: OMAP2: add clock init support Adds support for registering the alias clocks, boot time clock-enable list and disabling autoidle of clocks. Signed-off-by: Tero Kristo --- drivers/clk/ti/Makefile | 1 + drivers/clk/ti/clk-2xxx.c | 254 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/clk/ti.h | 2 + 3 files changed, 257 insertions(+) create mode 100644 drivers/clk/ti/clk-2xxx.c (limited to 'include') diff --git a/drivers/clk/ti/Makefile b/drivers/clk/ti/Makefile index 4319d4031aa3..4afeaed9e9ba 100644 --- a/drivers/clk/ti/Makefile +++ b/drivers/clk/ti/Makefile @@ -3,6 +3,7 @@ obj-y += clk.o autoidle.o clockdomain.o clk-common = dpll.o composite.o divider.o gate.o \ fixed-factor.o mux.o apll.o obj-$(CONFIG_SOC_AM33XX) += $(clk-common) clk-33xx.o +obj-$(CONFIG_ARCH_OMAP2) += $(clk-common) interface.o clk-2xxx.o obj-$(CONFIG_ARCH_OMAP3) += $(clk-common) interface.o clk-3xxx.o obj-$(CONFIG_ARCH_OMAP4) += $(clk-common) clk-44xx.o obj-$(CONFIG_SOC_OMAP5) += $(clk-common) clk-54xx.o diff --git a/drivers/clk/ti/clk-2xxx.c b/drivers/clk/ti/clk-2xxx.c new file mode 100644 index 000000000000..f6400fb5ee3e --- /dev/null +++ b/drivers/clk/ti/clk-2xxx.c @@ -0,0 +1,254 @@ +/* + * OMAP2 Clock init + * + * Copyright (C) 2013 Texas Instruments, Inc + * Tero Kristo (t-kristo@ti.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include + +static struct ti_dt_clk omap2xxx_clks[] = { + DT_CLK(NULL, "func_32k_ck", "func_32k_ck"), + DT_CLK(NULL, "secure_32k_ck", "secure_32k_ck"), + DT_CLK(NULL, "virt_12m_ck", "virt_12m_ck"), + DT_CLK(NULL, "virt_13m_ck", "virt_13m_ck"), + DT_CLK(NULL, "virt_19200000_ck", "virt_19200000_ck"), + DT_CLK(NULL, "virt_26m_ck", "virt_26m_ck"), + DT_CLK(NULL, "aplls_clkin_ck", "aplls_clkin_ck"), + DT_CLK(NULL, "aplls_clkin_x2_ck", "aplls_clkin_x2_ck"), + DT_CLK(NULL, "osc_ck", "osc_ck"), + DT_CLK(NULL, "sys_ck", "sys_ck"), + DT_CLK(NULL, "alt_ck", "alt_ck"), + DT_CLK(NULL, "mcbsp_clks", "mcbsp_clks"), + DT_CLK(NULL, "dpll_ck", "dpll_ck"), + DT_CLK(NULL, "apll96_ck", "apll96_ck"), + DT_CLK(NULL, "apll54_ck", "apll54_ck"), + DT_CLK(NULL, "func_54m_ck", "func_54m_ck"), + DT_CLK(NULL, "core_ck", "core_ck"), + DT_CLK(NULL, "func_96m_ck", "func_96m_ck"), + DT_CLK(NULL, "func_48m_ck", "func_48m_ck"), + DT_CLK(NULL, "func_12m_ck", "func_12m_ck"), + DT_CLK(NULL, "sys_clkout_src", "sys_clkout_src"), + DT_CLK(NULL, "sys_clkout", "sys_clkout"), + DT_CLK(NULL, "emul_ck", "emul_ck"), + DT_CLK(NULL, "mpu_ck", "mpu_ck"), + DT_CLK(NULL, "dsp_fck", "dsp_fck"), + DT_CLK(NULL, "gfx_3d_fck", "gfx_3d_fck"), + DT_CLK(NULL, "gfx_2d_fck", "gfx_2d_fck"), + DT_CLK(NULL, "gfx_ick", "gfx_ick"), + DT_CLK("omapdss_dss", "ick", "dss_ick"), + DT_CLK(NULL, "dss_ick", "dss_ick"), + DT_CLK(NULL, "dss1_fck", "dss1_fck"), + DT_CLK(NULL, "dss2_fck", "dss2_fck"), + DT_CLK(NULL, "dss_54m_fck", "dss_54m_fck"), + DT_CLK(NULL, "core_l3_ck", "core_l3_ck"), + DT_CLK(NULL, "ssi_fck", "ssi_ssr_sst_fck"), + DT_CLK(NULL, "usb_l4_ick", "usb_l4_ick"), + DT_CLK(NULL, "l4_ck", "l4_ck"), + DT_CLK(NULL, "ssi_l4_ick", "ssi_l4_ick"), + DT_CLK(NULL, "gpt1_ick", "gpt1_ick"), + DT_CLK(NULL, "gpt1_fck", "gpt1_fck"), + DT_CLK(NULL, "gpt2_ick", "gpt2_ick"), + DT_CLK(NULL, "gpt2_fck", "gpt2_fck"), + DT_CLK(NULL, "gpt3_ick", "gpt3_ick"), + DT_CLK(NULL, "gpt3_fck", "gpt3_fck"), + DT_CLK(NULL, "gpt4_ick", "gpt4_ick"), + DT_CLK(NULL, "gpt4_fck", "gpt4_fck"), + DT_CLK(NULL, "gpt5_ick", "gpt5_ick"), + DT_CLK(NULL, "gpt5_fck", "gpt5_fck"), + DT_CLK(NULL, "gpt6_ick", "gpt6_ick"), + DT_CLK(NULL, "gpt6_fck", "gpt6_fck"), + DT_CLK(NULL, "gpt7_ick", "gpt7_ick"), + DT_CLK(NULL, "gpt7_fck", "gpt7_fck"), + DT_CLK(NULL, "gpt8_ick", "gpt8_ick"), + DT_CLK(NULL, "gpt8_fck", "gpt8_fck"), + DT_CLK(NULL, "gpt9_ick", "gpt9_ick"), + DT_CLK(NULL, "gpt9_fck", "gpt9_fck"), + DT_CLK(NULL, "gpt10_ick", "gpt10_ick"), + DT_CLK(NULL, "gpt10_fck", "gpt10_fck"), + DT_CLK(NULL, "gpt11_ick", "gpt11_ick"), + DT_CLK(NULL, "gpt11_fck", "gpt11_fck"), + DT_CLK(NULL, "gpt12_ick", "gpt12_ick"), + DT_CLK(NULL, "gpt12_fck", "gpt12_fck"), + DT_CLK("omap-mcbsp.1", "ick", "mcbsp1_ick"), + DT_CLK(NULL, "mcbsp1_ick", "mcbsp1_ick"), + DT_CLK(NULL, "mcbsp1_fck", "mcbsp1_fck"), + DT_CLK("omap-mcbsp.2", "ick", "mcbsp2_ick"), + DT_CLK(NULL, "mcbsp2_ick", "mcbsp2_ick"), + DT_CLK(NULL, "mcbsp2_fck", "mcbsp2_fck"), + DT_CLK("omap2_mcspi.1", "ick", "mcspi1_ick"), + DT_CLK(NULL, "mcspi1_ick", "mcspi1_ick"), + DT_CLK(NULL, "mcspi1_fck", "mcspi1_fck"), + DT_CLK("omap2_mcspi.2", "ick", "mcspi2_ick"), + DT_CLK(NULL, "mcspi2_ick", "mcspi2_ick"), + DT_CLK(NULL, "mcspi2_fck", "mcspi2_fck"), + DT_CLK(NULL, "uart1_ick", "uart1_ick"), + DT_CLK(NULL, "uart1_fck", "uart1_fck"), + DT_CLK(NULL, "uart2_ick", "uart2_ick"), + DT_CLK(NULL, "uart2_fck", "uart2_fck"), + DT_CLK(NULL, "uart3_ick", "uart3_ick"), + DT_CLK(NULL, "uart3_fck", "uart3_fck"), + DT_CLK(NULL, "gpios_ick", "gpios_ick"), + DT_CLK(NULL, "gpios_fck", "gpios_fck"), + DT_CLK("omap_wdt", "ick", "mpu_wdt_ick"), + DT_CLK(NULL, "mpu_wdt_ick", "mpu_wdt_ick"), + DT_CLK(NULL, "mpu_wdt_fck", "mpu_wdt_fck"), + DT_CLK(NULL, "sync_32k_ick", "sync_32k_ick"), + DT_CLK(NULL, "wdt1_ick", "wdt1_ick"), + DT_CLK(NULL, "omapctrl_ick", "omapctrl_ick"), + DT_CLK("omap24xxcam", "fck", "cam_fck"), + DT_CLK(NULL, "cam_fck", "cam_fck"), + DT_CLK("omap24xxcam", "ick", "cam_ick"), + DT_CLK(NULL, "cam_ick", "cam_ick"), + DT_CLK(NULL, "mailboxes_ick", "mailboxes_ick"), + DT_CLK(NULL, "wdt4_ick", "wdt4_ick"), + DT_CLK(NULL, "wdt4_fck", "wdt4_fck"), + DT_CLK(NULL, "mspro_ick", "mspro_ick"), + DT_CLK(NULL, "mspro_fck", "mspro_fck"), + DT_CLK(NULL, "fac_ick", "fac_ick"), + DT_CLK(NULL, "fac_fck", "fac_fck"), + DT_CLK("omap_hdq.0", "ick", "hdq_ick"), + DT_CLK(NULL, "hdq_ick", "hdq_ick"), + DT_CLK("omap_hdq.0", "fck", "hdq_fck"), + DT_CLK(NULL, "hdq_fck", "hdq_fck"), + DT_CLK("omap_i2c.1", "ick", "i2c1_ick"), + DT_CLK(NULL, "i2c1_ick", "i2c1_ick"), + DT_CLK("omap_i2c.2", "ick", "i2c2_ick"), + DT_CLK(NULL, "i2c2_ick", "i2c2_ick"), + DT_CLK(NULL, "gpmc_fck", "gpmc_fck"), + DT_CLK(NULL, "sdma_fck", "sdma_fck"), + DT_CLK(NULL, "sdma_ick", "sdma_ick"), + DT_CLK(NULL, "sdrc_ick", "sdrc_ick"), + DT_CLK(NULL, "des_ick", "des_ick"), + DT_CLK("omap-sham", "ick", "sha_ick"), + DT_CLK(NULL, "sha_ick", "sha_ick"), + DT_CLK("omap_rng", "ick", "rng_ick"), + DT_CLK(NULL, "rng_ick", "rng_ick"), + DT_CLK("omap-aes", "ick", "aes_ick"), + DT_CLK(NULL, "aes_ick", "aes_ick"), + DT_CLK(NULL, "pka_ick", "pka_ick"), + DT_CLK(NULL, "usb_fck", "usb_fck"), + DT_CLK(NULL, "timer_32k_ck", "func_32k_ck"), + DT_CLK(NULL, "timer_sys_ck", "sys_ck"), + DT_CLK(NULL, "timer_ext_ck", "alt_ck"), + { .node_name = NULL }, +}; + +static struct ti_dt_clk omap2420_clks[] = { + DT_CLK(NULL, "sys_clkout2_src", "sys_clkout2_src"), + DT_CLK(NULL, "sys_clkout2", "sys_clkout2"), + DT_CLK(NULL, "dsp_ick", "dsp_ick"), + DT_CLK(NULL, "iva1_ifck", "iva1_ifck"), + DT_CLK(NULL, "iva1_mpu_int_ifck", "iva1_mpu_int_ifck"), + DT_CLK(NULL, "wdt3_ick", "wdt3_ick"), + DT_CLK(NULL, "wdt3_fck", "wdt3_fck"), + DT_CLK("mmci-omap.0", "ick", "mmc_ick"), + DT_CLK(NULL, "mmc_ick", "mmc_ick"), + DT_CLK("mmci-omap.0", "fck", "mmc_fck"), + DT_CLK(NULL, "mmc_fck", "mmc_fck"), + DT_CLK(NULL, "eac_ick", "eac_ick"), + DT_CLK(NULL, "eac_fck", "eac_fck"), + DT_CLK(NULL, "i2c1_fck", "i2c1_fck"), + DT_CLK(NULL, "i2c2_fck", "i2c2_fck"), + DT_CLK(NULL, "vlynq_ick", "vlynq_ick"), + DT_CLK(NULL, "vlynq_fck", "vlynq_fck"), + DT_CLK("musb-hdrc", "fck", "osc_ck"), + { .node_name = NULL }, +}; + +static struct ti_dt_clk omap2430_clks[] = { + DT_CLK("twl", "fck", "osc_ck"), + DT_CLK(NULL, "iva2_1_ick", "iva2_1_ick"), + DT_CLK(NULL, "mdm_ick", "mdm_ick"), + DT_CLK(NULL, "mdm_osc_ck", "mdm_osc_ck"), + DT_CLK("omap-mcbsp.3", "ick", "mcbsp3_ick"), + DT_CLK(NULL, "mcbsp3_ick", "mcbsp3_ick"), + DT_CLK(NULL, "mcbsp3_fck", "mcbsp3_fck"), + DT_CLK("omap-mcbsp.4", "ick", "mcbsp4_ick"), + DT_CLK(NULL, "mcbsp4_ick", "mcbsp4_ick"), + DT_CLK(NULL, "mcbsp4_fck", "mcbsp4_fck"), + DT_CLK("omap-mcbsp.5", "ick", "mcbsp5_ick"), + DT_CLK(NULL, "mcbsp5_ick", "mcbsp5_ick"), + DT_CLK(NULL, "mcbsp5_fck", "mcbsp5_fck"), + DT_CLK("omap2_mcspi.3", "ick", "mcspi3_ick"), + DT_CLK(NULL, "mcspi3_ick", "mcspi3_ick"), + DT_CLK(NULL, "mcspi3_fck", "mcspi3_fck"), + DT_CLK(NULL, "icr_ick", "icr_ick"), + DT_CLK(NULL, "i2chs1_fck", "i2chs1_fck"), + DT_CLK(NULL, "i2chs2_fck", "i2chs2_fck"), + DT_CLK("musb-omap2430", "ick", "usbhs_ick"), + DT_CLK(NULL, "usbhs_ick", "usbhs_ick"), + DT_CLK("omap_hsmmc.0", "ick", "mmchs1_ick"), + DT_CLK(NULL, "mmchs1_ick", "mmchs1_ick"), + DT_CLK(NULL, "mmchs1_fck", "mmchs1_fck"), + DT_CLK("omap_hsmmc.1", "ick", "mmchs2_ick"), + DT_CLK(NULL, "mmchs2_ick", "mmchs2_ick"), + DT_CLK(NULL, "mmchs2_fck", "mmchs2_fck"), + DT_CLK(NULL, "gpio5_ick", "gpio5_ick"), + DT_CLK(NULL, "gpio5_fck", "gpio5_fck"), + DT_CLK(NULL, "mdm_intc_ick", "mdm_intc_ick"), + DT_CLK("omap_hsmmc.0", "mmchsdb_fck", "mmchsdb1_fck"), + DT_CLK(NULL, "mmchsdb1_fck", "mmchsdb1_fck"), + DT_CLK("omap_hsmmc.1", "mmchsdb_fck", "mmchsdb2_fck"), + DT_CLK(NULL, "mmchsdb2_fck", "mmchsdb2_fck"), + { .node_name = NULL }, +}; + +static const char *enable_init_clks[] = { + "apll96_ck", + "apll54_ck", + "sync_32k_ick", + "omapctrl_ick", + "gpmc_fck", + "sdrc_ick", +}; + +enum { + OMAP2_SOC_OMAP2420, + OMAP2_SOC_OMAP2430, +}; + +static int __init omap2xxx_dt_clk_init(int soc_type) +{ + ti_dt_clocks_register(omap2xxx_clks); + + if (soc_type == OMAP2_SOC_OMAP2420) + ti_dt_clocks_register(omap2420_clks); + else + ti_dt_clocks_register(omap2430_clks); + + omap2_clk_disable_autoidle_all(); + + omap2_clk_enable_init_clocks(enable_init_clks, + ARRAY_SIZE(enable_init_clks)); + + pr_info("Clocking rate (Crystal/DPLL/MPU): %ld.%01ld/%ld/%ld MHz\n", + (clk_get_rate(clk_get_sys(NULL, "sys_ck")) / 1000000), + (clk_get_rate(clk_get_sys(NULL, "sys_ck")) / 100000) % 10, + (clk_get_rate(clk_get_sys(NULL, "dpll_ck")) / 1000000), + (clk_get_rate(clk_get_sys(NULL, "mpu_ck")) / 1000000)); + + return 0; +} + +int __init omap2420_dt_clk_init(void) +{ + return omap2xxx_dt_clk_init(OMAP2_SOC_OMAP2420); +} + +int __init omap2430_dt_clk_init(void) +{ + return omap2xxx_dt_clk_init(OMAP2_SOC_OMAP2430); +} diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index a8390d478528..188f0cbb26c2 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -302,6 +302,8 @@ int omap5xxx_dt_clk_init(void); int dra7xx_dt_clk_init(void); int am33xx_dt_clk_init(void); int am43xx_dt_clk_init(void); +int omap2420_dt_clk_init(void); +int omap2430_dt_clk_init(void); #ifdef CONFIG_OF void of_ti_clk_allow_autoidle_all(void); -- cgit v1.2.3-71-gd317 From 61f25ca76ccc7b63371a7a6b0b8b9a8a46745b79 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 24 Feb 2014 18:49:35 +0200 Subject: ARM: OMAP2: clock: add DT boot support for cpufreq_ck The clock and clkdev for this are added manually. Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c | 53 ++++++++++++++++++++++++++++ drivers/clk/ti/clk-2xxx.c | 2 ++ include/linux/clk/ti.h | 1 + 3 files changed, 56 insertions(+) (limited to 'include') diff --git a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c index b935ed2922d8..85e0b0c06718 100644 --- a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c +++ b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c @@ -208,3 +208,56 @@ void omap2xxx_clkt_vps_late_init(void) clk_put(c); } } + +#ifdef CONFIG_OF +#include +#include + +static const struct clk_ops virt_prcm_set_ops = { + .recalc_rate = &omap2_table_mpu_recalc, + .set_rate = &omap2_select_table_rate, + .round_rate = &omap2_round_to_table_rate, +}; + +/** + * omap2xxx_clkt_vps_init - initialize virt_prcm_set clock + * + * Does a manual init for the virtual prcm DVFS clock for OMAP2. This + * function is called only from omap2 DT clock init, as the virtual + * node is not modelled in the DT clock data. + */ +void omap2xxx_clkt_vps_init(void) +{ + struct clk_init_data init = { NULL }; + struct clk_hw_omap *hw = NULL; + struct clk *clk; + const char *parent_name = "mpu_ck"; + struct clk_lookup *lookup = NULL; + + omap2xxx_clkt_vps_late_init(); + omap2xxx_clkt_vps_check_bootloader_rates(); + + hw = kzalloc(sizeof(*hw), GFP_KERNEL); + lookup = kzalloc(sizeof(*lookup), GFP_KERNEL); + if (!hw || !lookup) + goto cleanup; + init.name = "virt_prcm_set"; + init.ops = &virt_prcm_set_ops; + init.parent_names = &parent_name; + init.num_parents = 1; + + hw->hw.init = &init; + + clk = clk_register(NULL, &hw->hw); + + lookup->dev_id = NULL; + lookup->con_id = "cpufreq_ck"; + lookup->clk = clk; + + clkdev_add(lookup); + return; +cleanup: + kfree(hw); + kfree(lookup); +} +#endif diff --git a/drivers/clk/ti/clk-2xxx.c b/drivers/clk/ti/clk-2xxx.c index f6400fb5ee3e..c808ab3d2bb2 100644 --- a/drivers/clk/ti/clk-2xxx.c +++ b/drivers/clk/ti/clk-2xxx.c @@ -229,6 +229,8 @@ static int __init omap2xxx_dt_clk_init(int soc_type) else ti_dt_clocks_register(omap2430_clks); + omap2xxx_clkt_vps_init(); + omap2_clk_disable_autoidle_all(); omap2_clk_enable_init_clocks(enable_init_clks, diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 188f0cbb26c2..4231c41bed51 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -283,6 +283,7 @@ unsigned long omap2_dpllcore_recalc(struct clk_hw *hw, int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate, unsigned long parent_rate); void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw); +void omap2xxx_clkt_vps_init(void); void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index); void ti_dt_clocks_register(struct ti_dt_clk *oclks); -- cgit v1.2.3-71-gd317 From 2d5e447914722f3c79103ad54baa1170661ac553 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 7 May 2014 13:20:46 +0300 Subject: dt:/bindings: DRA7 ATL (Audio Tracking Logic) clock bindings Audio Tracking Logic is designed to be used by HD Radio applications to synchronize the audio output clocks to the baseband clock. ATL can be also used to track errors between two reference clocks (BWS, AWS) and generate a modulated clock output which averages to some desired frequency. In essence ATL is generating a clock to be used by an audio codec and also to be used by the SoC as MCLK. To be able to integrate the ATL provided clocks to the clock tree we need two types of DT binding: - DT clock nodes to represent the ATL clocks towards the CCF - binding for the ATL IP itself which is going to handle the hw configuration The reason for this type of setup is that ATL itself is a separate device in the SoC, it has it's own address space and clock domain. Other IPs can use the ATL generated clock as their functional clock (McASPs for example) and external components like audio codecs can also use the very same clock as their MCLK. The ATL IP in DRA7 contains 4 ATL instences. Signed-off-by: Peter Ujfalusi Signed-off-by: Tero Kristo --- .../devicetree/bindings/clock/ti/dra7-atl.txt | 96 ++++++++++++++++++++++ include/dt-bindings/clk/ti-dra7-atl.h | 40 +++++++++ 2 files changed, 136 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/ti/dra7-atl.txt create mode 100644 include/dt-bindings/clk/ti-dra7-atl.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt b/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt new file mode 100644 index 000000000000..585e8c191f50 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt @@ -0,0 +1,96 @@ +Device Tree Clock bindings for ATL (Audio Tracking Logic) of DRA7 SoC. + +The ATL IP is used to generate clock to be used to synchronize baseband and +audio codec. A single ATL IP provides four ATL clock instances sharing the same +functional clock but can be configured to provide different clocks. +ATL can maintain a clock averages to some desired frequency based on the bws/aws +signals - can compensate the drift between the two ws signal. + +In order to provide the support for ATL and it's output clocks (which can be used +internally within the SoC or external components) two sets of bindings is needed: + +Clock tree binding: +This binding uses the common clock binding[1]. +To be able to integrate the ATL clocks with DT clock tree. +Provides ccf level representation of the ATL clocks to be used by drivers. +Since the clock instances are part of a single IP this binding is used as a node +for the DT clock tree, the IP driver is needed to handle the actual configuration +of the IP. + +[1] Documentation/devicetree/bindings/clock/clock-bindings.txt + +Required properties: +- compatible : shall be "ti,dra7-atl-clock" +- #clock-cells : from common clock binding; shall be set to 0. +- clocks : link phandles to functional clock of ATL + +Binding for the IP driver: +This binding is used to configure the IP driver which is going to handle the +configuration of the IP for the ATL clock instances. + +Required properties: +- compatible : shall be "ti,dra7-atl" +- reg : base address for the ATL IP +- ti,provided-clocks : List of phandles to the clocks associated with the ATL +- clocks : link phandles to functional clock of ATL +- clock-names : Shall be set to "fck" +- ti,hwmods : Shall be set to "atl" + +Optional properties: +Configuration of ATL instances: +- atl{0/1/2/3} { + - bws : Baseband word select signal selection + - aws : Audio word select signal selection +}; + +For valid word select signals, see the dt-bindings/clk/ti-dra7-atl.h include +file. + +Examples: +/* clock bindings for atl provided clocks */ +atl_clkin0_ck: atl_clkin0_ck { + #clock-cells = <0>; + compatible = "ti,dra7-atl-clock"; + clocks = <&atl_gfclk_mux>; +}; + +atl_clkin1_ck: atl_clkin1_ck { + #clock-cells = <0>; + compatible = "ti,dra7-atl-clock"; + clocks = <&atl_gfclk_mux>; +}; + +atl_clkin2_ck: atl_clkin2_ck { + #clock-cells = <0>; + compatible = "ti,dra7-atl-clock"; + clocks = <&atl_gfclk_mux>; +}; + +atl_clkin3_ck: atl_clkin3_ck { + #clock-cells = <0>; + compatible = "ti,dra7-atl-clock"; + clocks = <&atl_gfclk_mux>; +}; + +/* binding for the IP */ +atl: atl@4843c000 { + compatible = "ti,dra7-atl"; + reg = <0x4843c000 0x3ff>; + ti,hwmods = "atl"; + ti,provided-clocks = <&atl_clkin0_ck>, <&atl_clkin1_ck>, + <&atl_clkin2_ck>, <&atl_clkin3_ck>; + clocks = <&atl_gfclk_mux>; + clock-names = "fck"; + status = "disabled"; +}; + +#include + +&atl { + status = "okay"; + + atl2 { + bws = ; + aws = ; + }; +}; diff --git a/include/dt-bindings/clk/ti-dra7-atl.h b/include/dt-bindings/clk/ti-dra7-atl.h new file mode 100644 index 000000000000..42dd4164f6f4 --- /dev/null +++ b/include/dt-bindings/clk/ti-dra7-atl.h @@ -0,0 +1,40 @@ +/* + * This header provides constants for DRA7 ATL (Audio Tracking Logic) + * + * The constants defined in this header are used in dts files + * + * Copyright (C) 2013 Texas Instruments, Inc. + * + * Peter Ujfalusi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _DT_BINDINGS_CLK_DRA7_ATL_H +#define _DT_BINDINGS_CLK_DRA7_ATL_H + +#define DRA7_ATL_WS_MCASP1_FSR 0 +#define DRA7_ATL_WS_MCASP1_FSX 1 +#define DRA7_ATL_WS_MCASP2_FSR 2 +#define DRA7_ATL_WS_MCASP2_FSX 3 +#define DRA7_ATL_WS_MCASP3_FSX 4 +#define DRA7_ATL_WS_MCASP4_FSX 5 +#define DRA7_ATL_WS_MCASP5_FSX 6 +#define DRA7_ATL_WS_MCASP6_FSX 7 +#define DRA7_ATL_WS_MCASP7_FSX 8 +#define DRA7_ATL_WS_MCASP8_FSX 9 +#define DRA7_ATL_WS_MCASP8_AHCLKX 10 +#define DRA7_ATL_WS_XREF_CLK3 11 +#define DRA7_ATL_WS_XREF_CLK0 12 +#define DRA7_ATL_WS_XREF_CLK1 13 +#define DRA7_ATL_WS_XREF_CLK2 14 +#define DRA7_ATL_WS_OSC1_X1 15 + +#endif -- cgit v1.2.3-71-gd317 From 494b6590043b4cd73ceb3f58e1c012a2c6c98d85 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Mon, 26 May 2014 18:06:34 +0200 Subject: wireless: add missing WLAN_EID_BSS_INTOLERANT_CHL_REPORT Signed-off-by: Jes Sorensen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f194ccb8539c..6bff13f74050 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1711,6 +1711,7 @@ enum ieee80211_eid { WLAN_EID_RRM_ENABLED_CAPABILITIES = 70, WLAN_EID_MULTIPLE_BSSID = 71, WLAN_EID_BSS_COEX_2040 = 72, + WLAN_EID_BSS_INTOLERANT_CHL_REPORT = 73, WLAN_EID_OVERLAP_BSS_SCAN_PARAM = 74, WLAN_EID_RIC_DESCRIPTOR = 75, WLAN_EID_MMIE = 76, -- cgit v1.2.3-71-gd317 From f08dbf8a61462aa122b9b5077849a3f4bd84702a Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 8 Jan 2014 11:23:35 +0000 Subject: cpuidle: declare cpuidle_dev in cpuidle.h Declaring this allows drivers which need to initialise each struct cpuidle_device at initialisation time to make use of the structures already defined in cpuidle.c, rather than having to wastefully define their own. Signed-off-by: Paul Burton --- include/linux/cpuidle.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index b0238cba440b..99cbd7a74e9f 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -84,6 +84,7 @@ struct cpuidle_device { }; DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices); +DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev); /** * cpuidle_get_last_residency - retrieves the last state's residency time -- cgit v1.2.3-71-gd317 From c25dc82899e67a32fdcfb20dd72a37fc236fde2e Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 22 May 2014 17:07:30 -0600 Subject: PCI: Add DMA alias iterator In a mixed PCI/PCI-X/PCIe topology, bridges can take ownership of transactions, replacing the original requester ID with their own. Sometimes we just want to know the resulting device or resulting alias; other times we want each step in the chain. This iterator allows either usage. When an endpoint is connected via an unbroken chain of PCIe switches and root ports, it has no alias and its requester ID is visible to the root bus. When PCI/X get in the way, we pick up aliases for bridges. The reason why we potentially care about each step in the path is because of PCI-X. PCI-X has the concept of a requester ID, but bridges may or may not take ownership of various types of transactions. We therefore leave it to the consumer of this function to prune out what they don't care about rather than attempt to flatten the alias ourselves. Tested-by: George Spelvin Tested-by: Pat Erley Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- drivers/pci/search.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 4 +++ 2 files changed, 74 insertions(+) (limited to 'include') diff --git a/drivers/pci/search.c b/drivers/pci/search.c index 4a1b972efe7f..5601cdb8bbb3 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -17,6 +17,76 @@ DECLARE_RWSEM(pci_bus_sem); EXPORT_SYMBOL_GPL(pci_bus_sem); +/* + * pci_for_each_dma_alias - Iterate over DMA aliases for a device + * @pdev: starting downstream device + * @fn: function to call for each alias + * @data: opaque data to pass to @fn + * + * Starting @pdev, walk up the bus calling @fn for each possible alias + * of @pdev at the root bus. + */ +int pci_for_each_dma_alias(struct pci_dev *pdev, + int (*fn)(struct pci_dev *pdev, + u16 alias, void *data), void *data) +{ + struct pci_bus *bus; + int ret; + + ret = fn(pdev, PCI_DEVID(pdev->bus->number, pdev->devfn), data); + if (ret) + return ret; + + for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { + struct pci_dev *tmp; + + /* Skip virtual buses */ + if (!bus->self) + continue; + + tmp = bus->self; + + /* + * PCIe-to-PCI/X bridges alias transactions from downstream + * devices using the subordinate bus number (PCI Express to + * PCI/PCI-X Bridge Spec, rev 1.0, sec 2.3). For all cases + * where the upstream bus is PCI/X we alias to the bridge + * (there are various conditions in the previous reference + * where the bridge may take ownership of transactions, even + * when the secondary interface is PCI-X). + */ + if (pci_is_pcie(tmp)) { + switch (pci_pcie_type(tmp)) { + case PCI_EXP_TYPE_ROOT_PORT: + case PCI_EXP_TYPE_UPSTREAM: + case PCI_EXP_TYPE_DOWNSTREAM: + continue; + case PCI_EXP_TYPE_PCI_BRIDGE: + ret = fn(tmp, + PCI_DEVID(tmp->subordinate->number, + PCI_DEVFN(0, 0)), data); + if (ret) + return ret; + continue; + case PCI_EXP_TYPE_PCIE_BRIDGE: + ret = fn(tmp, + PCI_DEVID(tmp->bus->number, + tmp->devfn), data); + if (ret) + return ret; + continue; + } + } else { + ret = fn(tmp, PCI_DEVID(tmp->bus->number, tmp->devfn), + data); + if (ret) + return ret; + } + } + + return ret; +} + /* * find the upstream PCIe-to-PCI bridge of a PCI device * if the device is PCIE, return NULL diff --git a/include/linux/pci.h b/include/linux/pci.h index aab57b4abe7f..14b074bbc841 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1795,6 +1795,10 @@ static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev) } #endif +int pci_for_each_dma_alias(struct pci_dev *pdev, + int (*fn)(struct pci_dev *pdev, + u16 alias, void *data), void *data); + /** * pci_find_upstream_pcie_bridge - find upstream PCIe-to-PCI bridge of a device * @pdev: the PCI device -- cgit v1.2.3-71-gd317 From 6b121592f8a3fd2bd0de128637b76a0d0864d993 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 22 May 2014 17:07:36 -0600 Subject: PCI: Convert pci_dev_flags definitions to bit shifts Convert the pci_dev_flags definitions from decimal constants to bit shifts. We're only a few entries away from where using the decimal value becomes cumbersome. No functional change. Tested-by: George Spelvin Tested-by: Pat Erley Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 14b074bbc841..545903df00dc 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -164,13 +164,13 @@ enum pci_dev_flags { /* INTX_DISABLE in PCI_COMMAND register disables MSI * generation too. */ - PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1, + PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) (1 << 0), /* Device configuration is irrevocably lost if disabled into D3 */ - PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2, + PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) (1 << 1), /* Provide indication device is assigned by a Virtual Machine Manager */ - PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4, + PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) (1 << 2), /* Flag for quirk use to store if quirk-specific ACS is enabled */ - PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) 8, + PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) (1 << 3), }; enum pci_irq_reroute_variant { -- cgit v1.2.3-71-gd317 From 31c2b8153c58f11ddb80dfd392c16f13c2d709c6 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 22 May 2014 17:07:43 -0600 Subject: PCI: Add support for DMA alias quirks Some devices are broken and use a requester ID other than their physical devfn. Add a byte, using an existing gap in the pci_dev structure, to store an alternate "alias" devfn. A bit in the dev_flags tells us when this is valid. We then add the alias as one more step in the pci_for_each_dma_alias() iterator. Tested-by: George Spelvin Tested-by: Pat Erley Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- drivers/pci/search.c | 11 +++++++++++ include/linux/pci.h | 3 +++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/drivers/pci/search.c b/drivers/pci/search.c index 5601cdb8bbb3..2c19f3f40621 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -37,6 +37,17 @@ int pci_for_each_dma_alias(struct pci_dev *pdev, if (ret) return ret; + /* + * If the device is broken and uses an alias requester ID for + * DMA, iterate over that too. + */ + if (unlikely(pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN)) { + ret = fn(pdev, PCI_DEVID(pdev->bus->number, + pdev->dma_alias_devfn), data); + if (ret) + return ret; + } + for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { struct pci_dev *tmp; diff --git a/include/linux/pci.h b/include/linux/pci.h index 545903df00dc..9d4035c276f4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -171,6 +171,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) (1 << 2), /* Flag for quirk use to store if quirk-specific ACS is enabled */ PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) (1 << 3), + /* Flag to indicate the device uses dma_alias_devfn */ + PCI_DEV_FLAGS_DMA_ALIAS_DEVFN = (__force pci_dev_flags_t) (1 << 4), }; enum pci_irq_reroute_variant { @@ -268,6 +270,7 @@ struct pci_dev { u8 rom_base_reg; /* which config register controls the ROM */ u8 pin; /* which interrupt pin this device uses */ u16 pcie_flags_reg; /* cached PCIe Capabilities Register */ + u8 dma_alias_devfn;/* devfn of DMA alias, if any */ struct pci_driver *driver; /* which driver has allocated this device */ u64 dma_mask; /* Mask of the bits of bus address this -- cgit v1.2.3-71-gd317 From a20c93e3160e37ecccc738d8eef085c8507949ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Apr 2014 15:07:21 +0200 Subject: nfs: remove ->write_pageio_init from rpc ops The write_pageio_init method is just a very convoluted way to grab the right nfs_pageio_ops vector. The vector to chose is not a choice of protocol version, but just a pNFS vs MDS I/O choice that can simply be done inside nfs_pageio_init_write based on the presence of a layout driver, and a new force_mds flag to the special case of falling back to MDS I/O on a pNFS-capable volume. Signed-off-by: Christoph Hellwig Tested-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++-- fs/nfs/internal.h | 2 +- fs/nfs/nfs3proc.c | 1 - fs/nfs/nfs4proc.c | 1 - fs/nfs/pnfs.c | 16 +--------------- fs/nfs/pnfs.h | 8 -------- fs/nfs/proc.c | 1 - fs/nfs/write.c | 21 +++++++++++++-------- include/linux/nfs_xdr.h | 2 -- 9 files changed, 17 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index b8797ae6831f..6a31102b0819 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -564,7 +564,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) dreq->count = 0; get_dreq(dreq); - NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE, + nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; @@ -874,7 +874,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; unsigned long seg; - NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE, + nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; get_dreq(dreq); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index dd8bfc2e2464..8431083de179 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -422,7 +422,7 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data); /* write.c */ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags, + struct inode *inode, int ioflags, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); extern struct nfs_write_header *nfs_writehdr_alloc(void); extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index db60149c4579..e98488053906 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -951,7 +951,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .read_rpc_prepare = nfs3_proc_read_rpc_prepare, .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, - .write_pageio_init = nfs_pageio_init_write, .write_rpc_prepare = nfs3_proc_write_rpc_prepare, .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 397be39c6dc8..8da0c62966b5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8437,7 +8437,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .read_rpc_prepare = nfs4_proc_read_rpc_prepare, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, - .write_pageio_init = pnfs_pageio_init_write, .write_rpc_prepare = nfs4_proc_write_rpc_prepare, .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index cb53d450ae32..9edac9f01c2a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1447,20 +1447,6 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0); } -void -pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, - int ioflags, - const struct nfs_pgio_completion_ops *compl_ops) -{ - struct nfs_server *server = NFS_SERVER(inode); - struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; - - if (ld == NULL) - nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); - else - nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags); -} - bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) @@ -1496,7 +1482,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode, LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops); + nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops); pgio.pg_dreq = dreq; while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 023793909778..e9ac8fbaee3d 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -182,8 +182,6 @@ void pnfs_put_lseg(struct pnfs_layout_segment *lseg); void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, const struct nfs_pgio_completion_ops *); -void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, - int, const struct nfs_pgio_completion_ops *); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); void unset_pnfs_layoutdriver(struct nfs_server *); @@ -467,12 +465,6 @@ static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, str nfs_pageio_init_read(pgio, inode, compl_ops); } -static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, - const struct nfs_pgio_completion_ops *compl_ops) -{ - nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); -} - static inline int pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, struct nfs_commit_info *cinfo) diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index e55ce9e8b034..f9cc29590a18 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -739,7 +739,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .read_rpc_prepare = nfs_proc_read_rpc_prepare, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, - .write_pageio_init = nfs_pageio_init_write, .write_rpc_prepare = nfs_proc_write_rpc_prepare, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index cd7c651f9b84..ee6d46fde76c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -354,10 +354,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc struct nfs_pageio_descriptor pgio; int err; - NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio, - page->mapping->host, - wb_priority(wbc), - &nfs_async_write_completion_ops); + nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc), + false, &nfs_async_write_completion_ops); err = nfs_do_writepage(page, wbc, &pgio); nfs_pageio_complete(&pgio); if (err < 0) @@ -400,7 +398,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); - NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops); + nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false, + &nfs_async_write_completion_ops); err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); @@ -1282,11 +1281,17 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = { }; void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags, + struct inode *inode, int ioflags, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops) { - nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops, - NFS_SERVER(inode)->wsize, ioflags); + struct nfs_server *server = NFS_SERVER(inode); + const struct nfs_pageio_ops *pg_ops = &nfs_pageio_write_ops; + +#ifdef CONFIG_NFS_V4_1 + if (server->pnfs_curr_ld && !force_mds) + pg_ops = server->pnfs_curr_ld->pg_write_ops; +#endif + nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->wsize, ioflags); } EXPORT_SYMBOL_GPL(nfs_pageio_init_write); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6fb5b2335b59..78216f859527 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1471,8 +1471,6 @@ struct nfs_rpc_ops { int (*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *); int (*read_done) (struct rpc_task *, struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, struct rpc_message *); - void (*write_pageio_init)(struct nfs_pageio_descriptor *, struct inode *, int, - const struct nfs_pgio_completion_ops *); int (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *); int (*write_done) (struct rpc_task *, struct nfs_write_data *); void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); -- cgit v1.2.3-71-gd317 From fab5fc25d230edcc8ee72367e505955a2fae0cac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Apr 2014 15:07:22 +0200 Subject: nfs: remove ->read_pageio_init from rpc ops The read_pageio_init method is just a very convoluted way to grab the right nfs_pageio_ops vector. The vector to chose is not a choice of protocol version, but just a pNFS vs MDS I/O choice that can simply be done inside nfs_pageio_init_read based on the presence of a layout driver, and a new force_mds flag to the special case of falling back to MDS I/O on a pNFS-capable volume. Signed-off-by: Christoph Hellwig Tested-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 +- fs/nfs/internal.h | 2 +- fs/nfs/nfs3proc.c | 1 - fs/nfs/nfs4proc.c | 1 - fs/nfs/pnfs.c | 15 +-------------- fs/nfs/pnfs.h | 9 --------- fs/nfs/proc.c | 1 - fs/nfs/read.c | 19 ++++++++++++++----- include/linux/nfs_xdr.h | 2 -- 9 files changed, 17 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 6a31102b0819..bbe688e2cc89 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -424,7 +424,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; unsigned long seg; - NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode, + nfs_pageio_init_read(&desc, dreq->inode, false, &nfs_direct_read_completion_ops); get_dreq(dreq); desc.pg_dreq = dreq; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8431083de179..98fe618db2aa 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -398,7 +398,7 @@ struct nfs_pgio_completion_ops; extern struct nfs_read_header *nfs_readhdr_alloc(void); extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, - struct inode *inode, + struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); extern int nfs_initiate_read(struct rpc_clnt *clnt, struct nfs_read_data *data, diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index e98488053906..d873241a9b3a 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -947,7 +947,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .pathconf = nfs3_proc_pathconf, .decode_dirent = nfs3_decode_dirent, .read_setup = nfs3_proc_read_setup, - .read_pageio_init = nfs_pageio_init_read, .read_rpc_prepare = nfs3_proc_read_rpc_prepare, .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8da0c62966b5..21cd1f2ee35a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8433,7 +8433,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .set_capabilities = nfs4_server_capabilities, .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, - .read_pageio_init = pnfs_pageio_init_read, .read_rpc_prepare = nfs4_proc_read_rpc_prepare, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 9edac9f01c2a..3d5bc2baafd1 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1434,19 +1434,6 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); -void -pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, - const struct nfs_pgio_completion_ops *compl_ops) -{ - struct nfs_server *server = NFS_SERVER(inode); - struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; - - if (ld == NULL) - nfs_pageio_init_read(pgio, inode, compl_ops); - else - nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0); -} - bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) @@ -1641,7 +1628,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_read(&pgio, inode, compl_ops); + nfs_pageio_init_read(&pgio, inode, true, compl_ops); pgio.pg_dreq = dreq; while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index e9ac8fbaee3d..94a9a1834b3f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -180,9 +180,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_put_lseg(struct pnfs_layout_segment *lseg); -void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, - const struct nfs_pgio_completion_ops *); - void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); void unset_pnfs_layoutdriver(struct nfs_server *); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); @@ -459,12 +456,6 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) { } -static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, - const struct nfs_pgio_completion_ops *compl_ops) -{ - nfs_pageio_init_read(pgio, inode, compl_ops); -} - static inline int pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, struct nfs_commit_info *cinfo) diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index f9cc29590a18..8cc227fcd4d2 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -735,7 +735,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .pathconf = nfs_proc_pathconf, .decode_dirent = nfs2_decode_dirent, .read_setup = nfs_proc_read_setup, - .read_pageio_init = nfs_pageio_init_read, .read_rpc_prepare = nfs_proc_read_rpc_prepare, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 411aedda14bb..7f87461be3a9 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -24,6 +24,7 @@ #include "internal.h" #include "iostat.h" #include "fscache.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -114,11 +115,17 @@ int nfs_return_empty_page(struct page *page) } void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, - struct inode *inode, + struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops) { - nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops, - NFS_SERVER(inode)->rsize, 0); + struct nfs_server *server = NFS_SERVER(inode); + const struct nfs_pageio_ops *pg_ops = &nfs_pageio_read_ops; + +#ifdef CONFIG_NFS_V4_1 + if (server->pnfs_curr_ld && !force_mds) + pg_ops = server->pnfs_curr_ld->pg_read_ops; +#endif + nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->rsize, 0); } EXPORT_SYMBOL_GPL(nfs_pageio_init_read); @@ -147,7 +154,8 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) zero_user_segment(page, len, PAGE_CACHE_SIZE); - NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); + nfs_pageio_init_read(&pgio, inode, false, + &nfs_async_read_completion_ops); nfs_pageio_add_request(&pgio, new); nfs_pageio_complete(&pgio); NFS_I(inode)->read_io += pgio.pg_bytes_written; @@ -654,7 +662,8 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ - NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); + nfs_pageio_init_read(&pgio, inode, false, + &nfs_async_read_completion_ops); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 78216f859527..3e8fc1fe585b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1466,8 +1466,6 @@ struct nfs_rpc_ops { int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int); void (*read_setup) (struct nfs_read_data *, struct rpc_message *); - void (*read_pageio_init)(struct nfs_pageio_descriptor *, struct inode *, - const struct nfs_pgio_completion_ops *); int (*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *); int (*read_done) (struct rpc_task *, struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, struct rpc_message *); -- cgit v1.2.3-71-gd317 From c8fe16e3f96a9bb95a10cedb19d2be2d2d580940 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 28 May 2014 14:57:02 -0600 Subject: PCI: Add support for PCIe-to-PCI bridge DMA alias quirks Several PCIe-to-PCI bridges fail to provide a PCIe capability, causing us to handle them as conventional PCI devices when they really use the requester ID of the secondary bus. We need to differentiate these from PCIe-to-PCI bridges that actually use the conventional PCI ID when a PCIe capability is not present, such as those found on the root complex of may Intel chipsets. Add a dev_flag bit to identify devices to be handled as standard PCIe-to-PCI bridges. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- drivers/pci/search.c | 10 ++++++++-- include/linux/pci.h | 2 ++ 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/pci/search.c b/drivers/pci/search.c index 2c19f3f40621..df38f73f091f 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -88,8 +88,14 @@ int pci_for_each_dma_alias(struct pci_dev *pdev, continue; } } else { - ret = fn(tmp, PCI_DEVID(tmp->bus->number, tmp->devfn), - data); + if (tmp->dev_flags & PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS) + ret = fn(tmp, + PCI_DEVID(tmp->subordinate->number, + PCI_DEVFN(0, 0)), data); + else + ret = fn(tmp, + PCI_DEVID(tmp->bus->number, + tmp->devfn), data); if (ret) return ret; } diff --git a/include/linux/pci.h b/include/linux/pci.h index 9d4035c276f4..85ab35e974a9 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -173,6 +173,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) (1 << 3), /* Flag to indicate the device uses dma_alias_devfn */ PCI_DEV_FLAGS_DMA_ALIAS_DEVFN = (__force pci_dev_flags_t) (1 << 4), + /* Use a PCIe-to-PCI bridge alias even if !pci_is_pcie */ + PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS = (__force pci_dev_flags_t) (1 << 5), }; enum pci_irq_reroute_variant { -- cgit v1.2.3-71-gd317 From 3c6b899c49e5e9c2803b59ee553eddaf69cea7f6 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:24 -0400 Subject: NFS: Create a common argument structure for reads and writes Reads and writes have very similar arguments. This patch combines them together and documents the few fields used only by write. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs2xdr.c | 8 ++++---- fs/nfs/nfs3xdr.c | 8 ++++---- fs/nfs/nfs4proc.c | 4 ++-- fs/nfs/nfs4xdr.c | 10 ++++++---- fs/nfs/read.c | 2 +- fs/nfs/write.c | 2 +- include/linux/nfs_xdr.h | 47 +++++++++++++++++++---------------------------- 7 files changed, 37 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 62db136339ea..461cd8bd9401 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -613,7 +613,7 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req, * }; */ static void encode_readargs(struct xdr_stream *xdr, - const struct nfs_readargs *args) + const struct nfs_pgio_args *args) { u32 offset = args->offset; u32 count = args->count; @@ -629,7 +629,7 @@ static void encode_readargs(struct xdr_stream *xdr, static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, struct xdr_stream *xdr, - const struct nfs_readargs *args) + const struct nfs_pgio_args *args) { encode_readargs(xdr, args); prepare_reply_buffer(req, args->pages, args->pgbase, @@ -649,7 +649,7 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, * }; */ static void encode_writeargs(struct xdr_stream *xdr, - const struct nfs_writeargs *args) + const struct nfs_pgio_args *args) { u32 offset = args->offset; u32 count = args->count; @@ -669,7 +669,7 @@ static void encode_writeargs(struct xdr_stream *xdr, static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req, struct xdr_stream *xdr, - const struct nfs_writeargs *args) + const struct nfs_pgio_args *args) { encode_writeargs(xdr, args); xdr->buf->flags |= XDRBUF_WRITE; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index fa6d72131c19..02f16c212007 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -953,7 +953,7 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req, * }; */ static void encode_read3args(struct xdr_stream *xdr, - const struct nfs_readargs *args) + const struct nfs_pgio_args *args) { __be32 *p; @@ -966,7 +966,7 @@ static void encode_read3args(struct xdr_stream *xdr, static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, struct xdr_stream *xdr, - const struct nfs_readargs *args) + const struct nfs_pgio_args *args) { encode_read3args(xdr, args); prepare_reply_buffer(req, args->pages, args->pgbase, @@ -992,7 +992,7 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, * }; */ static void encode_write3args(struct xdr_stream *xdr, - const struct nfs_writeargs *args) + const struct nfs_pgio_args *args) { __be32 *p; @@ -1008,7 +1008,7 @@ static void encode_write3args(struct xdr_stream *xdr, static void nfs3_xdr_enc_write3args(struct rpc_rqst *req, struct xdr_stream *xdr, - const struct nfs_writeargs *args) + const struct nfs_pgio_args *args) { encode_write3args(xdr, args); xdr->buf->flags |= XDRBUF_WRITE; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 21cd1f2ee35a..4794ca693367 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4055,7 +4055,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) } static bool nfs4_read_stateid_changed(struct rpc_task *task, - struct nfs_readargs *args) + struct nfs_pgio_args *args) { if (!nfs4_error_stateid_expired(task->tk_status) || @@ -4121,7 +4121,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data } static bool nfs4_write_stateid_changed(struct rpc_task *task, - struct nfs_writeargs *args) + struct nfs_pgio_args *args) { if (!nfs4_error_stateid_expired(task->tk_status) || diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 73ce8d4fe2c8..032159c36a57 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1556,7 +1556,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); } -static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) +static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args, + struct compound_hdr *hdr) { __be32 *p; @@ -1701,7 +1702,8 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4 encode_nfs4_verifier(xdr, &arg->confirm); } -static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) +static void encode_write(struct xdr_stream *xdr, const struct nfs_pgio_args *args, + struct compound_hdr *hdr) { __be32 *p; @@ -2451,7 +2453,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr, * Encode a READ request */ static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_readargs *args) + struct nfs_pgio_args *args) { struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), @@ -2513,7 +2515,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, * Encode a WRITE request */ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_writeargs *args) + struct nfs_pgio_args *args) { struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 7f87461be3a9..46d555206023 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -470,7 +470,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_readargs *argp = &data->args; + struct nfs_pgio_args *argp = &data->args; struct nfs_readres *resp = &data->res; /* This is a short read! */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ee6d46fde76c..25ba3830ec8b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1388,7 +1388,7 @@ static int nfs_should_remove_suid(const struct inode *inode) */ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_writeargs *argp = &data->args; + struct nfs_pgio_args *argp = &data->args; struct nfs_writeres *resp = &data->res; struct inode *inode = data->header->inode; int status; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 3e8fc1fe585b..5875001928f9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -491,18 +491,6 @@ struct nfs4_delegreturnres { /* * Arguments to the read call. */ -struct nfs_readargs { - struct nfs4_sequence_args seq_args; - struct nfs_fh * fh; - struct nfs_open_context *context; - struct nfs_lock_context *lock_context; - nfs4_stateid stateid; - __u64 offset; - __u32 count; - unsigned int pgbase; - struct page ** pages; -}; - struct nfs_readres { struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; @@ -513,20 +501,6 @@ struct nfs_readres { /* * Arguments to the write call. */ -struct nfs_writeargs { - struct nfs4_sequence_args seq_args; - struct nfs_fh * fh; - struct nfs_open_context *context; - struct nfs_lock_context *lock_context; - nfs4_stateid stateid; - __u64 offset; - __u32 count; - enum nfs3_stable_how stable; - unsigned int pgbase; - struct page ** pages; - const u32 * bitmask; -}; - struct nfs_write_verifier { char data[8]; }; @@ -544,6 +518,23 @@ struct nfs_writeres { const struct nfs_server *server; }; +/* + * Arguments shared by the read and write call. + */ +struct nfs_pgio_args { + struct nfs4_sequence_args seq_args; + struct nfs_fh * fh; + struct nfs_open_context *context; + struct nfs_lock_context *lock_context; + nfs4_stateid stateid; + __u64 offset; + __u32 count; + unsigned int pgbase; + struct page ** pages; + const u32 * bitmask; /* used by write */ + enum nfs3_stable_how stable; /* used by write */ +}; + /* * Arguments to the commit call. */ @@ -1269,7 +1260,7 @@ struct nfs_read_data { struct list_head list; struct rpc_task task; struct nfs_fattr fattr; /* fattr storage */ - struct nfs_readargs args; + struct nfs_pgio_args args; struct nfs_readres res; unsigned long timestamp; /* For lease renewal */ int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); @@ -1321,7 +1312,7 @@ struct nfs_write_data { struct rpc_task task; struct nfs_fattr fattr; struct nfs_writeverf verf; - struct nfs_writeargs args; /* argument struct */ + struct nfs_pgio_args args; /* argument struct */ struct nfs_writeres res; /* result struct */ unsigned long timestamp; /* For lease renewal */ int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); -- cgit v1.2.3-71-gd317 From 9137bdf3d241fc2cbeb2a8ced51d1546150aa6a1 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:25 -0400 Subject: NFS: Create a common results structure for reads and writes Reads and writes have very similar results. This patch combines the two structs together with comments to show where the differing fields are used. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs2xdr.c | 6 +++--- fs/nfs/nfs3xdr.c | 8 ++++---- fs/nfs/nfs4xdr.c | 9 +++++---- fs/nfs/read.c | 2 +- fs/nfs/write.c | 2 +- include/linux/nfs_xdr.h | 32 ++++++++++++-------------------- 6 files changed, 26 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 461cd8bd9401..5f61b83f4a1c 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -103,7 +103,7 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) /* * typedef opaque nfsdata<>; */ -static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result) +static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result) { u32 recvd, count; __be32 *p; @@ -857,7 +857,7 @@ out_default: * }; */ static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_readres *result) + struct nfs_pgio_res *result) { enum nfs_stat status; int error; @@ -878,7 +878,7 @@ out_default: } static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_writeres *result) + struct nfs_pgio_res *result) { /* All NFSv2 writes are "file sync" writes */ result->verf->committed = NFS_FILE_SYNC; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 02f16c212007..8f4cbe7f4aa8 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1589,7 +1589,7 @@ out_default: * }; */ static int decode_read3resok(struct xdr_stream *xdr, - struct nfs_readres *result) + struct nfs_pgio_res *result) { u32 eof, count, ocount, recvd; __be32 *p; @@ -1625,7 +1625,7 @@ out_overflow: } static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_readres *result) + struct nfs_pgio_res *result) { enum nfs_stat status; int error; @@ -1673,7 +1673,7 @@ out_status: * }; */ static int decode_write3resok(struct xdr_stream *xdr, - struct nfs_writeres *result) + struct nfs_pgio_res *result) { __be32 *p; @@ -1697,7 +1697,7 @@ out_eio: } static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_writeres *result) + struct nfs_pgio_res *result) { enum nfs_stat status; int error; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 032159c36a57..939ae606cfa4 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5087,7 +5087,8 @@ static int decode_putrootfh(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_PUTROOTFH); } -static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res) +static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, + struct nfs_pgio_res *res) { __be32 *p; uint32_t count, eof, recvd; @@ -5341,7 +5342,7 @@ static int decode_setclientid_confirm(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM); } -static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) +static int decode_write(struct xdr_stream *xdr, struct nfs_pgio_res *res) { __be32 *p; int status; @@ -6638,7 +6639,7 @@ out: * Decode Read response */ static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr, - struct nfs_readres *res) + struct nfs_pgio_res *res) { struct compound_hdr hdr; int status; @@ -6663,7 +6664,7 @@ out: * Decode WRITE response */ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr, - struct nfs_writeres *res) + struct nfs_pgio_res *res) { struct compound_hdr hdr; int status; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 46d555206023..473bba35a2cb 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -471,7 +471,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) { struct nfs_pgio_args *argp = &data->args; - struct nfs_readres *resp = &data->res; + struct nfs_pgio_res *resp = &data->res; /* This is a short read! */ nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 25ba3830ec8b..d392a70092fe 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1389,7 +1389,7 @@ static int nfs_should_remove_suid(const struct inode *inode) void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { struct nfs_pgio_args *argp = &data->args; - struct nfs_writeres *resp = &data->res; + struct nfs_pgio_res *resp = &data->res; struct inode *inode = data->header->inode; int status; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5875001928f9..381f832b03c6 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -488,16 +488,6 @@ struct nfs4_delegreturnres { const struct nfs_server *server; }; -/* - * Arguments to the read call. - */ -struct nfs_readres { - struct nfs4_sequence_res seq_res; - struct nfs_fattr * fattr; - __u32 count; - int eof; -}; - /* * Arguments to the write call. */ @@ -510,14 +500,6 @@ struct nfs_writeverf { enum nfs3_stable_how committed; }; -struct nfs_writeres { - struct nfs4_sequence_res seq_res; - struct nfs_fattr * fattr; - struct nfs_writeverf * verf; - __u32 count; - const struct nfs_server *server; -}; - /* * Arguments shared by the read and write call. */ @@ -535,6 +517,16 @@ struct nfs_pgio_args { enum nfs3_stable_how stable; /* used by write */ }; +struct nfs_pgio_res { + struct nfs4_sequence_res seq_res; + struct nfs_fattr * fattr; + __u32 count; + int eof; /* used by read */ + struct nfs_writeverf * verf; /* used by write */ + const struct nfs_server *server; /* used by write */ + +}; + /* * Arguments to the commit call. */ @@ -1261,7 +1253,7 @@ struct nfs_read_data { struct rpc_task task; struct nfs_fattr fattr; /* fattr storage */ struct nfs_pgio_args args; - struct nfs_readres res; + struct nfs_pgio_res res; unsigned long timestamp; /* For lease renewal */ int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); __u64 mds_offset; @@ -1313,7 +1305,7 @@ struct nfs_write_data { struct nfs_fattr fattr; struct nfs_writeverf verf; struct nfs_pgio_args args; /* argument struct */ - struct nfs_writeres res; /* result struct */ + struct nfs_pgio_res res; /* result struct */ unsigned long timestamp; /* For lease renewal */ int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); __u64 mds_offset; /* Filelayout dense stripe */ -- cgit v1.2.3-71-gd317 From 9c7e1b3d50b56b8d8f6237ed232350b7c6476cd5 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:26 -0400 Subject: NFS: Create a common read and write data struct At this point, the only difference between nfs_read_data and nfs_write_data is the write verifier. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 22 ++++++++++---------- fs/nfs/internal.h | 10 ++++----- fs/nfs/nfs3proc.c | 12 +++++------ fs/nfs/nfs4_fs.h | 4 ++-- fs/nfs/nfs4filelayout.c | 34 +++++++++++++++---------------- fs/nfs/nfs4proc.c | 30 +++++++++++++-------------- fs/nfs/nfs4trace.h | 8 ++++---- fs/nfs/objlayout/objio_osd.c | 6 +++--- fs/nfs/objlayout/objlayout.c | 16 +++++++-------- fs/nfs/objlayout/objlayout.h | 8 ++++---- fs/nfs/pnfs.c | 26 ++++++++++++------------ fs/nfs/pnfs.h | 10 ++++----- fs/nfs/proc.c | 12 +++++------ fs/nfs/read.c | 32 ++++++++++++++--------------- fs/nfs/write.c | 36 ++++++++++++++++---------------- include/linux/nfs_fs.h | 4 ++-- include/linux/nfs_xdr.h | 44 ++++++++++++++-------------------------- 17 files changed, 150 insertions(+), 164 deletions(-) (limited to 'include') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 65d849bdf77a..206cc68c9694 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -210,7 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err) SetPageUptodate(bvec->bv_page); if (err) { - struct nfs_read_data *rdata = par->data; + struct nfs_pgio_data *rdata = par->data; struct nfs_pgio_header *header = rdata->header; if (!header->pnfs_error) @@ -224,17 +224,17 @@ static void bl_end_io_read(struct bio *bio, int err) static void bl_read_cleanup(struct work_struct *work) { struct rpc_task *task; - struct nfs_read_data *rdata; + struct nfs_pgio_data *rdata; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - rdata = container_of(task, struct nfs_read_data, task); + rdata = container_of(task, struct nfs_pgio_data, task); pnfs_ld_read_done(rdata); } static void bl_end_par_io_read(void *data, int unused) { - struct nfs_read_data *rdata = data; + struct nfs_pgio_data *rdata = data; rdata->task.tk_status = rdata->header->pnfs_error; INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); @@ -242,7 +242,7 @@ bl_end_par_io_read(void *data, int unused) } static enum pnfs_try_status -bl_read_pagelist(struct nfs_read_data *rdata) +bl_read_pagelist(struct nfs_pgio_data *rdata) { struct nfs_pgio_header *header = rdata->header; int i, hole; @@ -390,7 +390,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err) } if (unlikely(err)) { - struct nfs_write_data *data = par->data; + struct nfs_pgio_data *data = par->data; struct nfs_pgio_header *header = data->header; if (!header->pnfs_error) @@ -405,7 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err) { struct parallel_io *par = bio->bi_private; const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct nfs_write_data *data = par->data; + struct nfs_pgio_data *data = par->data; struct nfs_pgio_header *header = data->header; if (!uptodate) { @@ -423,10 +423,10 @@ static void bl_end_io_write(struct bio *bio, int err) static void bl_write_cleanup(struct work_struct *work) { struct rpc_task *task; - struct nfs_write_data *wdata; + struct nfs_pgio_data *wdata; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - wdata = container_of(task, struct nfs_write_data, task); + wdata = container_of(task, struct nfs_pgio_data, task); if (likely(!wdata->header->pnfs_error)) { /* Marks for LAYOUTCOMMIT */ mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), @@ -438,7 +438,7 @@ static void bl_write_cleanup(struct work_struct *work) /* Called when last of bios associated with a bl_write_pagelist call finishes */ static void bl_end_par_io_write(void *data, int num_se) { - struct nfs_write_data *wdata = data; + struct nfs_pgio_data *wdata = data; if (unlikely(wdata->header->pnfs_error)) { bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, @@ -673,7 +673,7 @@ check_page: } static enum pnfs_try_status -bl_write_pagelist(struct nfs_write_data *wdata, int sync) +bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) { struct nfs_pgio_header *header = wdata->header; int i, ret, npg_zero, pg_index, last = 0; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 98fe618db2aa..af01b80412dd 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -401,13 +401,13 @@ extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); extern int nfs_initiate_read(struct rpc_clnt *clnt, - struct nfs_read_data *data, + struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, int flags); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); -extern void nfs_readdata_release(struct nfs_read_data *rdata); +extern void nfs_readdata_release(struct nfs_pgio_data *rdata); /* super.c */ void nfs_clone_super(struct super_block *, struct nfs_mount_info *); @@ -429,10 +429,10 @@ extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); -extern void nfs_writedata_release(struct nfs_write_data *wdata); +extern void nfs_writedata_release(struct nfs_pgio_data *wdata); extern void nfs_commit_free(struct nfs_commit_data *p); extern int nfs_initiate_write(struct rpc_clnt *clnt, - struct nfs_write_data *data, + struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, int how, int flags); extern void nfs_write_prepare(struct rpc_task *task, void *calldata); @@ -492,7 +492,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); /* nfs4proc.c */ -extern void __nfs4_read_done_cb(struct nfs_read_data *); +extern void __nfs4_read_done_cb(struct nfs_pgio_data *); extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, const char *ip_addr); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d873241a9b3a..d235369c3dfb 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -795,7 +795,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return status; } -static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) +static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; @@ -807,18 +807,18 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) return 0; } -static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) +static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; } -static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { rpc_call_start(task); return 0; } -static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) +static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; @@ -829,12 +829,12 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) return 0; } -static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) +static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; } -static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) +static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { rpc_call_start(task); return 0; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index e1d1badbe53c..f63cb87cd730 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -337,7 +337,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, */ static inline void nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, - struct rpc_message *msg, struct nfs_write_data *wdata) + struct rpc_message *msg, struct nfs_pgio_data *wdata) { if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) @@ -369,7 +369,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags, static inline void nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, - struct rpc_message *msg, struct nfs_write_data *wdata) + struct rpc_message *msg, struct nfs_pgio_data *wdata) { } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index b9a35c05b60f..e6936147ad95 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -84,7 +84,7 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) BUG(); } -static void filelayout_reset_write(struct nfs_write_data *data) +static void filelayout_reset_write(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; struct rpc_task *task = &data->task; @@ -105,7 +105,7 @@ static void filelayout_reset_write(struct nfs_write_data *data) } } -static void filelayout_reset_read(struct nfs_read_data *data) +static void filelayout_reset_read(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; struct rpc_task *task = &data->task; @@ -243,7 +243,7 @@ wait_on_recovery: /* NFS_PROTO call done callback routines */ static int filelayout_read_done_cb(struct rpc_task *task, - struct nfs_read_data *data) + struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; int err; @@ -270,7 +270,7 @@ static int filelayout_read_done_cb(struct rpc_task *task, * rfc5661 is not clear about which credential should be used. */ static void -filelayout_set_layoutcommit(struct nfs_write_data *wdata) +filelayout_set_layoutcommit(struct nfs_pgio_data *wdata) { struct nfs_pgio_header *hdr = wdata->header; @@ -305,7 +305,7 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) */ static void filelayout_read_prepare(struct rpc_task *task, void *data) { - struct nfs_read_data *rdata = data; + struct nfs_pgio_data *rdata = data; if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { rpc_exit(task, -EIO); @@ -317,7 +317,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) rpc_exit(task, 0); return; } - rdata->read_done_cb = filelayout_read_done_cb; + rdata->pgio_done_cb = filelayout_read_done_cb; if (nfs41_setup_sequence(rdata->ds_clp->cl_session, &rdata->args.seq_args, @@ -331,7 +331,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) static void filelayout_read_call_done(struct rpc_task *task, void *data) { - struct nfs_read_data *rdata = data; + struct nfs_pgio_data *rdata = data; dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); @@ -347,14 +347,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) static void filelayout_read_count_stats(struct rpc_task *task, void *data) { - struct nfs_read_data *rdata = data; + struct nfs_pgio_data *rdata = data; rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); } static void filelayout_read_release(void *data) { - struct nfs_read_data *rdata = data; + struct nfs_pgio_data *rdata = data; struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; filelayout_fenceme(lo->plh_inode, lo); @@ -363,7 +363,7 @@ static void filelayout_read_release(void *data) } static int filelayout_write_done_cb(struct rpc_task *task, - struct nfs_write_data *data) + struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; int err; @@ -419,7 +419,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task, static void filelayout_write_prepare(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = data; + struct nfs_pgio_data *wdata = data; if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { rpc_exit(task, -EIO); @@ -443,7 +443,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) static void filelayout_write_call_done(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = data; + struct nfs_pgio_data *wdata = data; if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && task->tk_status == 0) { @@ -457,14 +457,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data) static void filelayout_write_count_stats(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = data; + struct nfs_pgio_data *wdata = data; rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); } static void filelayout_write_release(void *data) { - struct nfs_write_data *wdata = data; + struct nfs_pgio_data *wdata = data; struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; filelayout_fenceme(lo->plh_inode, lo); @@ -529,7 +529,7 @@ static const struct rpc_call_ops filelayout_commit_call_ops = { }; static enum pnfs_try_status -filelayout_read_pagelist(struct nfs_read_data *data) +filelayout_read_pagelist(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; struct pnfs_layout_segment *lseg = hdr->lseg; @@ -575,7 +575,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) /* Perform async writes. */ static enum pnfs_try_status -filelayout_write_pagelist(struct nfs_write_data *data, int sync) +filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) { struct nfs_pgio_header *hdr = data->header; struct pnfs_layout_segment *lseg = hdr->lseg; @@ -600,7 +600,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); - data->write_done_cb = filelayout_write_done_cb; + data->pgio_done_cb = filelayout_write_done_cb; atomic_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; fh = nfs4_fl_select_ds_fh(lseg, j); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4794ca693367..e793aa91454a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4033,12 +4033,12 @@ static bool nfs4_error_stateid_expired(int err) return false; } -void __nfs4_read_done_cb(struct nfs_read_data *data) +void __nfs4_read_done_cb(struct nfs_pgio_data *data) { nfs_invalidate_atime(data->header->inode); } -static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) +static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) { struct nfs_server *server = NFS_SERVER(data->header->inode); @@ -4068,7 +4068,7 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task, return true; } -static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) +static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data) { dprintk("--> %s\n", __func__); @@ -4077,19 +4077,19 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) return -EAGAIN; if (nfs4_read_stateid_changed(task, &data->args)) return -EAGAIN; - return data->read_done_cb ? data->read_done_cb(task, data) : + return data->pgio_done_cb ? data->pgio_done_cb(task, data) : nfs4_read_done_cb(task, data); } -static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) +static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { data->timestamp = jiffies; - data->read_done_cb = nfs4_read_done_cb; + data->pgio_done_cb = nfs4_read_done_cb; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); } -static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, @@ -4104,7 +4104,7 @@ static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat return 0; } -static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) +static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; @@ -4134,18 +4134,18 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task, return true; } -static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) +static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data) { if (!nfs4_sequence_done(task, &data->res.seq_res)) return -EAGAIN; if (nfs4_write_stateid_changed(task, &data->args)) return -EAGAIN; - return data->write_done_cb ? data->write_done_cb(task, data) : + return data->pgio_done_cb ? data->pgio_done_cb(task, data) : nfs4_write_done_cb(task, data); } static -bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) +bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) { const struct nfs_pgio_header *hdr = data->header; @@ -4158,7 +4158,7 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; } -static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) +static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { struct nfs_server *server = NFS_SERVER(data->header->inode); @@ -4168,8 +4168,8 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag } else data->args.bitmask = server->cache_consistency_bitmask; - if (!data->write_done_cb) - data->write_done_cb = nfs4_write_done_cb; + if (!data->pgio_done_cb) + data->pgio_done_cb = nfs4_write_done_cb; data->res.server = server; data->timestamp = jiffies; @@ -4177,7 +4177,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); } -static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) +static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 849cf146db30..0a744f3a86f6 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -932,7 +932,7 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group); DECLARE_EVENT_CLASS(nfs4_read_event, TP_PROTO( - const struct nfs_read_data *data, + const struct nfs_pgio_data *data, int error ), @@ -972,7 +972,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event, #define DEFINE_NFS4_READ_EVENT(name) \ DEFINE_EVENT(nfs4_read_event, name, \ TP_PROTO( \ - const struct nfs_read_data *data, \ + const struct nfs_pgio_data *data, \ int error \ ), \ TP_ARGS(data, error)) @@ -983,7 +983,7 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); DECLARE_EVENT_CLASS(nfs4_write_event, TP_PROTO( - const struct nfs_write_data *data, + const struct nfs_pgio_data *data, int error ), @@ -1024,7 +1024,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event, #define DEFINE_NFS4_WRITE_EVENT(name) \ DEFINE_EVENT(nfs4_write_event, name, \ TP_PROTO( \ - const struct nfs_write_data *data, \ + const struct nfs_pgio_data *data, \ int error \ ), \ TP_ARGS(data, error)) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 5457745dd4f1..426b366b0b33 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -439,7 +439,7 @@ static void _read_done(struct ore_io_state *ios, void *private) objlayout_read_done(&objios->oir, status, objios->sync); } -int objio_read_pagelist(struct nfs_read_data *rdata) +int objio_read_pagelist(struct nfs_pgio_data *rdata) { struct nfs_pgio_header *hdr = rdata->header; struct objio_state *objios; @@ -487,7 +487,7 @@ static void _write_done(struct ore_io_state *ios, void *private) static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) { struct objio_state *objios = priv; - struct nfs_write_data *wdata = objios->oir.rpcdata; + struct nfs_pgio_data *wdata = objios->oir.rpcdata; struct address_space *mapping = wdata->header->inode->i_mapping; pgoff_t index = offset / PAGE_SIZE; struct page *page; @@ -531,7 +531,7 @@ static const struct _ore_r4w_op _r4w_op = { .put_page = &__r4w_put_page, }; -int objio_write_pagelist(struct nfs_write_data *wdata, int how) +int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) { struct nfs_pgio_header *hdr = wdata->header; struct objio_state *objios; diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index e4f9cbfec67b..2f955f671003 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -229,11 +229,11 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index, static void _rpc_read_complete(struct work_struct *work) { struct rpc_task *task; - struct nfs_read_data *rdata; + struct nfs_pgio_data *rdata; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - rdata = container_of(task, struct nfs_read_data, task); + rdata = container_of(task, struct nfs_pgio_data, task); pnfs_ld_read_done(rdata); } @@ -241,7 +241,7 @@ static void _rpc_read_complete(struct work_struct *work) void objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) { - struct nfs_read_data *rdata = oir->rpcdata; + struct nfs_pgio_data *rdata = oir->rpcdata; oir->status = rdata->task.tk_status = status; if (status >= 0) @@ -266,7 +266,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) * Perform sync or async reads. */ enum pnfs_try_status -objlayout_read_pagelist(struct nfs_read_data *rdata) +objlayout_read_pagelist(struct nfs_pgio_data *rdata) { struct nfs_pgio_header *hdr = rdata->header; struct inode *inode = hdr->inode; @@ -312,11 +312,11 @@ objlayout_read_pagelist(struct nfs_read_data *rdata) static void _rpc_write_complete(struct work_struct *work) { struct rpc_task *task; - struct nfs_write_data *wdata; + struct nfs_pgio_data *wdata; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - wdata = container_of(task, struct nfs_write_data, task); + wdata = container_of(task, struct nfs_pgio_data, task); pnfs_ld_write_done(wdata); } @@ -324,7 +324,7 @@ static void _rpc_write_complete(struct work_struct *work) void objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) { - struct nfs_write_data *wdata = oir->rpcdata; + struct nfs_pgio_data *wdata = oir->rpcdata; oir->status = wdata->task.tk_status = status; if (status >= 0) { @@ -351,7 +351,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) * Perform sync or async writes. */ enum pnfs_try_status -objlayout_write_pagelist(struct nfs_write_data *wdata, +objlayout_write_pagelist(struct nfs_pgio_data *wdata, int how) { struct nfs_pgio_header *hdr = wdata->header; diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 87aa1dec6120..01e041029a6c 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg); */ extern void objio_free_result(struct objlayout_io_res *oir); -extern int objio_read_pagelist(struct nfs_read_data *rdata); -extern int objio_write_pagelist(struct nfs_write_data *wdata, int how); +extern int objio_read_pagelist(struct nfs_pgio_data *rdata); +extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how); /* * callback API @@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg( extern void objlayout_free_lseg(struct pnfs_layout_segment *); extern enum pnfs_try_status objlayout_read_pagelist( - struct nfs_read_data *); + struct nfs_pgio_data *); extern enum pnfs_try_status objlayout_write_pagelist( - struct nfs_write_data *, + struct nfs_pgio_data *, int how); extern void objlayout_encode_layoutcommit( diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3d5bc2baafd1..e9cea3ab7cf9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1492,7 +1492,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode, } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); -static void pnfs_ld_handle_write_error(struct nfs_write_data *data) +static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1511,7 +1511,7 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data) /* * Called by non rpc-based layout drivers */ -void pnfs_ld_write_done(struct nfs_write_data *data) +void pnfs_ld_write_done(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1527,7 +1527,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_write_done); static void pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, - struct nfs_write_data *data) + struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1540,7 +1540,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, } static enum pnfs_try_status -pnfs_try_to_write_data(struct nfs_write_data *wdata, +pnfs_try_to_write_data(struct nfs_pgio_data *wdata, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg, int how) @@ -1564,7 +1564,7 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, static void pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) { - struct nfs_write_data *data; + struct nfs_pgio_data *data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; @@ -1572,7 +1572,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he while (!list_empty(head)) { enum pnfs_try_status trypnfs; - data = list_first_entry(head, struct nfs_write_data, list); + data = list_first_entry(head, struct nfs_pgio_data, list); list_del_init(&data->list); trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); @@ -1647,7 +1647,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, } EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); -static void pnfs_ld_handle_read_error(struct nfs_read_data *data) +static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1666,7 +1666,7 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) /* * Called by non rpc-based layout drivers */ -void pnfs_ld_read_done(struct nfs_read_data *data) +void pnfs_ld_read_done(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1682,7 +1682,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_read_done); static void pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, - struct nfs_read_data *data) + struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1698,7 +1698,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, * Call the appropriate parallel I/O subsystem read function. */ static enum pnfs_try_status -pnfs_try_to_read_data(struct nfs_read_data *rdata, +pnfs_try_to_read_data(struct nfs_pgio_data *rdata, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg) { @@ -1722,7 +1722,7 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, static void pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) { - struct nfs_read_data *data; + struct nfs_pgio_data *data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; @@ -1730,7 +1730,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea while (!list_empty(head)) { enum pnfs_try_status trypnfs; - data = list_first_entry(head, struct nfs_read_data, list); + data = list_first_entry(head, struct nfs_pgio_data, list); list_del_init(&data->list); trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); @@ -1821,7 +1821,7 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); void -pnfs_set_layoutcommit(struct nfs_write_data *wdata) +pnfs_set_layoutcommit(struct nfs_pgio_data *wdata) { struct nfs_pgio_header *hdr = wdata->header; struct inode *inode = hdr->inode; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 94a9a1834b3f..0031267d7492 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -113,8 +113,8 @@ struct pnfs_layoutdriver_type { * Return PNFS_ATTEMPTED to indicate the layout code has attempted * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS */ - enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); - enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); + enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data); + enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how); void (*free_deviceid_node) (struct nfs4_deviceid_node *); @@ -212,13 +212,13 @@ bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); -void pnfs_set_layoutcommit(struct nfs_write_data *wdata); +void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); int pnfs_commit_and_return_layout(struct inode *); -void pnfs_ld_write_done(struct nfs_write_data *); -void pnfs_ld_read_done(struct nfs_read_data *); +void pnfs_ld_write_done(struct nfs_pgio_data *); +void pnfs_ld_read_done(struct nfs_pgio_data *); struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 8cc227fcd4d2..c54829eb2156 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -578,7 +578,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return 0; } -static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) +static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; @@ -594,18 +594,18 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) return 0; } -static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) +static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; } -static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { rpc_call_start(task); return 0; } -static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) +static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; @@ -614,14 +614,14 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) return 0; } -static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) +static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ data->args.stable = NFS_FILE_SYNC; msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; } -static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) +static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { rpc_call_start(task); return 0; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 473bba35a2cb..9e426cc179ed 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -51,10 +51,10 @@ struct nfs_read_header *nfs_readhdr_alloc(void) } EXPORT_SYMBOL_GPL(nfs_readhdr_alloc); -static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, +static struct nfs_pgio_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount) { - struct nfs_read_data *data, *prealloc; + struct nfs_pgio_data *data, *prealloc; prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data; if (prealloc->header == NULL) @@ -84,7 +84,7 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr) } EXPORT_SYMBOL_GPL(nfs_readhdr_free); -void nfs_readdata_release(struct nfs_read_data *rdata) +void nfs_readdata_release(struct nfs_pgio_data *rdata) { struct nfs_pgio_header *hdr = rdata->header; struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header); @@ -212,7 +212,7 @@ out: } int nfs_initiate_read(struct rpc_clnt *clnt, - struct nfs_read_data *data, + struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, int flags) { struct inode *inode = data->header->inode; @@ -255,7 +255,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read); /* * Set up the NFS read request struct */ -static void nfs_read_rpcsetup(struct nfs_read_data *data, +static void nfs_read_rpcsetup(struct nfs_pgio_data *data, unsigned int count, unsigned int offset) { struct nfs_page *req = data->header->req; @@ -274,7 +274,7 @@ static void nfs_read_rpcsetup(struct nfs_read_data *data, nfs_fattr_init(&data->fattr); } -static int nfs_do_read(struct nfs_read_data *data, +static int nfs_do_read(struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops) { struct inode *inode = data->header->inode; @@ -286,13 +286,13 @@ static int nfs_do_multiple_reads(struct list_head *head, const struct rpc_call_ops *call_ops) { - struct nfs_read_data *data; + struct nfs_pgio_data *data; int ret = 0; while (!list_empty(head)) { int ret2; - data = list_first_entry(head, struct nfs_read_data, list); + data = list_first_entry(head, struct nfs_pgio_data, list); list_del_init(&data->list); ret2 = nfs_do_read(data, call_ops); @@ -324,8 +324,8 @@ static void nfs_pagein_error(struct nfs_pageio_descriptor *desc, { set_bit(NFS_IOHDR_REDO, &hdr->flags); while (!list_empty(&hdr->rpc_list)) { - struct nfs_read_data *data = list_first_entry(&hdr->rpc_list, - struct nfs_read_data, list); + struct nfs_pgio_data *data = list_first_entry(&hdr->rpc_list, + struct nfs_pgio_data, list); list_del(&data->list); nfs_readdata_release(data); } @@ -350,7 +350,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, { struct nfs_page *req = hdr->req; struct page *page = req->wb_page; - struct nfs_read_data *data; + struct nfs_pgio_data *data; size_t rsize = desc->pg_bsize, nbytes; unsigned int offset; @@ -382,7 +382,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, { struct nfs_page *req; struct page **pages; - struct nfs_read_data *data; + struct nfs_pgio_data *data; struct list_head *head = &desc->pg_list; data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base, @@ -447,7 +447,7 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = { * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) +int nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; int status; @@ -468,7 +468,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) return 0; } -static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) +static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data) { struct nfs_pgio_args *argp = &data->args; struct nfs_pgio_res *resp = &data->res; @@ -490,7 +490,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) { - struct nfs_read_data *data = calldata; + struct nfs_pgio_data *data = calldata; struct nfs_pgio_header *hdr = data->header; /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */ @@ -520,7 +520,7 @@ static void nfs_readpage_release_common(void *calldata) void nfs_read_prepare(struct rpc_task *task, void *calldata) { - struct nfs_read_data *data = calldata; + struct nfs_pgio_data *data = calldata; int err; err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); if (err) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d392a70092fe..3a2fc5c4c79a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -88,10 +88,10 @@ struct nfs_write_header *nfs_writehdr_alloc(void) } EXPORT_SYMBOL_GPL(nfs_writehdr_alloc); -static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, +static struct nfs_pgio_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount) { - struct nfs_write_data *data, *prealloc; + struct nfs_pgio_data *data, *prealloc; prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data; if (prealloc->header == NULL) @@ -120,7 +120,7 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr) } EXPORT_SYMBOL_GPL(nfs_writehdr_free); -void nfs_writedata_release(struct nfs_write_data *wdata) +void nfs_writedata_release(struct nfs_pgio_data *wdata) { struct nfs_pgio_header *hdr = wdata->header; struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header); @@ -582,7 +582,7 @@ nfs_clear_request_commit(struct nfs_page *req) } static inline -int nfs_write_need_commit(struct nfs_write_data *data) +int nfs_write_need_commit(struct nfs_pgio_data *data) { if (data->verf.committed == NFS_DATA_SYNC) return data->header->lseg == NULL; @@ -613,7 +613,7 @@ nfs_clear_request_commit(struct nfs_page *req) } static inline -int nfs_write_need_commit(struct nfs_write_data *data) +int nfs_write_need_commit(struct nfs_pgio_data *data) { return 0; } @@ -990,7 +990,7 @@ static int flush_task_priority(int how) } int nfs_initiate_write(struct rpc_clnt *clnt, - struct nfs_write_data *data, + struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, int how, int flags) { @@ -1047,7 +1047,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write); /* * Set up the argument/result storage required for the RPC call. */ -static void nfs_write_rpcsetup(struct nfs_write_data *data, +static void nfs_write_rpcsetup(struct nfs_pgio_data *data, unsigned int count, unsigned int offset, int how, struct nfs_commit_info *cinfo) { @@ -1082,7 +1082,7 @@ static void nfs_write_rpcsetup(struct nfs_write_data *data, nfs_fattr_init(&data->fattr); } -static int nfs_do_write(struct nfs_write_data *data, +static int nfs_do_write(struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, int how) { @@ -1095,13 +1095,13 @@ static int nfs_do_multiple_writes(struct list_head *head, const struct rpc_call_ops *call_ops, int how) { - struct nfs_write_data *data; + struct nfs_pgio_data *data; int ret = 0; while (!list_empty(head)) { int ret2; - data = list_first_entry(head, struct nfs_write_data, list); + data = list_first_entry(head, struct nfs_pgio_data, list); list_del_init(&data->list); ret2 = nfs_do_write(data, call_ops, how); @@ -1144,8 +1144,8 @@ static void nfs_flush_error(struct nfs_pageio_descriptor *desc, { set_bit(NFS_IOHDR_REDO, &hdr->flags); while (!list_empty(&hdr->rpc_list)) { - struct nfs_write_data *data = list_first_entry(&hdr->rpc_list, - struct nfs_write_data, list); + struct nfs_pgio_data *data = list_first_entry(&hdr->rpc_list, + struct nfs_pgio_data, list); list_del(&data->list); nfs_writedata_release(data); } @@ -1161,7 +1161,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, { struct nfs_page *req = hdr->req; struct page *page = req->wb_page; - struct nfs_write_data *data; + struct nfs_pgio_data *data; size_t wsize = desc->pg_bsize, nbytes; unsigned int offset; int requests = 0; @@ -1211,7 +1211,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, { struct nfs_page *req; struct page **pages; - struct nfs_write_data *data; + struct nfs_pgio_data *data; struct list_head *head = &desc->pg_list; struct nfs_commit_info cinfo; @@ -1305,7 +1305,7 @@ EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); void nfs_write_prepare(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = calldata; + struct nfs_pgio_data *data = calldata; int err; err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); if (err) @@ -1328,14 +1328,14 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) */ static void nfs_writeback_done_common(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = calldata; + struct nfs_pgio_data *data = calldata; nfs_writeback_done(task, data); } static void nfs_writeback_release_common(void *calldata) { - struct nfs_write_data *data = calldata; + struct nfs_pgio_data *data = calldata; struct nfs_pgio_header *hdr = data->header; int status = data->task.tk_status; @@ -1386,7 +1386,7 @@ static int nfs_should_remove_suid(const struct inode *inode) /* * This function is called when the WRITE call is complete. */ -void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) +void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data) { struct nfs_pgio_args *argp = &data->args; struct nfs_pgio_res *resp = &data->res; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index fa6918b0f829..7e0db561d829 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -520,7 +520,7 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); -extern void nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); +extern void nfs_writeback_done(struct rpc_task *, struct nfs_pgio_data *); /* * Try to write back everything synchronously (but check the @@ -553,7 +553,7 @@ nfs_have_writebacks(struct inode *inode) extern int nfs_readpage(struct file *, struct page *); extern int nfs_readpages(struct file *, struct address_space *, struct list_head *, unsigned); -extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); +extern int nfs_readpage_result(struct rpc_task *, struct nfs_pgio_data *); extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, struct page *); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 381f832b03c6..64f6f1491c0d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1247,20 +1247,6 @@ struct nfs_page_array { struct page *page_array[NFS_PAGEVEC_SIZE]; }; -struct nfs_read_data { - struct nfs_pgio_header *header; - struct list_head list; - struct rpc_task task; - struct nfs_fattr fattr; /* fattr storage */ - struct nfs_pgio_args args; - struct nfs_pgio_res res; - unsigned long timestamp; /* For lease renewal */ - int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); - __u64 mds_offset; - struct nfs_page_array pages; - struct nfs_client *ds_clp; /* pNFS data server */ -}; - /* used as flag bits in nfs_pgio_header */ enum { NFS_IOHDR_ERROR = 0, @@ -1293,29 +1279,29 @@ struct nfs_pgio_header { unsigned long flags; }; -struct nfs_read_header { - struct nfs_pgio_header header; - struct nfs_read_data rpc_data; -}; - -struct nfs_write_data { +struct nfs_pgio_data { struct nfs_pgio_header *header; struct list_head list; struct rpc_task task; struct nfs_fattr fattr; - struct nfs_writeverf verf; + struct nfs_writeverf verf; /* Used for writes */ struct nfs_pgio_args args; /* argument struct */ struct nfs_pgio_res res; /* result struct */ unsigned long timestamp; /* For lease renewal */ - int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); + int (*pgio_done_cb) (struct rpc_task *task, struct nfs_pgio_data *data); __u64 mds_offset; /* Filelayout dense stripe */ struct nfs_page_array pages; struct nfs_client *ds_clp; /* pNFS data server */ }; +struct nfs_read_header { + struct nfs_pgio_header header; + struct nfs_pgio_data rpc_data; +}; + struct nfs_write_header { struct nfs_pgio_header header; - struct nfs_write_data rpc_data; + struct nfs_pgio_data rpc_data; struct nfs_writeverf verf; }; @@ -1448,12 +1434,12 @@ struct nfs_rpc_ops { struct nfs_pathconf *); int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int); - void (*read_setup) (struct nfs_read_data *, struct rpc_message *); - int (*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *); - int (*read_done) (struct rpc_task *, struct nfs_read_data *); - void (*write_setup) (struct nfs_write_data *, struct rpc_message *); - int (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *); - int (*write_done) (struct rpc_task *, struct nfs_write_data *); + void (*read_setup) (struct nfs_pgio_data *, struct rpc_message *); + int (*read_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *); + int (*read_done) (struct rpc_task *, struct nfs_pgio_data *); + void (*write_setup) (struct nfs_pgio_data *, struct rpc_message *); + int (*write_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *); + int (*write_done) (struct rpc_task *, struct nfs_pgio_data *); void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *); int (*commit_done) (struct rpc_task *, struct nfs_commit_data *); -- cgit v1.2.3-71-gd317 From c0752cdfbbb691cfe98812f7aed8ce1e766823c4 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:27 -0400 Subject: NFS: Create a common read and write header struct The only difference is the write verifier field, but we can keep that for a little bit longer. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 4 ++-- fs/nfs/pnfs.c | 4 ++-- fs/nfs/read.c | 14 +++++++------- fs/nfs/write.c | 14 +++++++------- include/linux/nfs_xdr.h | 7 +------ 5 files changed, 19 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index af01b80412dd..b0e7a41d14a8 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -395,7 +395,7 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool struct nfs_pgio_completion_ops; /* read.c */ -extern struct nfs_read_header *nfs_readhdr_alloc(void); +extern struct nfs_rw_header *nfs_readhdr_alloc(void); extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, bool force_mds, @@ -424,7 +424,7 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data); extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); -extern struct nfs_write_header *nfs_writehdr_alloc(void); +extern struct nfs_rw_header *nfs_writehdr_alloc(void); extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e9cea3ab7cf9..43cfe11aa1a4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1592,7 +1592,7 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - struct nfs_write_header *whdr; + struct nfs_rw_header *whdr; struct nfs_pgio_header *hdr; int ret; @@ -1750,7 +1750,7 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - struct nfs_read_header *rhdr; + struct nfs_rw_header *rhdr; struct nfs_pgio_header *hdr; int ret; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 9e426cc179ed..d29ca3673694 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -34,9 +34,9 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; static struct kmem_cache *nfs_rdata_cachep; -struct nfs_read_header *nfs_readhdr_alloc(void) +struct nfs_rw_header *nfs_readhdr_alloc(void) { - struct nfs_read_header *rhdr; + struct nfs_rw_header *rhdr; rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); if (rhdr) { @@ -56,7 +56,7 @@ static struct nfs_pgio_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, { struct nfs_pgio_data *data, *prealloc; - prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data; + prealloc = &container_of(hdr, struct nfs_rw_header, header)->rpc_data; if (prealloc->header == NULL) data = prealloc; else @@ -78,7 +78,7 @@ out: void nfs_readhdr_free(struct nfs_pgio_header *hdr) { - struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header); + struct nfs_rw_header *rhdr = container_of(hdr, struct nfs_rw_header, header); kmem_cache_free(nfs_rdata_cachep, rhdr); } @@ -87,7 +87,7 @@ EXPORT_SYMBOL_GPL(nfs_readhdr_free); void nfs_readdata_release(struct nfs_pgio_data *rdata) { struct nfs_pgio_header *hdr = rdata->header; - struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header); + struct nfs_rw_header *read_header = container_of(hdr, struct nfs_rw_header, header); put_nfs_open_context(rdata->args.context); if (rdata->pages.pagevec != rdata->pages.page_array) @@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pagein); static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - struct nfs_read_header *rhdr; + struct nfs_rw_header *rhdr; struct nfs_pgio_header *hdr; int ret; @@ -680,7 +680,7 @@ out: int __init nfs_init_readpagecache(void) { nfs_rdata_cachep = kmem_cache_create("nfs_read_data", - sizeof(struct nfs_read_header), + sizeof(struct nfs_rw_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_rdata_cachep == NULL) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3a2fc5c4c79a..37c4c988519c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -70,9 +70,9 @@ void nfs_commit_free(struct nfs_commit_data *p) } EXPORT_SYMBOL_GPL(nfs_commit_free); -struct nfs_write_header *nfs_writehdr_alloc(void) +struct nfs_rw_header *nfs_writehdr_alloc(void) { - struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); + struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); if (p) { struct nfs_pgio_header *hdr = &p->header; @@ -93,7 +93,7 @@ static struct nfs_pgio_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, { struct nfs_pgio_data *data, *prealloc; - prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data; + prealloc = &container_of(hdr, struct nfs_rw_header, header)->rpc_data; if (prealloc->header == NULL) data = prealloc; else @@ -115,7 +115,7 @@ out: void nfs_writehdr_free(struct nfs_pgio_header *hdr) { - struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); + struct nfs_rw_header *whdr = container_of(hdr, struct nfs_rw_header, header); mempool_free(whdr, nfs_wdata_mempool); } EXPORT_SYMBOL_GPL(nfs_writehdr_free); @@ -123,7 +123,7 @@ EXPORT_SYMBOL_GPL(nfs_writehdr_free); void nfs_writedata_release(struct nfs_pgio_data *wdata) { struct nfs_pgio_header *hdr = wdata->header; - struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header); + struct nfs_rw_header *write_header = container_of(hdr, struct nfs_rw_header, header); put_nfs_open_context(wdata->args.context); if (wdata->pages.pagevec != wdata->pages.page_array) @@ -1253,7 +1253,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_flush); static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - struct nfs_write_header *whdr; + struct nfs_rw_header *whdr; struct nfs_pgio_header *hdr; int ret; @@ -1910,7 +1910,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", - sizeof(struct nfs_write_header), + sizeof(struct nfs_rw_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_wdata_cachep == NULL) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 64f6f1491c0d..2d34e0dc1870 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1294,12 +1294,7 @@ struct nfs_pgio_data { struct nfs_client *ds_clp; /* pNFS data server */ }; -struct nfs_read_header { - struct nfs_pgio_header header; - struct nfs_pgio_data rpc_data; -}; - -struct nfs_write_header { +struct nfs_rw_header { struct nfs_pgio_header header; struct nfs_pgio_data rpc_data; struct nfs_writeverf verf; -- cgit v1.2.3-71-gd317 From f79d06f544a797d75cbf5256a5d06c4b3d2759cc Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:28 -0400 Subject: NFS: Move the write verifier into the nfs_pgio_header The header had a pointer to the verifier that was set from the old write data struct. We don't need to keep the pointer around now that we have shared structures. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++-- fs/nfs/write.c | 7 +++---- include/linux/nfs_xdr.h | 3 +-- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bbe688e2cc89..164b0167677b 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -813,12 +813,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) bit = NFS_IOHDR_NEED_RESCHED; else if (dreq->flags == 0) { - memcpy(&dreq->verf, hdr->verf, + memcpy(&dreq->verf, &hdr->verf, sizeof(dreq->verf)); bit = NFS_IOHDR_NEED_COMMIT; dreq->flags = NFS_ODIRECT_DO_COMMIT; } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { - if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) { + if (memcmp(&dreq->verf, &hdr->verf, sizeof(dreq->verf))) { dreq->flags = NFS_ODIRECT_RESCHED_WRITES; bit = NFS_IOHDR_NEED_RESCHED; } else diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 37c4c988519c..321a791c72bf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -82,7 +82,6 @@ struct nfs_rw_header *nfs_writehdr_alloc(void) INIT_LIST_HEAD(&hdr->rpc_list); spin_lock_init(&hdr->lock); atomic_set(&hdr->refcnt, 0); - hdr->verf = &p->verf; } return p; } @@ -644,7 +643,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) goto next; } if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { - memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf)); + memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo); goto next; } @@ -1344,8 +1343,8 @@ static void nfs_writeback_release_common(void *calldata) if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) ; /* Do nothing */ else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) - memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf)); - else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf))) + memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf)); + else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf))) set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); spin_unlock(&hdr->lock); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2d34e0dc1870..965c2aa6b33f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1263,7 +1263,7 @@ struct nfs_pgio_header { struct list_head rpc_list; atomic_t refcnt; struct nfs_page *req; - struct nfs_writeverf *verf; + struct nfs_writeverf verf; /* Used for writes */ struct pnfs_layout_segment *lseg; loff_t io_start; const struct rpc_call_ops *mds_ops; @@ -1297,7 +1297,6 @@ struct nfs_pgio_data { struct nfs_rw_header { struct nfs_pgio_header header; struct nfs_pgio_data rpc_data; - struct nfs_writeverf verf; }; struct nfs_mds_commit_info { -- cgit v1.2.3-71-gd317 From 4a0de55c565a36cac8422b76a948c4634a90781e Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:30 -0400 Subject: NFS: Create a common rw_header_alloc and rw_header_free function I create a new struct nfs_rw_ops to decide the differences between reads and writes. This struct will be set when initializing a new nfs_pgio_descriptor, and then passed on to the nfs_rw_header when a new header is allocated. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 6 ++---- fs/nfs/pagelist.c | 33 +++++++++++++++++++++++++++++++++ fs/nfs/pnfs.c | 8 ++++---- fs/nfs/read.c | 34 +++++++++++++--------------------- fs/nfs/write.c | 28 ++++++++++++---------------- include/linux/nfs_page.h | 7 +++++++ include/linux/nfs_xdr.h | 1 + 7 files changed, 72 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5ddc142c5062..9d6a40eae11c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -237,6 +237,8 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); int nfs_iocounter_wait(struct nfs_io_counter *c); +struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); +void nfs_rw_header_free(struct nfs_pgio_header *); struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int); void nfs_pgio_data_release(struct nfs_pgio_data *); @@ -397,8 +399,6 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool struct nfs_pgio_completion_ops; /* read.c */ -extern struct nfs_rw_header *nfs_readhdr_alloc(void); -extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); @@ -425,8 +425,6 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data); extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); -extern struct nfs_rw_header *nfs_writehdr_alloc(void); -extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index a98ccf722d7b..ca356fe0836b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -300,6 +300,37 @@ static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr) return container_of(hdr, struct nfs_rw_header, header); } +/** + * nfs_rw_header_alloc - Allocate a header for a read or write + * @ops: Read or write function vector + */ +struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops) +{ + struct nfs_rw_header *header = ops->rw_alloc_header(); + + if (header) { + struct nfs_pgio_header *hdr = &header->header; + + INIT_LIST_HEAD(&hdr->pages); + INIT_LIST_HEAD(&hdr->rpc_list); + spin_lock_init(&hdr->lock); + atomic_set(&hdr->refcnt, 0); + hdr->rw_ops = ops; + } + return header; +} +EXPORT_SYMBOL_GPL(nfs_rw_header_alloc); + +/* + * nfs_rw_header_free - Free a read or write header + * @hdr: The header to free + */ +void nfs_rw_header_free(struct nfs_pgio_header *hdr) +{ + hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr)); +} +EXPORT_SYMBOL_GPL(nfs_rw_header_free); + /** * nfs_pgio_data_alloc - Allocate pageio data * @hdr: The header making a request @@ -367,6 +398,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, const struct nfs_pageio_ops *pg_ops, const struct nfs_pgio_completion_ops *compl_ops, + const struct nfs_rw_ops *rw_ops, size_t bsize, int io_flags) { @@ -380,6 +412,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_inode = inode; desc->pg_ops = pg_ops; desc->pg_completion_ops = compl_ops; + desc->pg_rw_ops = rw_ops; desc->pg_ioflags = io_flags; desc->pg_error = 0; desc->pg_lseg = NULL; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e192ba69a7d4..54c84c128b2b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1585,7 +1585,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) { pnfs_put_lseg(hdr->lseg); - nfs_writehdr_free(hdr); + nfs_rw_header_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_writehdr_free); @@ -1596,7 +1596,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) struct nfs_pgio_header *hdr; int ret; - whdr = nfs_writehdr_alloc(); + whdr = nfs_rw_header_alloc(desc->pg_rw_ops); if (!whdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); pnfs_put_lseg(desc->pg_lseg); @@ -1743,7 +1743,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) { pnfs_put_lseg(hdr->lseg); - nfs_readhdr_free(hdr); + nfs_rw_header_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_readhdr_free); @@ -1754,7 +1754,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) struct nfs_pgio_header *hdr; int ret; - rhdr = nfs_readhdr_alloc(); + rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); if (!rhdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); ret = -ENOMEM; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index ab4c1a5b5fbd..4cf3577bd54e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -31,33 +31,19 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops; static const struct rpc_call_ops nfs_read_common_ops; static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; +static const struct nfs_rw_ops nfs_rw_read_ops; static struct kmem_cache *nfs_rdata_cachep; -struct nfs_rw_header *nfs_readhdr_alloc(void) +static struct nfs_rw_header *nfs_readhdr_alloc(void) { - struct nfs_rw_header *rhdr; - - rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); - if (rhdr) { - struct nfs_pgio_header *hdr = &rhdr->header; - - INIT_LIST_HEAD(&hdr->pages); - INIT_LIST_HEAD(&hdr->rpc_list); - spin_lock_init(&hdr->lock); - atomic_set(&hdr->refcnt, 0); - } - return rhdr; + return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); } -EXPORT_SYMBOL_GPL(nfs_readhdr_alloc); -void nfs_readhdr_free(struct nfs_pgio_header *hdr) +static void nfs_readhdr_free(struct nfs_rw_header *rhdr) { - struct nfs_rw_header *rhdr = container_of(hdr, struct nfs_rw_header, header); - kmem_cache_free(nfs_rdata_cachep, rhdr); } -EXPORT_SYMBOL_GPL(nfs_readhdr_free); static int nfs_return_empty_page(struct page *page) @@ -79,7 +65,8 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, if (server->pnfs_curr_ld && !force_mds) pg_ops = server->pnfs_curr_ld->pg_read_ops; #endif - nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->rsize, 0); + nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops, + server->rsize, 0); } EXPORT_SYMBOL_GPL(nfs_pageio_init_read); @@ -375,13 +362,13 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) struct nfs_pgio_header *hdr; int ret; - rhdr = nfs_readhdr_alloc(); + rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); if (!rhdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } hdr = &rhdr->header; - nfs_pgheader_init(desc, hdr, nfs_readhdr_free); + nfs_pgheader_init(desc, hdr, nfs_rw_header_free); atomic_inc(&hdr->refcnt); ret = nfs_generic_pagein(desc, hdr); if (ret == 0) @@ -647,3 +634,8 @@ void nfs_destroy_readpagecache(void) { kmem_cache_destroy(nfs_rdata_cachep); } + +static const struct nfs_rw_ops nfs_rw_read_ops = { + .rw_alloc_header = nfs_readhdr_alloc, + .rw_free_header = nfs_readhdr_free, +}; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0dc4d6a28bd0..9c5cde38da45 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_write_common_ops; static const struct rpc_call_ops nfs_commit_ops; static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; static const struct nfs_commit_completion_ops nfs_commit_completion_ops; +static const struct nfs_rw_ops nfs_rw_write_ops; static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -70,29 +71,19 @@ void nfs_commit_free(struct nfs_commit_data *p) } EXPORT_SYMBOL_GPL(nfs_commit_free); -struct nfs_rw_header *nfs_writehdr_alloc(void) +static struct nfs_rw_header *nfs_writehdr_alloc(void) { struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); - if (p) { - struct nfs_pgio_header *hdr = &p->header; - + if (p) memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&hdr->pages); - INIT_LIST_HEAD(&hdr->rpc_list); - spin_lock_init(&hdr->lock); - atomic_set(&hdr->refcnt, 0); - } return p; } -EXPORT_SYMBOL_GPL(nfs_writehdr_alloc); -void nfs_writehdr_free(struct nfs_pgio_header *hdr) +static void nfs_writehdr_free(struct nfs_rw_header *whdr) { - struct nfs_rw_header *whdr = container_of(hdr, struct nfs_rw_header, header); mempool_free(whdr, nfs_wdata_mempool); } -EXPORT_SYMBOL_GPL(nfs_writehdr_free); static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) { @@ -1210,13 +1201,13 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) struct nfs_pgio_header *hdr; int ret; - whdr = nfs_writehdr_alloc(); + whdr = nfs_rw_header_alloc(desc->pg_rw_ops); if (!whdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } hdr = &whdr->header; - nfs_pgheader_init(desc, hdr, nfs_writehdr_free); + nfs_pgheader_init(desc, hdr, nfs_rw_header_free); atomic_inc(&hdr->refcnt); ret = nfs_generic_flush(desc, hdr); if (ret == 0) @@ -1244,7 +1235,8 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, if (server->pnfs_curr_ld && !force_mds) pg_ops = server->pnfs_curr_ld->pg_write_ops; #endif - nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->wsize, ioflags); + nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops, + server->wsize, ioflags); } EXPORT_SYMBOL_GPL(nfs_pageio_init_write); @@ -1925,3 +1917,7 @@ void nfs_destroy_writepagecache(void) kmem_cache_destroy(nfs_wdata_cachep); } +static const struct nfs_rw_ops nfs_rw_write_ops = { + .rw_alloc_header = nfs_writehdr_alloc, + .rw_free_header = nfs_writehdr_free, +}; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 92ce5783b707..594812546c25 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -52,6 +52,11 @@ struct nfs_pageio_ops { int (*pg_doio)(struct nfs_pageio_descriptor *); }; +struct nfs_rw_ops { + struct nfs_rw_header *(*rw_alloc_header)(void); + void (*rw_free_header)(struct nfs_rw_header *); +}; + struct nfs_pageio_descriptor { struct list_head pg_list; unsigned long pg_bytes_written; @@ -63,6 +68,7 @@ struct nfs_pageio_descriptor { struct inode *pg_inode; const struct nfs_pageio_ops *pg_ops; + const struct nfs_rw_ops *pg_rw_ops; int pg_ioflags; int pg_error; const struct rpc_call_ops *pg_rpc_callops; @@ -86,6 +92,7 @@ extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, const struct nfs_pageio_ops *pg_ops, const struct nfs_pgio_completion_ops *compl_ops, + const struct nfs_rw_ops *rw_ops, size_t bsize, int how); extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 965c2aa6b33f..a1b91b67145e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1269,6 +1269,7 @@ struct nfs_pgio_header { const struct rpc_call_ops *mds_ops; void (*release) (struct nfs_pgio_header *hdr); const struct nfs_pgio_completion_ops *completion_ops; + const struct nfs_rw_ops *rw_ops; struct nfs_direct_req *dreq; void *layout_private; spinlock_t lock; -- cgit v1.2.3-71-gd317 From a4cdda59111f92000297e0d3edb1e0e08ba3549b Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:31 -0400 Subject: NFS: Create a common pgio_rpc_prepare function The read and write paths do exactly the same thing for the rpc_prepare rpc_op. This patch combines them together into a single function. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 ++ fs/nfs/nfs3proc.c | 11 ++--------- fs/nfs/nfs4proc.c | 22 +++------------------- fs/nfs/pagelist.c | 26 ++++++++++++++++++++++++++ fs/nfs/proc.c | 11 ++--------- fs/nfs/read.c | 19 +++---------------- fs/nfs/write.c | 19 +++++-------------- include/linux/nfs_page.h | 2 ++ include/linux/nfs_xdr.h | 3 +-- 9 files changed, 46 insertions(+), 69 deletions(-) (limited to 'include') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9d6a40eae11c..1959260f8c57 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -241,6 +241,8 @@ struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); void nfs_rw_header_free(struct nfs_pgio_header *); struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int); void nfs_pgio_data_release(struct nfs_pgio_data *); +void nfs_pgio_prepare(struct rpc_task *, void *); +void nfs_pgio_release(void *); static inline void nfs_iocounter_init(struct nfs_io_counter *c) { diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d235369c3dfb..e7daa42bbc86 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -812,7 +812,7 @@ static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; } -static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { rpc_call_start(task); return 0; @@ -834,12 +834,6 @@ static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; } -static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) -{ - rpc_call_start(task); - return 0; -} - static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) { rpc_call_start(task); @@ -946,11 +940,10 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .fsinfo = nfs3_proc_fsinfo, .pathconf = nfs3_proc_pathconf, .decode_dirent = nfs3_decode_dirent, + .pgio_rpc_prepare = nfs3_proc_pgio_rpc_prepare, .read_setup = nfs3_proc_read_setup, - .read_rpc_prepare = nfs3_proc_read_rpc_prepare, .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, - .write_rpc_prepare = nfs3_proc_write_rpc_prepare, .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e793aa91454a..44fb93a66d26 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4089,7 +4089,7 @@ static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); } -static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, @@ -4097,7 +4097,7 @@ static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_dat task)) return 0; if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, - data->args.lock_context, FMODE_READ) == -EIO) + data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO) return -EIO; if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) return -EIO; @@ -4177,21 +4177,6 @@ static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); } -static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) -{ - if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), - &data->args.seq_args, - &data->res.seq_res, - task)) - return 0; - if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, - data->args.lock_context, FMODE_WRITE) == -EIO) - return -EIO; - if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) - return -EIO; - return 0; -} - static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) { nfs4_setup_sequence(NFS_SERVER(data->inode), @@ -8432,11 +8417,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .pathconf = nfs4_proc_pathconf, .set_capabilities = nfs4_server_capabilities, .decode_dirent = nfs4_decode_dirent, + .pgio_rpc_prepare = nfs4_proc_pgio_rpc_prepare, .read_setup = nfs4_proc_read_setup, - .read_rpc_prepare = nfs4_proc_read_rpc_prepare, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, - .write_rpc_prepare = nfs4_proc_write_rpc_prepare, .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ca356fe0836b..0fa211d35e40 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -386,6 +386,32 @@ void nfs_pgio_data_release(struct nfs_pgio_data *data) } EXPORT_SYMBOL_GPL(nfs_pgio_data_release); +/** + * nfs_pgio_prepare - Prepare pageio data to go over the wire + * @task: The current task + * @calldata: pageio data to prepare + */ +void nfs_pgio_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_pgio_data *data = calldata; + int err; + err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data); + if (err) + rpc_exit(task, err); +} + +/** + * nfs_pgio_release - Release pageio data + * @calldata: The pageio data to release + */ +void nfs_pgio_release(void *calldata) +{ + struct nfs_pgio_data *data = calldata; + if (data->header->rw_ops->rw_release) + data->header->rw_ops->rw_release(data); + nfs_pgio_data_release(data); +} + /** * nfs_pageio_init - initialise a page io descriptor * @desc: pointer to descriptor diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index c54829eb2156..c171ce1a8a30 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -599,7 +599,7 @@ static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message * msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; } -static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { rpc_call_start(task); return 0; @@ -621,12 +621,6 @@ static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; } -static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) -{ - rpc_call_start(task); - return 0; -} - static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) { BUG(); @@ -734,11 +728,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .fsinfo = nfs_proc_fsinfo, .pathconf = nfs_proc_pathconf, .decode_dirent = nfs2_decode_dirent, + .pgio_rpc_prepare = nfs_proc_pgio_rpc_prepare, .read_setup = nfs_proc_read_setup, - .read_rpc_prepare = nfs_proc_read_rpc_prepare, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, - .write_rpc_prepare = nfs_proc_write_rpc_prepare, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, .commit_rpc_prepare = nfs_proc_commit_rpc_prepare, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 4cf3577bd54e..cfa15e828dd6 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -454,24 +454,10 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) nfs_readpage_retry(task, data); } -static void nfs_readpage_release_common(void *calldata) -{ - nfs_pgio_data_release(calldata); -} - -void nfs_read_prepare(struct rpc_task *task, void *calldata) -{ - struct nfs_pgio_data *data = calldata; - int err; - err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); - if (err) - rpc_exit(task, err); -} - static const struct rpc_call_ops nfs_read_common_ops = { - .rpc_call_prepare = nfs_read_prepare, + .rpc_call_prepare = nfs_pgio_prepare, .rpc_call_done = nfs_readpage_result_common, - .rpc_release = nfs_readpage_release_common, + .rpc_release = nfs_pgio_release, }; /* @@ -636,6 +622,7 @@ void nfs_destroy_readpagecache(void) } static const struct nfs_rw_ops nfs_rw_read_ops = { + .rw_mode = FMODE_READ, .rw_alloc_header = nfs_readhdr_alloc, .rw_free_header = nfs_readhdr_free, }; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9c5cde38da45..ae799c96ec2b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1248,15 +1248,6 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); -void nfs_write_prepare(struct rpc_task *task, void *calldata) -{ - struct nfs_pgio_data *data = calldata; - int err; - err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); - if (err) - rpc_exit(task, err); -} - void nfs_commit_prepare(struct rpc_task *task, void *calldata) { struct nfs_commit_data *data = calldata; @@ -1278,9 +1269,8 @@ static void nfs_writeback_done_common(struct rpc_task *task, void *calldata) nfs_writeback_done(task, data); } -static void nfs_writeback_release_common(void *calldata) +static void nfs_writeback_release_common(struct nfs_pgio_data *data) { - struct nfs_pgio_data *data = calldata; struct nfs_pgio_header *hdr = data->header; int status = data->task.tk_status; @@ -1294,13 +1284,12 @@ static void nfs_writeback_release_common(void *calldata) set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); spin_unlock(&hdr->lock); } - nfs_pgio_data_release(data); } static const struct rpc_call_ops nfs_write_common_ops = { - .rpc_call_prepare = nfs_write_prepare, + .rpc_call_prepare = nfs_pgio_prepare, .rpc_call_done = nfs_writeback_done_common, - .rpc_release = nfs_writeback_release_common, + .rpc_release = nfs_pgio_release, }; /* @@ -1918,6 +1907,8 @@ void nfs_destroy_writepagecache(void) } static const struct nfs_rw_ops nfs_rw_write_ops = { + .rw_mode = FMODE_WRITE, .rw_alloc_header = nfs_writehdr_alloc, .rw_free_header = nfs_writehdr_free, + .rw_release = nfs_writeback_release_common, }; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 594812546c25..da00a4d6f470 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -53,8 +53,10 @@ struct nfs_pageio_ops { }; struct nfs_rw_ops { + const fmode_t rw_mode; struct nfs_rw_header *(*rw_alloc_header)(void); void (*rw_free_header)(struct nfs_rw_header *); + void (*rw_release)(struct nfs_pgio_data *); }; struct nfs_pageio_descriptor { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index a1b91b67145e..adef7bd2d06d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1429,11 +1429,10 @@ struct nfs_rpc_ops { struct nfs_pathconf *); int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int); + int (*pgio_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *); void (*read_setup) (struct nfs_pgio_data *, struct rpc_message *); - int (*read_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *); int (*read_done) (struct rpc_task *, struct nfs_pgio_data *); void (*write_setup) (struct nfs_pgio_data *, struct rpc_message *); - int (*write_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *); int (*write_done) (struct rpc_task *, struct nfs_pgio_data *); void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *); -- cgit v1.2.3-71-gd317 From 0eecb2145c1ce18e36617008424a93836ad0a3bd Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:32 -0400 Subject: NFS: Create a common nfs_pgio_result_common function Combining these functions will let me make a single nfs_rw_common_ops struct (see the next patch). Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 1 + fs/nfs/pagelist.c | 23 ++++++++++++++++++++++ fs/nfs/read.c | 25 ++++++++---------------- fs/nfs/write.c | 51 +++++++++++++++++++----------------------------- include/linux/nfs_fs.h | 2 -- include/linux/nfs_page.h | 2 ++ 6 files changed, 54 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 1959260f8c57..7c0ae364bdad 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -243,6 +243,7 @@ struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int void nfs_pgio_data_release(struct nfs_pgio_data *); void nfs_pgio_prepare(struct rpc_task *, void *); void nfs_pgio_release(void *); +void nfs_pgio_result(struct rpc_task *, void *); static inline void nfs_iocounter_init(struct nfs_io_counter *c) { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 0fa211d35e40..f74df87058b6 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -24,6 +24,8 @@ #include "internal.h" #include "pnfs.h" +#define NFSDBG_FACILITY NFSDBG_PAGECACHE + static struct kmem_cache *nfs_page_cachep; static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) @@ -447,6 +449,27 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, } EXPORT_SYMBOL_GPL(nfs_pageio_init); +/** + * nfs_pgio_result - Basic pageio error handling + * @task: The task that ran + * @calldata: Pageio data to check + */ +void nfs_pgio_result(struct rpc_task *task, void *calldata) +{ + struct nfs_pgio_data *data = calldata; + struct inode *inode = data->header->inode; + + dprintk("NFS: %s: %5u, (status %d)\n", __func__, + task->tk_pid, task->tk_status); + + if (data->header->rw_ops->rw_done(task, data, inode) != 0) + return; + if (task->tk_status < 0) + nfs_set_pgio_error(data->header, task->tk_status, data->args.offset); + else + data->header->rw_ops->rw_result(task, data); +} + static bool nfs_match_open_context(const struct nfs_open_context *ctx1, const struct nfs_open_context *ctx2) { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index cfa15e828dd6..bc78bd248eb8 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -388,15 +388,10 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = { * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -int nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, + struct inode *inode) { - struct inode *inode = data->header->inode; - int status; - - dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, - task->tk_status); - - status = NFS_PROTO(inode)->read_done(task, data); + int status = NFS_PROTO(inode)->read_done(task, data); if (status != 0) return status; @@ -429,17 +424,11 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data rpc_restart_call_prepare(task); } -static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) +static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) { - struct nfs_pgio_data *data = calldata; struct nfs_pgio_header *hdr = data->header; - /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */ - if (nfs_readpage_result(task, data) != 0) - return; - if (task->tk_status < 0) - nfs_set_pgio_error(hdr, task->tk_status, data->args.offset); - else if (data->res.eof) { + if (data->res.eof) { loff_t bound; bound = data->args.offset + data->res.count; @@ -456,7 +445,7 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) static const struct rpc_call_ops nfs_read_common_ops = { .rpc_call_prepare = nfs_pgio_prepare, - .rpc_call_done = nfs_readpage_result_common, + .rpc_call_done = nfs_pgio_result, .rpc_release = nfs_pgio_release, }; @@ -625,4 +614,6 @@ static const struct nfs_rw_ops nfs_rw_read_ops = { .rw_mode = FMODE_READ, .rw_alloc_header = nfs_readhdr_alloc, .rw_free_header = nfs_readhdr_free, + .rw_done = nfs_readpage_done, + .rw_result = nfs_readpage_result, }; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ae799c96ec2b..1d3e1d75c8c5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1255,20 +1255,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); } -/* - * Handle a write reply that flushes a whole page. - * - * FIXME: There is an inherent race with invalidate_inode_pages and - * writebacks since the page->count is kept > 1 for as long - * as the page has a write request pending. - */ -static void nfs_writeback_done_common(struct rpc_task *task, void *calldata) -{ - struct nfs_pgio_data *data = calldata; - - nfs_writeback_done(task, data); -} - static void nfs_writeback_release_common(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1288,7 +1274,7 @@ static void nfs_writeback_release_common(struct nfs_pgio_data *data) static const struct rpc_call_ops nfs_write_common_ops = { .rpc_call_prepare = nfs_pgio_prepare, - .rpc_call_done = nfs_writeback_done_common, + .rpc_call_done = nfs_pgio_result, .rpc_release = nfs_pgio_release, }; @@ -1320,16 +1306,11 @@ static int nfs_should_remove_suid(const struct inode *inode) /* * This function is called when the WRITE call is complete. */ -void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, + struct inode *inode) { - struct nfs_pgio_args *argp = &data->args; - struct nfs_pgio_res *resp = &data->res; - struct inode *inode = data->header->inode; int status; - dprintk("NFS: %5u nfs_writeback_done (status %d)\n", - task->tk_pid, task->tk_status); - /* * ->write_done will attempt to use post-op attributes to detect * conflicting writes by other clients. A strict interpretation @@ -1339,11 +1320,11 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data) */ status = NFS_PROTO(inode)->write_done(task, data); if (status != 0) - return; - nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count); + return status; + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count); #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) - if (resp->verf->committed < argp->stable && task->tk_status >= 0) { + if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) { /* We tried a write call, but the server did not * commit data to stable storage even though we * requested it. @@ -1359,25 +1340,31 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data) dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", NFS_SERVER(inode)->nfs_client->cl_hostname, - resp->verf->committed, argp->stable); + data->res.verf->committed, data->args.stable); complain = jiffies + 300 * HZ; } } #endif - if (task->tk_status < 0) { - nfs_set_pgio_error(data->header, task->tk_status, argp->offset); - return; - } /* Deal with the suid/sgid bit corner case */ if (nfs_should_remove_suid(inode)) nfs_mark_for_revalidate(inode); + return 0; +} + +/* + * This function is called when the WRITE call is complete. + */ +static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data) +{ + struct nfs_pgio_args *argp = &data->args; + struct nfs_pgio_res *resp = &data->res; if (resp->count < argp->count) { static unsigned long complain; /* This a short write! */ - nfs_inc_stats(inode, NFSIOS_SHORTWRITE); + nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE); /* Has the server at least made some progress? */ if (resp->count == 0) { @@ -1911,4 +1898,6 @@ static const struct nfs_rw_ops nfs_rw_write_ops = { .rw_alloc_header = nfs_writehdr_alloc, .rw_free_header = nfs_writehdr_free, .rw_release = nfs_writeback_release_common, + .rw_done = nfs_writeback_done, + .rw_result = nfs_writeback_result, }; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 7e0db561d829..919576b8e2cf 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -520,7 +520,6 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); -extern void nfs_writeback_done(struct rpc_task *, struct nfs_pgio_data *); /* * Try to write back everything synchronously (but check the @@ -553,7 +552,6 @@ nfs_have_writebacks(struct inode *inode) extern int nfs_readpage(struct file *, struct page *); extern int nfs_readpages(struct file *, struct address_space *, struct list_head *, unsigned); -extern int nfs_readpage_result(struct rpc_task *, struct nfs_pgio_data *); extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, struct page *); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index da00a4d6f470..01aa29c5ec42 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -57,6 +57,8 @@ struct nfs_rw_ops { struct nfs_rw_header *(*rw_alloc_header)(void); void (*rw_free_header)(struct nfs_rw_header *); void (*rw_release)(struct nfs_pgio_data *); + int (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *); + void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *); }; struct nfs_pageio_descriptor { -- cgit v1.2.3-71-gd317 From b98abe52fa8e2a3797d3cc2db3d0e109f4549c03 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Wed, 28 May 2014 23:51:53 -0700 Subject: Input: add common DT binding for touchscreens Add common DT binding documentation for touchscreen devices and implement input_parse_touchscreen_of_params, which parses the common properties and configures the input device accordingly. The method currently does not interpret the axis inversion properties, since there is no matching flag in the generic linux input device. Reviewed-by: Pavel Machek Acked-by: Aaro Koskinen Signed-off-by: Sebastian Reichel Signed-off-by: Dmitry Torokhov --- .../bindings/input/touchscreen/touchscreen.txt | 27 +++++++++++++ drivers/input/touchscreen/Kconfig | 4 ++ drivers/input/touchscreen/Makefile | 1 + drivers/input/touchscreen/of_touchscreen.c | 45 ++++++++++++++++++++++ include/linux/input/touchscreen.h | 22 +++++++++++ 5 files changed, 99 insertions(+) create mode 100644 Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt create mode 100644 drivers/input/touchscreen/of_touchscreen.c create mode 100644 include/linux/input/touchscreen.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt new file mode 100644 index 000000000000..d8e06163c54e --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt @@ -0,0 +1,27 @@ +General Touchscreen Properties: + +Optional properties for Touchscreens: + - touchscreen-size-x : horizontal resolution of touchscreen + (in pixels) + - touchscreen-size-y : vertical resolution of touchscreen + (in pixels) + - touchscreen-max-pressure : maximum reported pressure (arbitrary range + dependent on the controller) + - touchscreen-fuzz-x : horizontal noise value of the absolute input + device (in pixels) + - touchscreen-fuzz-y : vertical noise value of the absolute input + device (in pixels) + - touchscreen-fuzz-pressure : pressure noise value of the absolute input + device (arbitrary range dependent on the + controller) + - touchscreen-inverted-x : X axis is inverted (boolean) + - touchscreen-inverted-y : Y axis is inverted (boolean) + +Deprecated properties for Touchscreens: + - x-size : deprecated name for touchscreen-size-x + - y-size : deprecated name for touchscreen-size-y + - moving-threshold : deprecated name for a combination of + touchscreen-fuzz-x and touchscreen-fuzz-y + - contact-threshold : deprecated name for touchscreen-fuzz-pressure + - x-invert : deprecated name for touchscreen-inverted-x + - y-invert : deprecated name for touchscreen-inverted-y diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 43389c097b02..8e07fe8505fd 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -11,6 +11,10 @@ menuconfig INPUT_TOUCHSCREEN if INPUT_TOUCHSCREEN +config OF_TOUCHSCREEN + def_tristate INPUT + depends on INPUT && OF + config TOUCHSCREEN_88PM860X tristate "Marvell 88PM860x touchscreen" depends on MFD_88PM860X diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index 71a97559ce68..4d479fb0a768 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -6,6 +6,7 @@ wm97xx-ts-y := wm97xx-core.o +obj-$(CONFIG_OF_TOUCHSCREEN) += of_touchscreen.o obj-$(CONFIG_TOUCHSCREEN_88PM860X) += 88pm860x-ts.o obj-$(CONFIG_TOUCHSCREEN_AD7877) += ad7877.o obj-$(CONFIG_TOUCHSCREEN_AD7879) += ad7879.o diff --git a/drivers/input/touchscreen/of_touchscreen.c b/drivers/input/touchscreen/of_touchscreen.c new file mode 100644 index 000000000000..f8f9b84230b1 --- /dev/null +++ b/drivers/input/touchscreen/of_touchscreen.c @@ -0,0 +1,45 @@ +/* + * Generic DT helper functions for touchscreen devices + * + * Copyright (c) 2014 Sebastian Reichel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include + +/** + * touchscreen_parse_of_params - parse common touchscreen DT properties + * @dev: device that should be parsed + * + * This function parses common DT properties for touchscreens and setups the + * input device accordingly. The function keeps previously setuped default + * values if no value is specified via DT. + */ +void touchscreen_parse_of_params(struct input_dev *dev) +{ + struct device_node *np = dev->dev.parent->of_node; + struct input_absinfo *absinfo; + + input_alloc_absinfo(dev); + if (!dev->absinfo) + return; + + absinfo = &dev->absinfo[ABS_X]; + of_property_read_u32(np, "touchscreen-size-x", &absinfo->maximum); + of_property_read_u32(np, "touchscreen-fuzz-x", &absinfo->fuzz); + + absinfo = &dev->absinfo[ABS_Y]; + of_property_read_u32(np, "touchscreen-size-y", &absinfo->maximum); + of_property_read_u32(np, "touchscreen-fuzz-y", &absinfo->fuzz); + + absinfo = &dev->absinfo[ABS_PRESSURE]; + of_property_read_u32(np, "touchscreen-max-pressure", &absinfo->maximum); + of_property_read_u32(np, "touchscreen-fuzz-pressure", &absinfo->fuzz); +} +EXPORT_SYMBOL(touchscreen_parse_of_params); diff --git a/include/linux/input/touchscreen.h b/include/linux/input/touchscreen.h new file mode 100644 index 000000000000..08a5ef6e8f25 --- /dev/null +++ b/include/linux/input/touchscreen.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2014 Sebastian Reichel + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#ifndef _TOUCHSCREEN_H +#define _TOUCHSCREEN_H + +#include + +#ifdef CONFIG_OF +void touchscreen_parse_of_params(struct input_dev *dev); +#else +static inline void touchscreen_parse_of_params(struct input_dev *dev) +{ +} +#endif + +#endif -- cgit v1.2.3-71-gd317 From 61721c88b8d85c9dc13bfeedf75dfc245f397c3c Mon Sep 17 00:00:00 2001 From: Joachim Eastwood Date: Thu, 29 May 2014 00:30:02 -0700 Subject: Input: omap-keypad - remove platform data support This is unused since all users (OMAP4/5) are DT only. Signed-off-by: Joachim Eastwood Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/Kconfig | 2 +- drivers/input/keyboard/omap4-keypad.c | 32 ++++++------------------------ include/linux/platform_data/omap4-keypad.h | 13 ------------ 3 files changed, 7 insertions(+), 40 deletions(-) delete mode 100644 include/linux/platform_data/omap4-keypad.h (limited to 'include') diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 948a30304870..0f84f2346fe4 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -589,7 +589,7 @@ config KEYBOARD_OMAP config KEYBOARD_OMAP4 tristate "TI OMAP4+ keypad support" - depends on ARCH_OMAP2PLUS + depends on OF || ARCH_OMAP2PLUS select INPUT_MATRIXKMAP help Say Y here if you want to use the OMAP4+ keypad. diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c index 0400b3f2b4b9..024b7bdffe5b 100644 --- a/drivers/input/keyboard/omap4-keypad.c +++ b/drivers/input/keyboard/omap4-keypad.c @@ -28,11 +28,10 @@ #include #include #include +#include #include #include -#include - /* OMAP4 registers */ #define OMAP4_KBD_REVISION 0x00 #define OMAP4_KBD_SYSCONFIG 0x10 @@ -218,7 +217,6 @@ static void omap4_keypad_close(struct input_dev *input) pm_runtime_put_sync(input->dev.parent); } -#ifdef CONFIG_OF static int omap4_keypad_parse_dt(struct device *dev, struct omap4_keypad *keypad_data) { @@ -235,20 +233,9 @@ static int omap4_keypad_parse_dt(struct device *dev, return 0; } -#else -static inline int omap4_keypad_parse_dt(struct device *dev, - struct omap4_keypad *keypad_data) -{ - return -ENOSYS; -} -#endif static int omap4_keypad_probe(struct platform_device *pdev) { - const struct omap4_keypad_platform_data *pdata = - dev_get_platdata(&pdev->dev); - const struct matrix_keymap_data *keymap_data = - pdata ? pdata->keymap_data : NULL; struct omap4_keypad *keypad_data; struct input_dev *input_dev; struct resource *res; @@ -277,14 +264,9 @@ static int omap4_keypad_probe(struct platform_device *pdev) keypad_data->irq = irq; - if (pdata) { - keypad_data->rows = pdata->rows; - keypad_data->cols = pdata->cols; - } else { - error = omap4_keypad_parse_dt(&pdev->dev, keypad_data); - if (error) - return error; - } + error = omap4_keypad_parse_dt(&pdev->dev, keypad_data); + if (error) + return error; res = request_mem_region(res->start, resource_size(res), pdev->name); if (!res) { @@ -363,7 +345,7 @@ static int omap4_keypad_probe(struct platform_device *pdev) goto err_free_input; } - error = matrix_keypad_build_keymap(keymap_data, NULL, + error = matrix_keypad_build_keymap(NULL, NULL, keypad_data->rows, keypad_data->cols, keypad_data->keymap, input_dev); if (error) { @@ -434,13 +416,11 @@ static int omap4_keypad_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_OF static const struct of_device_id omap_keypad_dt_match[] = { { .compatible = "ti,omap4-keypad" }, {}, }; MODULE_DEVICE_TABLE(of, omap_keypad_dt_match); -#endif #ifdef CONFIG_PM_SLEEP static int omap4_keypad_suspend(struct device *dev) @@ -482,7 +462,7 @@ static struct platform_driver omap4_keypad_driver = { .name = "omap4-keypad", .owner = THIS_MODULE, .pm = &omap4_keypad_pm_ops, - .of_match_table = of_match_ptr(omap_keypad_dt_match), + .of_match_table = omap_keypad_dt_match, }, }; module_platform_driver(omap4_keypad_driver); diff --git a/include/linux/platform_data/omap4-keypad.h b/include/linux/platform_data/omap4-keypad.h deleted file mode 100644 index 4eef5fb05a17..000000000000 --- a/include/linux/platform_data/omap4-keypad.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __LINUX_INPUT_OMAP4_KEYPAD_H -#define __LINUX_INPUT_OMAP4_KEYPAD_H - -#include - -struct omap4_keypad_platform_data { - const struct matrix_keymap_data *keymap_data; - - u8 rows; - u8 cols; -}; - -#endif /* __LINUX_INPUT_OMAP4_KEYPAD_H */ -- cgit v1.2.3-71-gd317 From 1ed26f33008e954a8e91d26f97d4380dea8145db Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:37 -0400 Subject: NFS: Create a common initiate_pgio() function Most of this code is the same for both the read and write paths, so combine everything and use the rw_ops when necessary. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 9 ++------ fs/nfs/nfs4filelayout.c | 6 +++--- fs/nfs/pagelist.c | 46 ++++++++++++++++++++++++++++++++++++++++ fs/nfs/read.c | 42 ++++++------------------------------ fs/nfs/write.c | 55 ++++++------------------------------------------ include/linux/nfs_page.h | 2 ++ 6 files changed, 66 insertions(+), 94 deletions(-) (limited to 'include') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 365cdb11d0de..be4f2a7e9178 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -241,6 +241,8 @@ struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); void nfs_rw_header_free(struct nfs_pgio_header *); void nfs_pgio_data_release(struct nfs_pgio_data *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); +int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, + const struct rpc_call_ops *, int, int); static inline void nfs_iocounter_init(struct nfs_io_counter *c) { @@ -402,9 +404,6 @@ struct nfs_pgio_completion_ops; extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); -extern int nfs_initiate_read(struct rpc_clnt *clnt, - struct nfs_pgio_data *data, - const struct rpc_call_ops *call_ops, int flags); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); @@ -425,10 +424,6 @@ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_commit_free(struct nfs_commit_data *p); -extern int nfs_initiate_write(struct rpc_clnt *clnt, - struct nfs_pgio_data *data, - const struct rpc_call_ops *call_ops, - int how, int flags); extern void nfs_write_prepare(struct rpc_task *task, void *calldata); extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); extern int nfs_initiate_commit(struct rpc_clnt *clnt, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index e6936147ad95..7954e16a6d83 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -568,8 +568,8 @@ filelayout_read_pagelist(struct nfs_pgio_data *data) data->mds_offset = offset; /* Perform an asynchronous read to ds */ - nfs_initiate_read(ds_clnt, data, - &filelayout_read_call_ops, RPC_TASK_SOFTCONN); + nfs_initiate_pgio(ds_clnt, data, + &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; } @@ -613,7 +613,7 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) data->args.offset = filelayout_get_dserver_offset(lseg, offset); /* Perform an asynchronous write */ - nfs_initiate_write(ds_clnt, data, + nfs_initiate_pgio(ds_clnt, data, &filelayout_write_call_ops, sync, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d8d25a4deb88..ab5b1850ca4f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -447,6 +447,52 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) rpc_exit(task, err); } +int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data, + const struct rpc_call_ops *call_ops, int how, int flags) +{ + struct rpc_task *task; + struct rpc_message msg = { + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = data->header->cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clnt, + .task = &data->task, + .rpc_message = &msg, + .callback_ops = call_ops, + .callback_data = data, + .workqueue = nfsiod_workqueue, + .flags = RPC_TASK_ASYNC | flags, + }; + int ret = 0; + + data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how); + + dprintk("NFS: %5u initiated pgio call " + "(req %s/%llu, %u bytes @ offset %llu)\n", + data->task.tk_pid, + data->header->inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(data->header->inode), + data->args.count, + (unsigned long long)data->args.offset); + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) { + ret = PTR_ERR(task); + goto out; + } + if (how & FLUSH_SYNC) { + ret = rpc_wait_for_completion_task(task); + if (ret == 0) + ret = task->tk_status; + } + rpc_put_task(task); +out: + return ret; +} +EXPORT_SYMBOL_GPL(nfs_initiate_pgio); + /** * nfs_pgio_error - Clean up from a pageio error * @desc: IO descriptor diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 4fcef82d78b4..0359b0d76ef6 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -151,53 +151,22 @@ out: hdr->release(hdr); } -int nfs_initiate_read(struct rpc_clnt *clnt, - struct nfs_pgio_data *data, - const struct rpc_call_ops *call_ops, int flags) +static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg, + struct rpc_task_setup *task_setup_data, int how) { struct inode *inode = data->header->inode; int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; - struct rpc_task *task; - struct rpc_message msg = { - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->header->cred, - }; - struct rpc_task_setup task_setup_data = { - .task = &data->task, - .rpc_client = clnt, - .rpc_message = &msg, - .callback_ops = call_ops, - .callback_data = data, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC | swap_flags | flags, - }; - /* Set up the initial task struct. */ - NFS_PROTO(inode)->read_setup(data, &msg); - - dprintk("NFS: %5u initiated read call (req %s/%llu, %u bytes @ " - "offset %llu)\n", - data->task.tk_pid, - inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(inode), - data->args.count, - (unsigned long long)data->args.offset); - - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - return PTR_ERR(task); - rpc_put_task(task); - return 0; + task_setup_data->flags |= swap_flags; + NFS_PROTO(inode)->read_setup(data, msg); } -EXPORT_SYMBOL_GPL(nfs_initiate_read); static int nfs_do_read(struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops) { struct inode *inode = data->header->inode; - return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0); + return nfs_initiate_pgio(NFS_CLIENT(inode), data, call_ops, 0, 0); } static int @@ -491,4 +460,5 @@ static const struct nfs_rw_ops nfs_rw_read_ops = { .rw_free_header = nfs_readhdr_free, .rw_done = nfs_readpage_done, .rw_result = nfs_readpage_result, + .rw_initiate = nfs_initiate_read, }; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0e34c7024195..e46a1fc6c1fe 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -932,60 +932,18 @@ static int flush_task_priority(int how) return RPC_PRIORITY_NORMAL; } -int nfs_initiate_write(struct rpc_clnt *clnt, - struct nfs_pgio_data *data, - const struct rpc_call_ops *call_ops, - int how, int flags) +static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg, + struct rpc_task_setup *task_setup_data, int how) { struct inode *inode = data->header->inode; int priority = flush_task_priority(how); - struct rpc_task *task; - struct rpc_message msg = { - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->header->cred, - }; - struct rpc_task_setup task_setup_data = { - .rpc_client = clnt, - .task = &data->task, - .rpc_message = &msg, - .callback_ops = call_ops, - .callback_data = data, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC | flags, - .priority = priority, - }; - int ret = 0; - /* Set up the initial task struct. */ - NFS_PROTO(inode)->write_setup(data, &msg); - - dprintk("NFS: %5u initiated write call " - "(req %s/%llu, %u bytes @ offset %llu)\n", - data->task.tk_pid, - inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(inode), - data->args.count, - (unsigned long long)data->args.offset); + task_setup_data->priority = priority; + NFS_PROTO(inode)->write_setup(data, msg); nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, - &task_setup_data.rpc_client, &msg, data); - - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) { - ret = PTR_ERR(task); - goto out; - } - if (how & FLUSH_SYNC) { - ret = rpc_wait_for_completion_task(task); - if (ret == 0) - ret = task->tk_status; - } - rpc_put_task(task); -out: - return ret; + &task_setup_data->rpc_client, msg, data); } -EXPORT_SYMBOL_GPL(nfs_initiate_write); static int nfs_do_write(struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, @@ -993,7 +951,7 @@ static int nfs_do_write(struct nfs_pgio_data *data, { struct inode *inode = data->header->inode; - return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0); + return nfs_initiate_pgio(NFS_CLIENT(inode), data, call_ops, how, 0); } static int nfs_do_multiple_writes(struct list_head *head, @@ -1743,4 +1701,5 @@ static const struct nfs_rw_ops nfs_rw_write_ops = { .rw_release = nfs_writeback_release_common, .rw_done = nfs_writeback_done, .rw_result = nfs_writeback_result, + .rw_initiate = nfs_initiate_write, }; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 01aa29c5ec42..c6a587f7118f 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -59,6 +59,8 @@ struct nfs_rw_ops { void (*rw_release)(struct nfs_pgio_data *); int (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *); void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *); + void (*rw_initiate)(struct nfs_pgio_data *, struct rpc_message *, + struct rpc_task_setup *, int); }; struct nfs_pageio_descriptor { -- cgit v1.2.3-71-gd317 From 12c05792599ec57ebab33096b2c75b863dfe6ea4 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:41 -0400 Subject: nfs: clean up PG_* flags Remove unused flags PG_NEED_COMMIT and PG_NEED_RESCHED. Add comments describing how each flag is used. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index c6a587f7118f..eb2eb6396874 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -22,12 +22,10 @@ * Valid flags for a dirty buffer */ enum { - PG_BUSY = 0, - PG_MAPPED, - PG_CLEAN, - PG_NEED_COMMIT, - PG_NEED_RESCHED, - PG_COMMIT_TO_DS, + PG_BUSY = 0, /* nfs_{un}lock_request */ + PG_MAPPED, /* page private set for buffered io */ + PG_CLEAN, /* write succeeded */ + PG_COMMIT_TO_DS, /* used by pnfs layouts */ }; struct nfs_inode; -- cgit v1.2.3-71-gd317 From 8c8f1ac109726e4ed44a920f5c962c84610d4a17 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:42 -0400 Subject: nfs: remove unused arg from nfs_create_request @inode is passed but not used. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 6 ++---- fs/nfs/pagelist.c | 4 +--- fs/nfs/read.c | 5 ++--- fs/nfs/write.c | 2 +- include/linux/nfs_page.h | 1 - 5 files changed, 6 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 164b0167677b..1dd8c622d719 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -380,8 +380,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de struct nfs_page *req; unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); /* XXX do we need to do the eof zeroing found in async_filler? */ - req = nfs_create_request(dreq->ctx, dreq->inode, - pagevec[i], + req = nfs_create_request(dreq->ctx, pagevec[i], pgbase, req_len); if (IS_ERR(req)) { result = PTR_ERR(req); @@ -750,8 +749,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d struct nfs_page *req; unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); - req = nfs_create_request(dreq->ctx, dreq->inode, - pagevec[i], + req = nfs_create_request(dreq->ctx, pagevec[i], pgbase, req_len); if (IS_ERR(req)) { result = PTR_ERR(req); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 29591094125a..4b4b212ec6b2 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -139,7 +139,6 @@ nfs_iocounter_wait(struct nfs_io_counter *c) /** * nfs_create_request - Create an NFS read/write request. * @ctx: open context to use - * @inode: inode to which the request is attached * @page: page to write * @offset: starting offset within the page for the write * @count: number of bytes to read/write @@ -149,8 +148,7 @@ nfs_iocounter_wait(struct nfs_io_counter *c) * User should ensure it is safe to sleep in this function. */ struct nfs_page * -nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, - struct page *page, +nfs_create_request(struct nfs_open_context *ctx, struct page *page, unsigned int offset, unsigned int count) { struct nfs_page *req; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 3986668e4390..46d90448f69b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -85,7 +85,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(ctx, inode, page, 0, len); + new = nfs_create_request(ctx, page, 0, len); if (IS_ERR(new)) { unlock_page(page); return PTR_ERR(new); @@ -303,7 +303,6 @@ static int readpage_async_filler(void *data, struct page *page) { struct nfs_readdesc *desc = (struct nfs_readdesc *)data; - struct inode *inode = page_file_mapping(page)->host; struct nfs_page *new; unsigned int len; int error; @@ -312,7 +311,7 @@ readpage_async_filler(void *data, struct page *page) if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(desc->ctx, inode, page, 0, len); + new = nfs_create_request(desc->ctx, page, 0, len); if (IS_ERR(new)) goto out_error; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2680f29f8a51..e773df207c05 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -761,7 +761,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, req = nfs_try_to_update_request(inode, page, offset, bytes); if (req != NULL) goto out; - req = nfs_create_request(ctx, inode, page, offset, bytes); + req = nfs_create_request(ctx, page, offset, bytes); if (IS_ERR(req)) goto out; nfs_inode_add_request(inode, req); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index eb2eb6396874..be0b0981e7a0 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -85,7 +85,6 @@ struct nfs_pageio_descriptor { #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, - struct inode *inode, struct page *page, unsigned int offset, unsigned int count); -- cgit v1.2.3-71-gd317 From b4fdac1a5150174df0847a45dc6612ce5ce3daeb Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:43 -0400 Subject: nfs: modify pg_test interface to return size_t This is a step toward allowing pg_test to inform the the coalescing code to reduce the size of requests so they may fit in whatever scheme the pg_test callback wants to define. For now, just return the size of the request if there is space, or 0 if there is not. This shouldn't change any behavior as it acts the same as when the pg_test functions returned bool. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 16 ++++++++++++---- fs/nfs/nfs4filelayout.c | 12 +++++++----- fs/nfs/objlayout/objio_osd.c | 15 ++++++++++----- fs/nfs/pagelist.c | 22 +++++++++++++++++++--- fs/nfs/pnfs.c | 12 +++++++++--- fs/nfs/pnfs.h | 3 ++- include/linux/nfs_page.h | 5 +++-- 7 files changed, 62 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 206cc68c9694..9b431f44fad9 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -1189,13 +1189,17 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) pnfs_generic_pg_init_read(pgio, req); } -static bool +/* + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. + */ +static size_t bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { if (pgio->pg_dreq != NULL && !is_aligned_req(req, SECTOR_SIZE)) - return false; + return 0; return pnfs_generic_pg_test(pgio, prev, req); } @@ -1241,13 +1245,17 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) } } -static bool +/* + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. + */ +static size_t bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { if (pgio->pg_dreq != NULL && !is_aligned_req(req, PAGE_CACHE_SIZE)) - return false; + return 0; return pnfs_generic_pg_test(pgio, prev, req); } diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 9fd7cebbff04..ba9a9aadf6c8 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -915,10 +915,10 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, /* * filelayout_pg_test(). Called by nfs_can_coalesce_requests() * - * return true : coalesce page - * return false : don't coalesce page + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. */ -static bool +static size_t filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { @@ -927,7 +927,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, if (!pnfs_generic_pg_test(pgio, prev, req) || !nfs_generic_pg_test(pgio, prev, req)) - return false; + return 0; p_stripe = (u64)req_offset(prev); r_stripe = (u64)req_offset(req); @@ -936,7 +936,9 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, do_div(p_stripe, stripe_unit); do_div(r_stripe, stripe_unit); - return (p_stripe == r_stripe); + if (p_stripe == r_stripe) + return req->wb_bytes; + return 0; } static void diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 426b366b0b33..71b9c69dbe9c 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -564,14 +564,19 @@ int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) return 0; } -static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, +/* + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. + */ +static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { - if (!pnfs_generic_pg_test(pgio, prev, req)) - return false; + if (!pnfs_generic_pg_test(pgio, prev, req) || + pgio->pg_count + req->wb_bytes > + (unsigned long)pgio->pg_layout_private) + return 0; - return pgio->pg_count + req->wb_bytes <= - (unsigned long)pgio->pg_layout_private; + return req->wb_bytes; } static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 4b4b212ec6b2..82233431880d 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -280,7 +280,17 @@ nfs_wait_on_request(struct nfs_page *req) TASK_UNINTERRUPTIBLE); } -bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) +/* + * nfs_generic_pg_test - determine if requests can be coalesced + * @desc: pointer to descriptor + * @prev: previous request in desc, or NULL + * @req: this request + * + * Returns zero if @req can be coalesced into @desc, otherwise it returns + * the size of the request. + */ +size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, + struct nfs_page *prev, struct nfs_page *req) { /* * FIXME: ideally we should be able to coalesce all requests @@ -292,7 +302,9 @@ bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *pr if (desc->pg_bsize < PAGE_SIZE) return 0; - return desc->pg_count + req->wb_bytes <= desc->pg_bsize; + if (desc->pg_count + req->wb_bytes <= desc->pg_bsize) + return req->wb_bytes; + return 0; } EXPORT_SYMBOL_GPL(nfs_generic_pg_test); @@ -747,6 +759,8 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, struct nfs_page *req, struct nfs_pageio_descriptor *pgio) { + size_t size; + if (!nfs_match_open_context(req->wb_context, prev->wb_context)) return false; if (req->wb_context->dentry->d_inode->i_flock != NULL && @@ -758,7 +772,9 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, return false; if (req_offset(req) != req_offset(prev) + prev->wb_bytes) return false; - return pgio->pg_ops->pg_test(pgio, prev, req); + size = pgio->pg_ops->pg_test(pgio, prev, req); + WARN_ON_ONCE(size && size != req->wb_bytes); + return size > 0; } /** diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0fe670189fd1..de6eb16f94d1 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1434,7 +1434,11 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); -bool +/* + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. + */ +size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { @@ -1455,8 +1459,10 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, * first byte that lies outside the pnfs_layout_range. FIXME? * */ - return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, - pgio->pg_lseg->pls_range.length); + if (req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length)) + return req->wb_bytes; + return 0; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 0031267d7492..dccf182ec4d8 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -187,7 +187,8 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, u64 wb_size); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); -bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); +size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, + struct nfs_page *prev, struct nfs_page *req); void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index be0b0981e7a0..13d59af561f6 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -46,7 +46,8 @@ struct nfs_page { struct nfs_pageio_descriptor; struct nfs_pageio_ops { void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *); - bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + size_t (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, + struct nfs_page *); int (*pg_doio)(struct nfs_pageio_descriptor *); }; @@ -102,7 +103,7 @@ extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, struct nfs_page *); extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); -extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, +extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req); extern int nfs_wait_on_request(struct nfs_page *); -- cgit v1.2.3-71-gd317 From 2bfc6e566daa8386c9cffef2f7de17fc330d3835 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:45 -0400 Subject: nfs: add support for multiple nfs reqs per page Add "page groups" - a circular list of nfs requests (struct nfs_page) that all reference the same page. This gives nfs read and write paths the ability to account for sub-page regions independently. This somewhat follows the design of struct buffer_head's sub-page accounting. Only "head" requests are ever added/removed from the inode list in the buffered write path. "head" and "sub" requests are treated the same through the read path and the rest of the write/commit path. Requests are given an extra reference across the life of the list. Page groups are never rejoined after being split. If the read/write request fails and the client falls back to another path (ie revert to MDS in PNFS case), the already split requests are pushed through the recoalescing code again, which may split them further and then coalesce them into properly sized requests on the wire. Fragmentation shouldn't be a problem with the current design, because we flush all requests in page group when a non-contiguous request is added, so the only time resplitting should occur is on a resend of a read or write. This patch lays the groundwork for sub-page splitting, but does not actually do any splitting. For now all page groups have one request as pg_test functions don't yet split pages. There are several related patches that are needed support multiple requests per page group. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 7 +- fs/nfs/pagelist.c | 220 ++++++++++++++++++++++++++++++++++++++++++++--- fs/nfs/read.c | 4 +- fs/nfs/write.c | 13 ++- include/linux/nfs_page.h | 13 ++- 5 files changed, 236 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1dd8c622d719..2c0e08f4cf71 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -380,7 +380,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de struct nfs_page *req; unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); /* XXX do we need to do the eof zeroing found in async_filler? */ - req = nfs_create_request(dreq->ctx, pagevec[i], + req = nfs_create_request(dreq->ctx, pagevec[i], NULL, pgbase, req_len); if (IS_ERR(req)) { result = PTR_ERR(req); @@ -749,7 +749,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d struct nfs_page *req; unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); - req = nfs_create_request(dreq->ctx, pagevec[i], + req = nfs_create_request(dreq->ctx, pagevec[i], NULL, pgbase, req_len); if (IS_ERR(req)) { result = PTR_ERR(req); @@ -827,6 +827,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) spin_unlock(&dreq->lock); while (!list_empty(&hdr->pages)) { + bool do_destroy = true; + req = nfs_list_entry(hdr->pages.next); nfs_list_remove_request(req); switch (bit) { @@ -834,6 +836,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) case NFS_IOHDR_NEED_COMMIT: kref_get(&req->wb_kref); nfs_mark_request_commit(req, hdr->lseg, &cinfo); + do_destroy = false; } nfs_unlock_and_release_request(req); } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index f343f49ff596..015fb7b48dfe 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -29,6 +29,8 @@ static struct kmem_cache *nfs_page_cachep; static const struct rpc_call_ops nfs_pgio_common_ops; +static void nfs_free_request(struct nfs_page *); + static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) { p->npages = pagecount; @@ -136,10 +138,151 @@ nfs_iocounter_wait(struct nfs_io_counter *c) return __nfs_iocounter_wait(c); } +/* + * nfs_page_group_lock - lock the head of the page group + * @req - request in group that is to be locked + * + * this lock must be held if modifying the page group list + */ +void +nfs_page_group_lock(struct nfs_page *req) +{ + struct nfs_page *head = req->wb_head; + int err = -EAGAIN; + + WARN_ON_ONCE(head != head->wb_head); + + while (err) + err = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, + nfs_wait_bit_killable, TASK_KILLABLE); +} + +/* + * nfs_page_group_unlock - unlock the head of the page group + * @req - request in group that is to be unlocked + */ +void +nfs_page_group_unlock(struct nfs_page *req) +{ + struct nfs_page *head = req->wb_head; + + WARN_ON_ONCE(head != head->wb_head); + + smp_mb__before_clear_bit(); + clear_bit(PG_HEADLOCK, &head->wb_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&head->wb_flags, PG_HEADLOCK); +} + +/* + * nfs_page_group_sync_on_bit_locked + * + * must be called with page group lock held + */ +static bool +nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) +{ + struct nfs_page *head = req->wb_head; + struct nfs_page *tmp; + + WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_flags)); + WARN_ON_ONCE(test_and_set_bit(bit, &req->wb_flags)); + + tmp = req->wb_this_page; + while (tmp != req) { + if (!test_bit(bit, &tmp->wb_flags)) + return false; + tmp = tmp->wb_this_page; + } + + /* true! reset all bits */ + tmp = req; + do { + clear_bit(bit, &tmp->wb_flags); + tmp = tmp->wb_this_page; + } while (tmp != req); + + return true; +} + +/* + * nfs_page_group_sync_on_bit - set bit on current request, but only + * return true if the bit is set for all requests in page group + * @req - request in page group + * @bit - PG_* bit that is used to sync page group + */ +bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) +{ + bool ret; + + nfs_page_group_lock(req); + ret = nfs_page_group_sync_on_bit_locked(req, bit); + nfs_page_group_unlock(req); + + return ret; +} + +/* + * nfs_page_group_init - Initialize the page group linkage for @req + * @req - a new nfs request + * @prev - the previous request in page group, or NULL if @req is the first + * or only request in the group (the head). + */ +static inline void +nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) +{ + WARN_ON_ONCE(prev == req); + + if (!prev) { + req->wb_head = req; + req->wb_this_page = req; + } else { + WARN_ON_ONCE(prev->wb_this_page != prev->wb_head); + WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags)); + req->wb_head = prev->wb_head; + req->wb_this_page = prev->wb_this_page; + prev->wb_this_page = req; + + /* grab extra ref if head request has extra ref from + * the write/commit path to handle handoff between write + * and commit lists */ + if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) + kref_get(&req->wb_kref); + } +} + +/* + * nfs_page_group_destroy - sync the destruction of page groups + * @req - request that no longer needs the page group + * + * releases the page group reference from each member once all + * members have called this function. + */ +static void +nfs_page_group_destroy(struct kref *kref) +{ + struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); + struct nfs_page *tmp, *next; + + if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN)) + return; + + tmp = req; + do { + next = tmp->wb_this_page; + /* unlink and free */ + tmp->wb_this_page = tmp; + tmp->wb_head = tmp; + nfs_free_request(tmp); + tmp = next; + } while (tmp != req); +} + /** * nfs_create_request - Create an NFS read/write request. * @ctx: open context to use * @page: page to write + * @last: last nfs request created for this page group or NULL if head * @offset: starting offset within the page for the write * @count: number of bytes to read/write * @@ -149,7 +292,8 @@ nfs_iocounter_wait(struct nfs_io_counter *c) */ struct nfs_page * nfs_create_request(struct nfs_open_context *ctx, struct page *page, - unsigned int offset, unsigned int count) + struct nfs_page *last, unsigned int offset, + unsigned int count) { struct nfs_page *req; struct nfs_lock_context *l_ctx; @@ -181,6 +325,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page, req->wb_bytes = count; req->wb_context = get_nfs_open_context(ctx); kref_init(&req->wb_kref); + nfs_page_group_init(req, last); return req; } @@ -238,16 +383,18 @@ static void nfs_clear_request(struct nfs_page *req) } } - /** * nfs_release_request - Release the count on an NFS read/write request * @req: request to release * * Note: Should never be called with the spinlock held! */ -static void nfs_free_request(struct kref *kref) +static void nfs_free_request(struct nfs_page *req) { - struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); + WARN_ON_ONCE(req->wb_this_page != req); + + /* extra debug: make sure no sync bits are still set */ + WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); /* Release struct file and open context */ nfs_clear_request(req); @@ -256,7 +403,7 @@ static void nfs_free_request(struct kref *kref) void nfs_release_request(struct nfs_page *req) { - kref_put(&req->wb_kref, nfs_free_request); + kref_put(&req->wb_kref, nfs_page_group_destroy); } static int nfs_wait_bit_uninterruptible(void *word) @@ -832,21 +979,66 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) * @desc: destination io descriptor * @req: request * + * This may split a request into subrequests which are all part of the + * same page group. + * * Returns true if the request 'req' was successfully coalesced into the * existing list of pages 'desc'. */ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { - while (!nfs_pageio_do_add_request(desc, req)) { - desc->pg_moreio = 1; - nfs_pageio_doio(desc); - if (desc->pg_error < 0) - return 0; - desc->pg_moreio = 0; - if (desc->pg_recoalesce) - return 0; - } + struct nfs_page *subreq; + unsigned int bytes_left = 0; + unsigned int offset, pgbase; + + nfs_page_group_lock(req); + + subreq = req; + bytes_left = subreq->wb_bytes; + offset = subreq->wb_offset; + pgbase = subreq->wb_pgbase; + + do { + if (!nfs_pageio_do_add_request(desc, subreq)) { + /* make sure pg_test call(s) did nothing */ + WARN_ON_ONCE(subreq->wb_bytes != bytes_left); + WARN_ON_ONCE(subreq->wb_offset != offset); + WARN_ON_ONCE(subreq->wb_pgbase != pgbase); + + nfs_page_group_unlock(req); + desc->pg_moreio = 1; + nfs_pageio_doio(desc); + if (desc->pg_error < 0) + return 0; + desc->pg_moreio = 0; + if (desc->pg_recoalesce) + return 0; + /* retry add_request for this subreq */ + nfs_page_group_lock(req); + continue; + } + + /* check for buggy pg_test call(s) */ + WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE); + WARN_ON_ONCE(subreq->wb_bytes > bytes_left); + WARN_ON_ONCE(subreq->wb_bytes == 0); + + bytes_left -= subreq->wb_bytes; + offset += subreq->wb_bytes; + pgbase += subreq->wb_bytes; + + if (bytes_left) { + subreq = nfs_create_request(req->wb_context, + req->wb_page, + subreq, pgbase, bytes_left); + nfs_lock_request(subreq); + subreq->wb_offset = offset; + subreq->wb_index = req->wb_index; + } + } while (bytes_left > 0); + + nfs_page_group_unlock(req); return 1; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 46d90448f69b..902ba2c63d05 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -85,7 +85,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(ctx, page, 0, len); + new = nfs_create_request(ctx, page, NULL, 0, len); if (IS_ERR(new)) { unlock_page(page); return PTR_ERR(new); @@ -311,7 +311,7 @@ readpage_async_filler(void *data, struct page *page) if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(desc->ctx, page, 0, len); + new = nfs_create_request(desc->ctx, page, NULL, 0, len); if (IS_ERR(new)) goto out_error; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e773df207c05..d0f30f12a8b3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -367,6 +367,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(inode); + WARN_ON_ONCE(req->wb_this_page != req); + /* Lock the request! */ nfs_lock_request(req); @@ -383,6 +385,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) set_page_private(req->wb_page, (unsigned long)req); } nfsi->npages++; + set_bit(PG_INODE_REF, &req->wb_flags); kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); } @@ -567,6 +570,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) { struct nfs_commit_info cinfo; unsigned long bytes = 0; + bool do_destroy; if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) goto out; @@ -596,6 +600,7 @@ remove_req: next: nfs_unlock_request(req); nfs_end_page_writeback(req->wb_page); + do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags); nfs_release_request(req); } out: @@ -700,6 +705,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, if (req == NULL) goto out_unlock; + /* should be handled by nfs_flush_incompatible */ + WARN_ON_ONCE(req->wb_head != req); + WARN_ON_ONCE(req->wb_this_page != req); + rqend = req->wb_offset + req->wb_bytes; /* * Tell the caller to flush out the request if @@ -761,7 +770,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, req = nfs_try_to_update_request(inode, page, offset, bytes); if (req != NULL) goto out; - req = nfs_create_request(ctx, page, offset, bytes); + req = nfs_create_request(ctx, page, NULL, offset, bytes); if (IS_ERR(req)) goto out; nfs_inode_add_request(inode, req); @@ -805,6 +814,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page) return 0; l_ctx = req->wb_lock_context; do_flush = req->wb_page != page || req->wb_context != ctx; + /* for now, flush if more than 1 request in page_group */ + do_flush |= req->wb_this_page != req; if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { do_flush |= l_ctx->lockowner.l_owner != current->files || l_ctx->lockowner.l_pid != current->tgid; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 13d59af561f6..986c0c279d0e 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -26,6 +26,9 @@ enum { PG_MAPPED, /* page private set for buffered io */ PG_CLEAN, /* write succeeded */ PG_COMMIT_TO_DS, /* used by pnfs layouts */ + PG_INODE_REF, /* extra ref held by inode (head req only) */ + PG_HEADLOCK, /* page group lock of wb_head */ + PG_TEARDOWN, /* page group sync for destroy */ }; struct nfs_inode; @@ -41,6 +44,8 @@ struct nfs_page { struct kref wb_kref; /* reference count */ unsigned long wb_flags; struct nfs_write_verifier wb_verf; /* Commit cookie */ + struct nfs_page *wb_this_page; /* list of reqs for this page */ + struct nfs_page *wb_head; /* head pointer for req list */ }; struct nfs_pageio_descriptor; @@ -87,9 +92,10 @@ struct nfs_pageio_descriptor { extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, struct page *page, + struct nfs_page *last, unsigned int offset, unsigned int count); -extern void nfs_release_request(struct nfs_page *req); +extern void nfs_release_request(struct nfs_page *); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, @@ -108,7 +114,10 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *req); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); -extern void nfs_unlock_and_release_request(struct nfs_page *req); +extern void nfs_unlock_and_release_request(struct nfs_page *); +extern void nfs_page_group_lock(struct nfs_page *); +extern void nfs_page_group_unlock(struct nfs_page *); +extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); /* * Lock the page of an asynchronous request -- cgit v1.2.3-71-gd317 From 67d0338edd71db9a4f406d8778f7c525d31e9f7f Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:46 -0400 Subject: nfs: page group syncing in read path Operations that modify state for a whole page must be syncronized across all requests within a page group. In the read path, this is calling unlock_page and SetPageUptodate. Both of these functions should not be called until all requests in a page group have reached the point where they would call them. This patch should have no effect yet since all page groups currently have one request, but will come into play when pg_test functions are modified to split pages into sub-page regions. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 ++ fs/nfs/read.c | 22 +++++++++++++++++----- include/linux/nfs_page.h | 2 ++ 3 files changed, 21 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 015fb7b48dfe..18ee4e99347e 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -395,6 +395,8 @@ static void nfs_free_request(struct nfs_page *req) /* extra debug: make sure no sync bits are still set */ WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); + WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags)); + WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags)); /* Release struct file and open context */ nfs_clear_request(req); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 902ba2c63d05..53d5b83611ce 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -105,10 +105,16 @@ static void nfs_readpage_release(struct nfs_page *req) { struct inode *d_inode = req->wb_context->dentry->d_inode; - if (PageUptodate(req->wb_page)) - nfs_readpage_to_fscache(d_inode, req->wb_page, 0); + dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(d_inode), req->wb_bytes, + (long long)req_offset(req)); - unlock_page(req->wb_page); + if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) { + if (PageUptodate(req->wb_page)) + nfs_readpage_to_fscache(d_inode, req->wb_page, 0); + + unlock_page(req->wb_page); + } dprintk("NFS: read done (%s/%Lu %d@%Ld)\n", req->wb_context->dentry->d_inode->i_sb->s_id, @@ -118,6 +124,12 @@ static void nfs_readpage_release(struct nfs_page *req) nfs_release_request(req); } +static void nfs_page_group_set_uptodate(struct nfs_page *req) +{ + if (nfs_page_group_sync_on_bit(req, PG_UPTODATE)) + SetPageUptodate(req->wb_page); +} + /* Note io was page aligned */ static void nfs_read_completion(struct nfs_pgio_header *hdr) { @@ -140,9 +152,9 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr) bytes += req->wb_bytes; if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { if (bytes <= hdr->good_bytes) - SetPageUptodate(page); + nfs_page_group_set_uptodate(req); } else - SetPageUptodate(page); + nfs_page_group_set_uptodate(req); nfs_list_remove_request(req); nfs_readpage_release(req); } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 986c0c279d0e..6385175a127b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -29,6 +29,8 @@ enum { PG_INODE_REF, /* extra ref held by inode (head req only) */ PG_HEADLOCK, /* page group lock of wb_head */ PG_TEARDOWN, /* page group sync for destroy */ + PG_UNLOCKPAGE, /* page group sync bit in read path */ + PG_UPTODATE, /* page group sync bit in read path */ }; struct nfs_inode; -- cgit v1.2.3-71-gd317 From 20633f042fd0907300069714b98aaf607a8b5bf8 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:47 -0400 Subject: nfs: page group syncing in write path Operations that modify state for a whole page must be syncronized across all requests within a page group. In the write path, this is calling end_page_writeback and removing the head request from an inode. Both of these operations should not be called until all requests in a page group have reached the point where they would call them. This patch should have no effect yet since all page groups currently have one request, but will come into play when pg_test functions are modified to split pages into sub-page regions. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 ++ fs/nfs/write.c | 32 ++++++++++++++++++++------------ include/linux/nfs_page.h | 2 ++ 3 files changed, 24 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 18ee4e99347e..ceb4424614aa 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -397,6 +397,8 @@ static void nfs_free_request(struct nfs_page *req) WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags)); WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags)); + WARN_ON_ONCE(test_bit(PG_WB_END, &req->wb_flags)); + WARN_ON_ONCE(test_bit(PG_REMOVE, &req->wb_flags)); /* Release struct file and open context */ nfs_clear_request(req); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d0f30f12a8b3..5d752766139d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -201,12 +201,15 @@ static void nfs_set_page_writeback(struct page *page) } } -static void nfs_end_page_writeback(struct page *page) +static void nfs_end_page_writeback(struct nfs_page *req) { - struct inode *inode = page_file_mapping(page)->host; + struct inode *inode = page_file_mapping(req->wb_page)->host; struct nfs_server *nfss = NFS_SERVER(inode); - end_page_writeback(page); + if (!nfs_page_group_sync_on_bit(req, PG_WB_END)) + return; + + end_page_writeback(req->wb_page); if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } @@ -397,15 +400,20 @@ static void nfs_inode_remove_request(struct nfs_page *req) { struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_page *head; - spin_lock(&inode->i_lock); - if (likely(!PageSwapCache(req->wb_page))) { - set_page_private(req->wb_page, 0); - ClearPagePrivate(req->wb_page); - clear_bit(PG_MAPPED, &req->wb_flags); + if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { + head = req->wb_head; + + spin_lock(&inode->i_lock); + if (likely(!PageSwapCache(head->wb_page))) { + set_page_private(head->wb_page, 0); + ClearPagePrivate(head->wb_page); + clear_bit(PG_MAPPED, &head->wb_flags); + } + nfsi->npages--; + spin_unlock(&inode->i_lock); } - nfsi->npages--; - spin_unlock(&inode->i_lock); nfs_release_request(req); } @@ -599,7 +607,7 @@ remove_req: nfs_inode_remove_request(req); next: nfs_unlock_request(req); - nfs_end_page_writeback(req->wb_page); + nfs_end_page_writeback(req); do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags); nfs_release_request(req); } @@ -964,7 +972,7 @@ static void nfs_redirty_request(struct nfs_page *req) { nfs_mark_request_dirty(req); nfs_unlock_request(req); - nfs_end_page_writeback(req->wb_page); + nfs_end_page_writeback(req); nfs_release_request(req); } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 6385175a127b..7d9096d95d4a 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -31,6 +31,8 @@ enum { PG_TEARDOWN, /* page group sync for destroy */ PG_UNLOCKPAGE, /* page group sync bit in read path */ PG_UPTODATE, /* page group sync bit in read path */ + PG_WB_END, /* page group sync bit in write path */ + PG_REMOVE, /* page group sync bit in write path */ }; struct nfs_inode; -- cgit v1.2.3-71-gd317 From 7f714720fac03383d687dbe39494cc96b845bd46 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:53 -0400 Subject: nfs: remove data list from pgio header Since the ability to split pages into subpage requests has been added, nfs_pgio_header->rpc_list only ever has one pgio data. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 39 ++++++--------------------------------- fs/nfs/pnfs.c | 41 +++++++++++++++-------------------------- include/linux/nfs_xdr.h | 5 +++-- 3 files changed, 24 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ec4311df05d9..fab78d13ee14 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -470,7 +470,6 @@ struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops) struct nfs_pgio_header *hdr = &header->header; INIT_LIST_HEAD(&hdr->pages); - INIT_LIST_HEAD(&hdr->rpc_list); spin_lock_init(&hdr->lock); atomic_set(&hdr->refcnt, 0); hdr->rw_ops = ops; @@ -648,27 +647,6 @@ out: } EXPORT_SYMBOL_GPL(nfs_initiate_pgio); -static int nfs_do_multiple_pgios(struct list_head *head, - const struct rpc_call_ops *call_ops, - int how) -{ - struct nfs_pgio_data *data; - int ret = 0; - - while (!list_empty(head)) { - int ret2; - - data = list_first_entry(head, struct nfs_pgio_data, list); - list_del_init(&data->list); - - ret2 = nfs_initiate_pgio(NFS_CLIENT(data->header->inode), - data, call_ops, how, 0); - if (ret == 0) - ret = ret2; - } - return ret; -} - /** * nfs_pgio_error - Clean up from a pageio error * @desc: IO descriptor @@ -677,14 +655,9 @@ static int nfs_do_multiple_pgios(struct list_head *head, static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_pgio_data *data; - set_bit(NFS_IOHDR_REDO, &hdr->flags); - while (!list_empty(&hdr->rpc_list)) { - data = list_first_entry(&hdr->rpc_list, struct nfs_pgio_data, list); - list_del(&data->list); - nfs_pgio_data_release(data); - } + nfs_pgio_data_release(hdr->data); + hdr->data = NULL; desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } @@ -794,7 +767,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, /* Set up the argument struct */ nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); - list_add(&data->list, &hdr->rpc_list); + hdr->data = data; desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -816,9 +789,9 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret == 0) - ret = nfs_do_multiple_pgios(&hdr->rpc_list, - desc->pg_rpc_callops, - desc->pg_ioflags); + ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), + hdr->data, desc->pg_rpc_callops, + desc->pg_ioflags, 0); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); return ret; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 354c53cd4095..6ef108b1d85f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1573,23 +1573,18 @@ pnfs_try_to_write_data(struct nfs_pgio_data *wdata, } static void -pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) +pnfs_do_write(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_data *data; + struct nfs_pgio_data *data = hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; + enum pnfs_try_status trypnfs; desc->pg_lseg = NULL; - while (!list_empty(head)) { - enum pnfs_try_status trypnfs; - - data = list_first_entry(head, struct nfs_pgio_data, list); - list_del_init(&data->list); - - trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); - if (trypnfs == PNFS_NOT_ATTEMPTED) - pnfs_write_through_mds(desc, data); - } + trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); + if (trypnfs == PNFS_NOT_ATTEMPTED) + pnfs_write_through_mds(desc, data); pnfs_put_lseg(lseg); } @@ -1623,7 +1618,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else - pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); + pnfs_do_write(desc, hdr, desc->pg_ioflags); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); return ret; @@ -1731,23 +1726,17 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata, } static void -pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) +pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_pgio_data *data; + struct nfs_pgio_data *data = hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; + enum pnfs_try_status trypnfs; desc->pg_lseg = NULL; - while (!list_empty(head)) { - enum pnfs_try_status trypnfs; - - data = list_first_entry(head, struct nfs_pgio_data, list); - list_del_init(&data->list); - - trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); - if (trypnfs == PNFS_NOT_ATTEMPTED) - pnfs_read_through_mds(desc, data); - } + trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); + if (trypnfs == PNFS_NOT_ATTEMPTED) + pnfs_read_through_mds(desc, data); pnfs_put_lseg(lseg); } @@ -1782,7 +1771,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else - pnfs_do_multiple_reads(desc, &hdr->rpc_list); + pnfs_do_read(desc, hdr); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); return ret; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index adef7bd2d06d..ae636013fb1f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1256,11 +1256,13 @@ enum { NFS_IOHDR_NEED_RESCHED, }; +struct nfs_pgio_data; + struct nfs_pgio_header { struct inode *inode; struct rpc_cred *cred; struct list_head pages; - struct list_head rpc_list; + struct nfs_pgio_data *data; atomic_t refcnt; struct nfs_page *req; struct nfs_writeverf verf; /* Used for writes */ @@ -1282,7 +1284,6 @@ struct nfs_pgio_header { struct nfs_pgio_data { struct nfs_pgio_header *header; - struct list_head list; struct rpc_task task; struct nfs_fattr fattr; struct nfs_writeverf verf; /* Used for writes */ -- cgit v1.2.3-71-gd317 From 5002c58639d41b93e800c8a4b7eca49c40d57822 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:54 -0400 Subject: pnfs: support multiple verfs per direct req Support direct requests that span multiple pnfs data servers by comparing nfs_pgio_header->verf to a cached verf in pnfs_commit_bucket. Continue to use dreq->verf if the MDS is used / non-pNFS. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 102 +++++++++++++++++++++++++++++++++++++++++++++--- fs/nfs/nfs4filelayout.c | 6 +++ include/linux/nfs.h | 5 ++- include/linux/nfs_xdr.h | 2 + 4 files changed, 109 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 2c0e08f4cf71..4ad7bc388679 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -108,6 +108,97 @@ static inline int put_dreq(struct nfs_direct_req *dreq) return atomic_dec_and_test(&dreq->io_count); } +/* + * nfs_direct_select_verf - select the right verifier + * @dreq - direct request possibly spanning multiple servers + * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs + * @ds_idx - index of data server in data server list, only valid if ds_clp set + * + * returns the correct verifier to use given the role of the server + */ +static struct nfs_writeverf * +nfs_direct_select_verf(struct nfs_direct_req *dreq, + struct nfs_client *ds_clp, + int ds_idx) +{ + struct nfs_writeverf *verfp = &dreq->verf; + +#ifdef CONFIG_NFS_V4_1 + if (ds_clp) { + /* pNFS is in use, use the DS verf */ + if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets) + verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf; + else + WARN_ON_ONCE(1); + } +#endif + return verfp; +} + + +/* + * nfs_direct_set_hdr_verf - set the write/commit verifier + * @dreq - direct request possibly spanning multiple servers + * @hdr - pageio header to validate against previously seen verfs + * + * Set the server's (MDS or DS) "seen" verifier + */ +static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, + struct nfs_pgio_header *hdr) +{ + struct nfs_writeverf *verfp; + + verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, + hdr->data->ds_idx); + WARN_ON_ONCE(verfp->committed >= 0); + memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); + WARN_ON_ONCE(verfp->committed < 0); +} + +/* + * nfs_direct_cmp_hdr_verf - compare verifier for pgio header + * @dreq - direct request possibly spanning multiple servers + * @hdr - pageio header to validate against previously seen verf + * + * set the server's "seen" verf if not initialized. + * returns result of comparison between @hdr->verf and the "seen" + * verf of the server used by @hdr (DS or MDS) + */ +static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, + struct nfs_pgio_header *hdr) +{ + struct nfs_writeverf *verfp; + + verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, + hdr->data->ds_idx); + if (verfp->committed < 0) { + nfs_direct_set_hdr_verf(dreq, hdr); + return 0; + } + return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); +} + +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) +/* + * nfs_direct_cmp_commit_data_verf - compare verifier for commit data + * @dreq - direct request possibly spanning multiple servers + * @data - commit data to validate against previously seen verf + * + * returns result of comparison between @data->verf and the verf of + * the server used by @data (DS or MDS) + */ +static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, + struct nfs_commit_data *data) +{ + struct nfs_writeverf *verfp; + + verfp = nfs_direct_select_verf(dreq, data->ds_clp, + data->ds_commit_index); + WARN_ON_ONCE(verfp->committed < 0); + return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); +} +#endif + /** * nfs_direct_IO - NFS address space operation for direct I/O * @rw: direction (read or write) @@ -168,6 +259,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) kref_get(&dreq->kref); init_completion(&dreq->completion); INIT_LIST_HEAD(&dreq->mds_cinfo.list); + dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */ INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); spin_lock_init(&dreq->lock); @@ -602,7 +694,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) dprintk("NFS: %5u commit failed with error %d.\n", data->task.tk_pid, status); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { + } else if (nfs_direct_cmp_commit_data_verf(dreq, data)) { dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; } @@ -811,13 +903,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) bit = NFS_IOHDR_NEED_RESCHED; else if (dreq->flags == 0) { - memcpy(&dreq->verf, &hdr->verf, - sizeof(dreq->verf)); + nfs_direct_set_hdr_verf(dreq, hdr); bit = NFS_IOHDR_NEED_COMMIT; dreq->flags = NFS_ODIRECT_DO_COMMIT; } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { - if (memcmp(&dreq->verf, &hdr->verf, sizeof(dreq->verf))) { - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) { + dreq->flags = + NFS_ODIRECT_RESCHED_WRITES; bit = NFS_IOHDR_NEED_RESCHED; } else bit = NFS_IOHDR_NEED_COMMIT; diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 7a665e0f35b7..0ebc521ea6fc 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -560,6 +560,7 @@ filelayout_read_pagelist(struct nfs_pgio_data *data) /* No multipath support. Use first DS */ atomic_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; + data->ds_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) data->args.fh = fh; @@ -603,6 +604,7 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) data->pgio_done_cb = filelayout_write_done_cb; atomic_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; + data->ds_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) data->args.fh = fh; @@ -875,6 +877,8 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, for (i = 0; i < size; i++) { INIT_LIST_HEAD(&buckets[i].written); INIT_LIST_HEAD(&buckets[i].committing); + /* mark direct verifier as unset */ + buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW; } spin_lock(cinfo->lock); @@ -885,6 +889,8 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, &buckets[i].written); list_splice(&cinfo->ds->buckets[i].committing, &buckets[i].committing); + buckets[i].direct_verf.committed = + cinfo->ds->buckets[i].direct_verf.committed; buckets[i].wlseg = cinfo->ds->buckets[i].wlseg; buckets[i].clseg = cinfo->ds->buckets[i].clseg; } diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 3e794c12e90a..610af5155ef2 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -46,6 +46,9 @@ static inline void nfs_copy_fh(struct nfs_fh *target, const struct nfs_fh *sourc enum nfs3_stable_how { NFS_UNSTABLE = 0, NFS_DATA_SYNC = 1, - NFS_FILE_SYNC = 2 + NFS_FILE_SYNC = 2, + + /* used by direct.c to mark verf as invalid */ + NFS_INVALID_STABLE_HOW = -1 }; #endif /* _LINUX_NFS_H */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ae636013fb1f..9a1396e70310 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1112,6 +1112,7 @@ struct pnfs_commit_bucket { struct list_head committing; struct pnfs_layout_segment *wlseg; struct pnfs_layout_segment *clseg; + struct nfs_writeverf direct_verf; }; struct pnfs_ds_commit_info { @@ -1294,6 +1295,7 @@ struct nfs_pgio_data { __u64 mds_offset; /* Filelayout dense stripe */ struct nfs_page_array pages; struct nfs_client *ds_clp; /* pNFS data server */ + int ds_idx; /* ds index if ds_clp is set */ }; struct nfs_rw_header { -- cgit v1.2.3-71-gd317 From 86f6cf41272de9d6ffa05ab46028b15d160a6f3e Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sat, 24 May 2014 09:34:26 +0200 Subject: net: of_mdio: add of_mdiobus_link_phydev() Add a function to walk the list of subnodes of a mdio bus and look for a node that matches the phy's address with its 'reg' property. If found, set the of_node pointer for the phy. This allows auto-probed pyh devices to be augmented by information passed in via DT. Signed-off-by: Daniel Mack Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 6 ++++++ drivers/of/of_mdio.c | 33 +++++++++++++++++++++++++++++++++ include/linux/of_mdio.h | 8 ++++++++ 3 files changed, 47 insertions(+) (limited to 'include') diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index a6284964b711..2e58aa54484c 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -300,6 +300,12 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr) if (IS_ERR(phydev) || phydev == NULL) return phydev; + /* + * For DT, see if the auto-probed phy has a correspoding child + * in the bus node, and set the of_node pointer in this case. + */ + of_mdiobus_link_phydev(bus, phydev); + err = phy_device_register(phydev); if (err) { phy_device_free(phydev); diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 731d3d9052d7..7c8c142e4eb8 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -183,6 +183,39 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) } EXPORT_SYMBOL(of_mdiobus_register); +/** + * of_mdiobus_link_phydev - Find a device node for a phy + * @mdio: pointer to mii_bus structure + * @phydev: phydev for which the of_node pointer should be set + * + * Walk the list of subnodes of a mdio bus and look for a node that matches the + * phy's address with its 'reg' property. If found, set the of_node pointer for + * the phy. This allows auto-probed pyh devices to be supplied with information + * passed in via DT. + */ +void of_mdiobus_link_phydev(struct mii_bus *mdio, + struct phy_device *phydev) +{ + struct device *dev = &phydev->dev; + struct device_node *child; + + if (dev->of_node || !mdio->dev.of_node) + return; + + for_each_available_child_of_node(mdio->dev.of_node, child) { + int addr; + + addr = of_mdio_parse_addr(&mdio->dev, child); + if (addr < 0) + continue; + + if (addr == phydev->addr) { + dev->of_node = child; + return; + } + } +} + /* Helper function for of_phy_find_device */ static int of_phy_match(struct device *dev, void *phy_np) { diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index d449018d0726..a70c9493d55a 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -25,6 +25,9 @@ struct phy_device *of_phy_attach(struct net_device *dev, extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); +extern void of_mdiobus_link_phydev(struct mii_bus *mdio, + struct phy_device *phydev); + #else /* CONFIG_OF */ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) { @@ -60,6 +63,11 @@ static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np) { return NULL; } + +static inline void of_mdiobus_link_phydev(struct mii_bus *mdio, + struct phy_device *phydev) +{ +} #endif /* CONFIG_OF */ #if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY) -- cgit v1.2.3-71-gd317 From 2abdd3137e78adca69b0722307aa2ef89f2cf3b6 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 14 May 2014 16:58:19 +0300 Subject: drm: store connector name in connector struct (v2) This makes drm_get_connector_name() thread safe. [airlied: fix to build.] Reference: http://lkml.kernel.org/r/645ee6e22cad47d38a2b35c21c8d5fe3@DC1-MBX-01.ptsecurity.ru Signed-off-by: Jani Nikula Reviewed-by: David Herrmann Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 36 ++++++++++++++++++++---------------- include/drm/drm_crtc.h | 2 ++ 2 files changed, 22 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 34f0bf18d80d..293eb4c24f35 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -277,21 +277,11 @@ EXPORT_SYMBOL(drm_get_encoder_name); /** * drm_get_connector_name - return a string for connector - * @connector: connector to compute name of - * - * Note that the buffer used by this function is globally shared and owned by - * the function itself. - * - * FIXME: This isn't really multithreading safe. + * @connector: the connector to get name for */ const char *drm_get_connector_name(const struct drm_connector *connector) { - static char buf[32]; - - snprintf(buf, 32, "%s-%d", - drm_connector_enum_list[connector->connector_type].name, - connector->connector_type_id); - return buf; + return connector->name; } EXPORT_SYMBOL(drm_get_connector_name); @@ -824,7 +814,7 @@ int drm_connector_init(struct drm_device *dev, ret = drm_mode_object_get(dev, &connector->base, DRM_MODE_OBJECT_CONNECTOR); if (ret) - goto out; + goto out_unlock; connector->base.properties = &connector->properties; connector->dev = dev; @@ -834,9 +824,17 @@ int drm_connector_init(struct drm_device *dev, ida_simple_get(connector_ida, 1, 0, GFP_KERNEL); if (connector->connector_type_id < 0) { ret = connector->connector_type_id; - drm_mode_object_put(dev, &connector->base); - goto out; + goto out_put; } + connector->name = + kasprintf(GFP_KERNEL, "%s-%d", + drm_connector_enum_list[connector_type].name, + connector->connector_type_id); + if (!connector->name) { + ret = -ENOMEM; + goto out_put; + } + INIT_LIST_HEAD(&connector->probed_modes); INIT_LIST_HEAD(&connector->modes); connector->edid_blob_ptr = NULL; @@ -853,7 +851,11 @@ int drm_connector_init(struct drm_device *dev, drm_object_attach_property(&connector->base, dev->mode_config.dpms_property, 0); - out: +out_put: + if (ret) + drm_mode_object_put(dev, &connector->base); + +out_unlock: drm_modeset_unlock_all(dev); return ret; @@ -881,6 +883,8 @@ void drm_connector_cleanup(struct drm_connector *connector) connector->connector_type_id); drm_mode_object_put(dev, &connector->base); + kfree(connector->name); + connector->name = NULL; list_del(&connector->head); dev->mode_config.num_connector--; } diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 698d54e27f39..b5c97d5b2654 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -444,6 +444,7 @@ struct drm_encoder { * @attr: sysfs attributes * @head: list management * @base: base KMS object + * @name: connector name * @connector_type: one of the %DRM_MODE_CONNECTOR_ types from drm_mode.h * @connector_type_id: index into connector type enum * @interlace_allowed: can this connector handle interlaced modes? @@ -482,6 +483,7 @@ struct drm_connector { struct drm_mode_object base; + char *name; int connector_type; int connector_type_id; bool interlace_allowed; -- cgit v1.2.3-71-gd317 From e5748946e907f767e7bb73e5ed31584981f0ff65 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 14 May 2014 16:58:20 +0300 Subject: drm: store encoder name in encoder struct This makes drm_get_encoder_name() thread safe. Reference: http://lkml.kernel.org/r/645ee6e22cad47d38a2b35c21c8d5fe3@DC1-MBX-01\ .ptsecurity.ru Signed-off-by: Jani Nikula Reviewed-by: David Herrmann Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 31 +++++++++++++++++-------------- include/drm/drm_crtc.h | 2 ++ 2 files changed, 19 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 293eb4c24f35..37a3e0791ddf 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -257,21 +257,11 @@ void drm_connector_ida_destroy(void) /** * drm_get_encoder_name - return a string for encoder - * @encoder: encoder to compute name of - * - * Note that the buffer used by this function is globally shared and owned by - * the function itself. - * - * FIXME: This isn't really multithreading safe. + * @encoder: the encoder to get name for */ const char *drm_get_encoder_name(const struct drm_encoder *encoder) { - static char buf[32]; - - snprintf(buf, 32, "%s-%d", - drm_encoder_enum_list[encoder->encoder_type].name, - encoder->base.id); - return buf; + return encoder->name; } EXPORT_SYMBOL(drm_get_encoder_name); @@ -986,16 +976,27 @@ int drm_encoder_init(struct drm_device *dev, ret = drm_mode_object_get(dev, &encoder->base, DRM_MODE_OBJECT_ENCODER); if (ret) - goto out; + goto out_unlock; encoder->dev = dev; encoder->encoder_type = encoder_type; encoder->funcs = funcs; + encoder->name = kasprintf(GFP_KERNEL, "%s-%d", + drm_encoder_enum_list[encoder_type].name, + encoder->base.id); + if (!encoder->name) { + ret = -ENOMEM; + goto out_put; + } list_add_tail(&encoder->head, &dev->mode_config.encoder_list); dev->mode_config.num_encoder++; - out: +out_put: + if (ret) + drm_mode_object_put(dev, &encoder->base); + +out_unlock: drm_modeset_unlock_all(dev); return ret; @@ -1013,6 +1014,8 @@ void drm_encoder_cleanup(struct drm_encoder *encoder) struct drm_device *dev = encoder->dev; drm_modeset_lock_all(dev); drm_mode_object_put(dev, &encoder->base); + kfree(encoder->name); + encoder->name = NULL; list_del(&encoder->head); dev->mode_config.num_encoder--; drm_modeset_unlock_all(dev); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index b5c97d5b2654..5c1c31cc11cd 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -400,6 +400,7 @@ struct drm_encoder_funcs { * @dev: parent DRM device * @head: list management * @base: base KMS object + * @name: encoder name * @encoder_type: one of the %DRM_MODE_ENCODER_ types in drm_mode.h * @possible_crtcs: bitmask of potential CRTC bindings * @possible_clones: bitmask of potential sibling encoders for cloning @@ -416,6 +417,7 @@ struct drm_encoder { struct list_head head; struct drm_mode_object base; + char *name; int encoder_type; uint32_t possible_crtcs; uint32_t possible_clones; -- cgit v1.2.3-71-gd317 From 182407a6ed5333fc37dd980a8de91a8f826a94f6 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 2 May 2014 11:09:54 +1000 Subject: drm: add DP MST encoder type This adds an encoder type for DP MST encoders. Reviewed-by: Todd Previte Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 1 + include/uapi/drm/drm_mode.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 37a3e0791ddf..edee61bccd14 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -227,6 +227,7 @@ static const struct drm_prop_enum_list drm_encoder_enum_list[] = { DRM_MODE_ENCODER_TVDAC, "TV" }, { DRM_MODE_ENCODER_VIRTUAL, "Virtual" }, { DRM_MODE_ENCODER_DSI, "DSI" }, + { DRM_MODE_ENCODER_DPMST, "DP MST" }, }; static const struct drm_prop_enum_list drm_subpixel_enum_list[] = diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index f104c2603ebe..719add464f95 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -181,6 +181,7 @@ struct drm_mode_get_plane_res { #define DRM_MODE_ENCODER_TVDAC 4 #define DRM_MODE_ENCODER_VIRTUAL 5 #define DRM_MODE_ENCODER_DSI 6 +#define DRM_MODE_ENCODER_DPMST 7 struct drm_mode_get_encoder { __u32 encoder_id; -- cgit v1.2.3-71-gd317 From 97982f5a91e91dab26dd0246083b9adf3ba8b2e3 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 29 May 2014 16:31:02 +0300 Subject: IB/mlx4: Preparation for VFs to issue/receive SMI (QP0) requests/responses Currently, VFs in SRIOV VFs are denied QP0 access. The main reason for this decision is security, since Subnet Management Datagrams (SMPs) are not restricted by network partitioning and may affect the physical network topology. Moreover, even the SM may be denied access from portions of the network by setting management keys unknown to the SM. However, it is desirable to grant SMI access to certain privileged VFs, so that certain network management activities may be conducted within virtual machines instead of the hypervisor. This commit does the following: 1. Create QP0 tunnel QPs for all VFs. 2. Discard SMI mads sent-from/received-for non-privileged VFs in the hypervisor MAD multiplex/demultiplex logic. SMI mads from/for privileged VFs are allowed to pass. 3. MAD_IFC wrapper changes/fixes. For non-privileged VFs, only host-view MAD_IFC commands are allowed, and only for SMI LID-Routed GET mads. For privileged VFs, there are no restrictions. This commit does not allow privileged VFs as yet. To determine if a VF is privileged, it calls function mlx4_vf_smi_enabled(). This function returns 0 unconditionally for now. The next two commits allow defining and activating privileged VFs. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 40 ++++++++++++++++----------- drivers/infiniband/hw/mlx4/qp.c | 16 +++++------ drivers/net/ethernet/mellanox/mlx4/cmd.c | 47 ++++++++++++++++++++++---------- include/linux/mlx4/device.h | 1 + 4 files changed, 66 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index fd36ec672632..287ad0564acd 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -478,10 +478,6 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE) return -EAGAIN; - /* QP0 forwarding only for Dom0 */ - if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave)) - return -EINVAL; - if (!dest_qpt) tun_qp = &tun_ctx->qp[0]; else @@ -667,6 +663,21 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, } /* Class-specific handling */ switch (mad->mad_hdr.mgmt_class) { + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + /* 255 indicates the dom0 */ + if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) { + if (!mlx4_vf_smi_enabled(dev->dev, slave, port)) + return -EPERM; + /* for a VF. drop unsolicited MADs */ + if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) { + mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n", + slave, mad->mad_hdr.mgmt_class, + mad->mad_hdr.method); + return -EINVAL; + } + } + break; case IB_MGMT_CLASS_SUBN_ADM: if (mlx4_ib_demux_sa_handler(ibdev, port, slave, (struct ib_sa_mad *) mad)) @@ -1165,10 +1176,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE) return -EAGAIN; - /* QP0 forwarding only for Dom0 */ - if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave)) - return -EINVAL; - if (dest_qpt == IB_QPT_SMI) { src_qpnum = 0; sqp = &sqp_ctx->qp[0]; @@ -1285,11 +1292,6 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc "belongs to another slave\n", wc->src_qp); return; } - if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) { - mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: " - "non-master trying to send QP0 packets\n", wc->src_qp); - return; - } /* Map transaction ID */ ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map, @@ -1317,6 +1319,12 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc /* Class-specific handling */ switch (tunnel->mad.mad_hdr.mgmt_class) { + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + if (slave != mlx4_master_func_num(dev->dev) && + !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port)) + return; + break; case IB_MGMT_CLASS_SUBN_ADM: if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave, (struct ib_sa_mad *) &tunnel->mad)) @@ -1749,9 +1757,9 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, return -EEXIST; ctx->state = DEMUX_PV_STATE_STARTING; - /* have QP0 only on port owner, and only if link layer is IB */ - if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) && - rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND) + /* have QP0 only if link layer is IB */ + if (rdma_port_get_link_layer(ibdev, ctx->port) == + IB_LINK_LAYER_INFINIBAND) ctx->has_smi = 1; if (ctx->has_smi) { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 41308af4163c..2e8c58806e2f 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2370,7 +2370,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr, enum ib_qp_type qpt) + struct ib_send_wr *wr, + enum mlx4_ib_qp_type qpt) { union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av; struct mlx4_av sqp_av = {0}; @@ -2383,8 +2384,10 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, cpu_to_be32(0xf0000000); memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av)); - /* This function used only for sending on QP1 proxies */ - dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]); + if (qpt == MLX4_IB_QPT_PROXY_GSI) + dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]); + else + dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]); /* Use QKEY from the QP context, which is set by master */ dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); } @@ -2700,16 +2703,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += seglen / 16; break; case MLX4_IB_QPT_PROXY_SMI: - /* don't allow QP0 sends on guests */ - err = -ENOSYS; - *bad_wr = wr; - goto out; case MLX4_IB_QPT_PROXY_GSI: /* If we are tunneling special qps, this is a UD qp. * In this case we first add a UD segment targeting * the tunnel qp, and then add a header with address * information */ - set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type); + set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, + qp->mlx4_ib_qp_type); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; build_tunnel_header(wr, wqe, &seglen); diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 78099eab7673..b0e48d426fce 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -705,20 +705,28 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, struct ib_smp *smp = inbox->buf; u32 index; u8 port; + u8 opcode_modifier; u16 *table; int err; int vidx, pidx; + int network_view; struct mlx4_priv *priv = mlx4_priv(dev); struct ib_smp *outsmp = outbox->buf; __be16 *outtab = (__be16 *)(outsmp->data); __be32 slave_cap_mask; __be64 slave_node_guid; + port = vhcr->in_modifier; + /* network-view bit is for driver use only, and should not be passed to FW */ + opcode_modifier = vhcr->op_modifier & ~0x8; /* clear netw view bit */ + network_view = !!(vhcr->op_modifier & 0x8); + if (smp->base_version == 1 && smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && smp->class_version == 1) { - if (smp->method == IB_MGMT_METHOD_GET) { + /* host view is paravirtualized */ + if (!network_view && smp->method == IB_MGMT_METHOD_GET) { if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) { index = be32_to_cpu(smp->attr_mod); if (port < 1 || port > dev->caps.num_ports) @@ -743,7 +751,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, /*get the slave specific caps:*/ /*do the command */ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, vhcr->op_modifier, + vhcr->in_modifier, opcode_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); /* modify the response for slaves */ if (!err && slave != mlx4_master_func_num(dev)) { @@ -760,7 +768,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, smp->attr_mod = cpu_to_be32(slave / 8); /* execute cmd */ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, vhcr->op_modifier, + vhcr->in_modifier, opcode_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (!err) { /* if needed, move slave gid to index 0 */ @@ -774,7 +782,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, } if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) { err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, vhcr->op_modifier, + vhcr->in_modifier, opcode_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (!err) { slave_node_guid = mlx4_get_slave_node_guid(dev, slave); @@ -784,19 +792,24 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, } } } + + /* Non-privileged VFs are only allowed "host" view LID-routed 'Get' MADs. + * These are the MADs used by ib verbs (such as ib_query_gids). + */ if (slave != mlx4_master_func_num(dev) && - ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) || - (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && - smp->method == IB_MGMT_METHOD_SET))) { - mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, " - "class 0x%x, method 0x%x for attr 0x%x. Rejecting\n", - slave, smp->method, smp->mgmt_class, - be16_to_cpu(smp->attr_id)); - return -EPERM; + !mlx4_vf_smi_enabled(dev, slave, port)) { + if (!(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && + smp->method == IB_MGMT_METHOD_GET) || network_view) { + mlx4_err(dev, "Unprivileged slave %d is trying to execute a Subnet MGMT MAD, class 0x%x, method 0x%x, view=%s for attr 0x%x. Rejecting\n", + slave, smp->method, smp->mgmt_class, + network_view ? "Network" : "Host", + be16_to_cpu(smp->attr_id)); + return -EPERM; + } } - /*default:*/ + return mlx4_cmd_box(dev, inbox->dma, outbox->dma, - vhcr->in_modifier, vhcr->op_modifier, + vhcr->in_modifier, opcode_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); } @@ -2537,3 +2550,9 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state); + +int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port) +{ + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ba87bd21295a..83612faa819c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1234,4 +1234,5 @@ int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port); int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port); int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port); +int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3-71-gd317 From 99ec41d0a48cb6d14af25765f9449762f9d101f6 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 29 May 2014 16:31:03 +0300 Subject: mlx4: Add infrastructure for selecting VFs to enable QP0 via MLX proxy QPs This commit adds the infrastructure for enabling selected VFs to operate SMI (QP0) MADs without restriction. Additionally, for these enabled VFs, their QP0 proxy and tunnel QPs are MLX QPs. As such, they operate over VL15. Therefore, they are not affected by "credit" problems or changes in the VLArb table (which may shut down VL0). Non-enabled VFs may only create UD proxy QP0 qps (which are forced by the hypervisor to send packets using the q-key it assigns and places in the qp-context). Thus, non-enabled VFs will not pose a security risk. The hypervisor discards any privileged MADs it receives from these non-enabled VFs. By default, all VFs are NOT enabled, and must explicitly be enabled by the administrator. The sysfs interface which operates the VF enablement infrastructure is provided in the next commit. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 52 +++++++++++++++++----- drivers/net/ethernet/mellanox/mlx4/cmd.c | 13 +++++- drivers/net/ethernet/mellanox/mlx4/fw.c | 44 ++++++++++++------ drivers/net/ethernet/mellanox/mlx4/fw.h | 1 + drivers/net/ethernet/mellanox/mlx4/main.c | 16 +++++-- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 8 ++++ .../net/ethernet/mellanox/mlx4/resource_tracker.c | 26 ++++++++--- include/linux/mlx4/device.h | 1 + 8 files changed, 126 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 2e8c58806e2f..b25600997cb6 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -608,6 +608,16 @@ static int qp_has_rq(struct ib_qp_init_attr *attr) return !attr->srq; } +static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn) +{ + int i; + for (i = 0; i < dev->caps.num_ports; i++) { + if (qpn == dev->caps.qp0_proxy[i]) + return !!dev->caps.qp0_qkey[i]; + } + return 0; +} + static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp) @@ -625,10 +635,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) { if (init_attr->qp_type == IB_QPT_GSI) qp_type = MLX4_IB_QPT_PROXY_GSI; - else if (mlx4_is_master(dev->dev)) - qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER; - else - qp_type = MLX4_IB_QPT_PROXY_SMI; + else { + if (mlx4_is_master(dev->dev) || + qp0_enabled_vf(dev->dev, sqpn)) + qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER; + else + qp_type = MLX4_IB_QPT_PROXY_SMI; + } } qpn = sqpn; /* add extra sg entry for tunneling */ @@ -643,7 +656,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, return -EINVAL; if (tnl_init->proxy_qp_type == IB_QPT_GSI) qp_type = MLX4_IB_QPT_TUN_GSI; - else if (tnl_init->slave == mlx4_master_func_num(dev->dev)) + else if (tnl_init->slave == mlx4_master_func_num(dev->dev) || + mlx4_vf_smi_enabled(dev->dev, tnl_init->slave, + tnl_init->port)) qp_type = MLX4_IB_QPT_TUN_SMI_OWNER; else qp_type = MLX4_IB_QPT_TUN_SMI; @@ -1930,6 +1945,19 @@ out: return err; } +static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) +{ + int i; + for (i = 0; i < dev->caps.num_ports; i++) { + if (qpn == dev->caps.qp0_proxy[i] || + qpn == dev->caps.qp0_tunnel[i]) { + *qkey = dev->caps.qp0_qkey[i]; + return 0; + } + } + return -EINVAL; +} + static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) @@ -1987,8 +2015,13 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); - if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) - return -EINVAL; + if (mlx4_is_master(mdev->dev)) { + if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + return -EINVAL; + } else { + if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + return -EINVAL; + } sqp->ud_header.deth.qkey = cpu_to_be32(qkey); sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn); @@ -2682,11 +2715,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case MLX4_IB_QPT_PROXY_SMI_OWNER: - if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) { - err = -ENOSYS; - *bad_wr = wr; - goto out; - } err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index b0e48d426fce..26c3ebaa49d1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1666,6 +1666,8 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) for (port = min_port; port <= max_port; port++) { if (!test_bit(port - 1, actv_ports.ports)) continue; + priv->mfunc.master.vf_oper[slave].smi_enabled[port] = + priv->mfunc.master.vf_admin[slave].enable_smi[port]; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; vp_oper->state = *vp_admin; @@ -1717,6 +1719,8 @@ static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave for (port = min_port; port <= max_port; port++) { if (!test_bit(port - 1, actv_ports.ports)) continue; + priv->mfunc.master.vf_oper[slave].smi_enabled[port] = + MLX4_VF_SMI_DISABLED; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (NO_INDX != vp_oper->vlan_idx) { __mlx4_unregister_vlan(&priv->dev, @@ -2553,6 +2557,13 @@ EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state); int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port) { - return 0; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (slave < 1 || slave >= dev->num_slaves || + port < 1 || port > MLX4_MAX_PORTS) + return 0; + + return priv->mfunc.master.vf_oper[slave].smi_enabled[port] == + MLX4_VF_SMI_ENABLED; } EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index ef242e19766f..01e6dd61ee3c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -178,8 +178,8 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); - u8 field; - u32 size; + u8 field, port; + u32 size, proxy_qp, qkey; int err = 0; #define QUERY_FUNC_CAP_FLAGS_OFFSET 0x0 @@ -209,6 +209,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, /* when opcode modifier = 1 */ #define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3 +#define QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET 0x4 #define QUERY_FUNC_CAP_FLAGS0_OFFSET 0x8 #define QUERY_FUNC_CAP_FLAGS1_OFFSET 0xc @@ -221,6 +222,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_FLAGS1_FORCE_MAC 0x40 #define QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN 0x80 #define QUERY_FUNC_CAP_FLAGS1_NIC_INFO 0x10 +#define QUERY_FUNC_CAP_VF_ENABLE_QP0 0x08 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 @@ -234,28 +236,35 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, return -EINVAL; vhcr->in_modifier = converted_port; - /* Set nic_info bit to mark new fields support */ - field = QUERY_FUNC_CAP_FLAGS1_NIC_INFO; - MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET); - /* phys-port = logical-port */ field = vhcr->in_modifier - find_first_bit(actv_ports.ports, dev->caps.num_ports); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); - field = vhcr->in_modifier; + port = vhcr->in_modifier; + proxy_qp = dev->phys_caps.base_proxy_sqpn + 8 * slave + port - 1; + + /* Set nic_info bit to mark new fields support */ + field = QUERY_FUNC_CAP_FLAGS1_NIC_INFO; + + if (mlx4_vf_smi_enabled(dev, slave, port) && + !mlx4_get_parav_qkey(dev, proxy_qp, &qkey)) { + field |= QUERY_FUNC_CAP_VF_ENABLE_QP0; + MLX4_PUT(outbox->buf, qkey, + QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET); + } + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET); + /* size is now the QP number */ - size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1; + size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + port - 1; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL); size += 2; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL); - size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY); - - size += 2; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY); + MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP0_PROXY); + proxy_qp += 2; + MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP1_PROXY); MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier], QUERY_FUNC_CAP_PHYS_PORT_ID); @@ -326,7 +335,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u8 field, op_modifier; - u32 size; + u32 size, qkey; int err = 0, quotas = 0; op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ @@ -442,6 +451,13 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, goto out; } + if (func_cap->flags1 & QUERY_FUNC_CAP_VF_ENABLE_QP0) { + MLX4_GET(qkey, outbox, QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET); + func_cap->qp0_qkey = qkey; + } else { + func_cap->qp0_qkey = 0; + } + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL); func_cap->qp0_tunnel_qpn = size & 0xFFFFFF; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 6811ee00ba7c..1fce03ebe5c4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -134,6 +134,7 @@ struct mlx4_func_cap { int max_eq; int reserved_eq; int mcg_quota; + u32 qp0_qkey; u32 qp0_tunnel_qpn; u32 qp0_proxy_qpn; u32 qp1_tunnel_qpn; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 12a7ee2e6098..908326876ab5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -666,13 +666,15 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENODEV; } + dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL); dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || - !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { + !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy || + !dev->caps.qp0_qkey) { err = -ENOMEM; goto err_mem; } @@ -684,6 +686,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) " port %d, aborting (%d).\n", i, err); goto err_mem; } + dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey; dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; @@ -729,12 +732,16 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return 0; err_mem: + kfree(dev->caps.qp0_qkey); kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); - dev->caps.qp0_tunnel = dev->caps.qp0_proxy = - dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; + dev->caps.qp0_qkey = NULL; + dev->caps.qp0_tunnel = NULL; + dev->caps.qp0_proxy = NULL; + dev->caps.qp1_tunnel = NULL; + dev->caps.qp1_proxy = NULL; return err; } @@ -1697,6 +1704,7 @@ unmap_bf: unmap_bf_area(dev); if (mlx4_is_slave(dev)) { + kfree(dev->caps.qp0_qkey); kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); @@ -2573,6 +2581,7 @@ err_master_mfunc: mlx4_multi_func_cleanup(dev); if (mlx4_is_slave(dev)) { + kfree(dev->caps.qp0_qkey); kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); @@ -2702,6 +2711,7 @@ static void __mlx4_remove_one(struct pci_dev *pdev) if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); + kfree(dev->caps.qp0_qkey); kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index f9c465101963..0efd1738fcb3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -133,6 +133,11 @@ enum { MLX4_COMM_CMD_FLR = 254 }; +enum { + MLX4_VF_SMI_DISABLED, + MLX4_VF_SMI_ENABLED +}; + /*The flag indicates that the slave should delay the RESET cmd*/ #define MLX4_DELAY_RESET_SLAVE 0xbbbbbbb /*indicates how many retries will be done if we are in the middle of FLR*/ @@ -488,6 +493,7 @@ struct mlx4_vport_state { struct mlx4_vf_admin_state { struct mlx4_vport_state vport[MLX4_MAX_PORTS + 1]; + u8 enable_smi[MLX4_MAX_PORTS + 1]; }; struct mlx4_vport_oper_state { @@ -495,8 +501,10 @@ struct mlx4_vport_oper_state { int mac_idx; int vlan_idx; }; + struct mlx4_vf_oper_state { struct mlx4_vport_oper_state vport[MLX4_MAX_PORTS + 1]; + u8 smi_enabled[MLX4_MAX_PORTS + 1]; }; struct slave_list { diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 1c3fdd4a1f7d..ad98162a8d79 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2827,10 +2827,12 @@ static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start, } static int verify_qp_parameters(struct mlx4_dev *dev, + struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, enum qp_transition transition, u8 slave) { u32 qp_type; + u32 qpn; struct mlx4_qp_context *qp_ctx; enum mlx4_qp_optpar optpar; int port; @@ -2870,6 +2872,20 @@ static int verify_qp_parameters(struct mlx4_dev *dev, return -EINVAL; } break; + case MLX4_QP_ST_MLX: + qpn = vhcr->in_modifier & 0x7fffff; + port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; + if (transition == QP_TRANS_INIT2RTR && + slave != mlx4_master_func_num(dev) && + mlx4_is_qp_reserved(dev, qpn) && + !mlx4_vf_smi_enabled(dev, slave, port)) { + /* only enabled VFs may create MLX proxy QPs */ + mlx4_err(dev, "%s: unprivileged slave %d attempting to create an MLX proxy special QP on port %d\n", + __func__, slave, port); + return -EPERM; + } + break; + default: break; } @@ -3454,7 +3470,7 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, err = adjust_qp_sched_queue(dev, slave, qpc, inbox); if (err) return err; - err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave); + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_INIT2RTR, slave); if (err) return err; @@ -3508,7 +3524,7 @@ int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, err = adjust_qp_sched_queue(dev, slave, context, inbox); if (err) return err; - err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave); + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTR2RTS, slave); if (err) return err; @@ -3530,7 +3546,7 @@ int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, err = adjust_qp_sched_queue(dev, slave, context, inbox); if (err) return err; - err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave); + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTS2RTS, slave); if (err) return err; @@ -3567,7 +3583,7 @@ int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, err = adjust_qp_sched_queue(dev, slave, context, inbox); if (err) return err; - err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave); + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2SQD, slave); if (err) return err; @@ -3589,7 +3605,7 @@ int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, err = adjust_qp_sched_queue(dev, slave, context, inbox); if (err) return err; - err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave); + err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2RTS, slave); if (err) return err; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 83612faa819c..e2fc7011314c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -401,6 +401,7 @@ struct mlx4_caps { int max_rq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; + u32 *qp0_qkey; u32 *qp0_proxy; u32 *qp1_proxy; u32 *qp0_tunnel; -- cgit v1.2.3-71-gd317 From 65fed8a8c155271cf647651bd62eecb5928ae3a4 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 29 May 2014 16:31:04 +0300 Subject: IB/mlx4: Add interface for selecting VFs to enable QP0 via MLX proxy QPs This commit adds the sysfs interface for enabling QP0 on VFs for selected VF/port. By default, no VFs are enabled for QP0 operation. To enable QP0 operation on a VF/port, under /sys/class/infiniband/mlx4_x/iov//ports/x there are two new entries: - smi_enabled (read-only). Indicates whether smi is currently enabled for the indicated VF/port - enable_smi_admin (rw). Used by the admin to request that smi capability be enabled or disabled for the indicated VF/port. 0 = disable, 1 = enable. The requested enablement will occur at the next reset of the VF (e.g. driver restart on the VM which owns the VF). Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/sysfs.c | 105 ++++++++++++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/cmd.c | 34 ++++++++++ include/linux/mlx4/device.h | 3 + 3 files changed, 141 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 5a38e43eca65..cb4c66e723b5 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -389,8 +389,10 @@ struct mlx4_port { struct mlx4_ib_dev *dev; struct attribute_group pkey_group; struct attribute_group gid_group; - u8 port_num; + struct device_attribute enable_smi_admin; + struct device_attribute smi_enabled; int slave; + u8 port_num; }; @@ -558,6 +560,101 @@ err: return NULL; } +static ssize_t sysfs_show_smi_enabled(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mlx4_port *p = + container_of(attr, struct mlx4_port, smi_enabled); + ssize_t len = 0; + + if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num)) + len = sprintf(buf, "%d\n", 1); + else + len = sprintf(buf, "%d\n", 0); + + return len; +} + +static ssize_t sysfs_show_enable_smi_admin(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlx4_port *p = + container_of(attr, struct mlx4_port, enable_smi_admin); + ssize_t len = 0; + + if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num)) + len = sprintf(buf, "%d\n", 1); + else + len = sprintf(buf, "%d\n", 0); + + return len; +} + +static ssize_t sysfs_store_enable_smi_admin(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mlx4_port *p = + container_of(attr, struct mlx4_port, enable_smi_admin); + int enable; + + if (sscanf(buf, "%i", &enable) != 1 || + enable < 0 || enable > 1) + return -EINVAL; + + if (mlx4_vf_set_enable_smi_admin(p->dev->dev, p->slave, p->port_num, enable)) + return -EINVAL; + return count; +} + +static int add_vf_smi_entries(struct mlx4_port *p) +{ + int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) == + IB_LINK_LAYER_ETHERNET; + int ret; + + /* do not display entries if eth transport, or if master */ + if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev)) + return 0; + + sysfs_attr_init(&p->smi_enabled.attr); + p->smi_enabled.show = sysfs_show_smi_enabled; + p->smi_enabled.store = NULL; + p->smi_enabled.attr.name = "smi_enabled"; + p->smi_enabled.attr.mode = 0444; + ret = sysfs_create_file(&p->kobj, &p->smi_enabled.attr); + if (ret) { + pr_err("failed to create smi_enabled\n"); + return ret; + } + + sysfs_attr_init(&p->enable_smi_admin.attr); + p->enable_smi_admin.show = sysfs_show_enable_smi_admin; + p->enable_smi_admin.store = sysfs_store_enable_smi_admin; + p->enable_smi_admin.attr.name = "enable_smi_admin"; + p->enable_smi_admin.attr.mode = 0644; + ret = sysfs_create_file(&p->kobj, &p->enable_smi_admin.attr); + if (ret) { + pr_err("failed to create enable_smi_admin\n"); + sysfs_remove_file(&p->kobj, &p->smi_enabled.attr); + return ret; + } + return 0; +} + +static void remove_vf_smi_entries(struct mlx4_port *p) +{ + int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) == + IB_LINK_LAYER_ETHERNET; + + if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev)) + return; + + sysfs_remove_file(&p->kobj, &p->smi_enabled.attr); + sysfs_remove_file(&p->kobj, &p->enable_smi_admin.attr); +} + static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) { struct mlx4_port *p; @@ -602,6 +699,10 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) if (ret) goto err_free_gid; + ret = add_vf_smi_entries(p); + if (ret) + goto err_free_gid; + list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]); return 0; @@ -669,6 +770,7 @@ err_add: mport = container_of(p, struct mlx4_port, kobj); sysfs_remove_group(p, &mport->pkey_group); sysfs_remove_group(p, &mport->gid_group); + remove_vf_smi_entries(mport); kobject_put(p); } kobject_put(dev->dev_ports_parent[slave]); @@ -713,6 +815,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device) port = container_of(p, struct mlx4_port, kobj); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); + remove_vf_smi_entries(port); kobject_put(p); kobject_put(device->dev_ports_parent[slave]); } diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 26c3ebaa49d1..3370ecb8c3d2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2567,3 +2567,37 @@ int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port) MLX4_VF_SMI_ENABLED; } EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled); + +int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (slave == mlx4_master_func_num(dev)) + return 1; + + if (slave < 1 || slave >= dev->num_slaves || + port < 1 || port > MLX4_MAX_PORTS) + return 0; + + return priv->mfunc.master.vf_admin[slave].enable_smi[port] == + MLX4_VF_SMI_ENABLED; +} +EXPORT_SYMBOL_GPL(mlx4_vf_get_enable_smi_admin); + +int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, + int enabled) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (slave == mlx4_master_func_num(dev)) + return 0; + + if (slave < 1 || slave >= dev->num_slaves || + port < 1 || port > MLX4_MAX_PORTS || + enabled < 0 || enabled > 1) + return -EINVAL; + + priv->mfunc.master.vf_admin[slave].enable_smi[port] = enabled; + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_vf_set_enable_smi_admin); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e2fc7011314c..dcabe11f37f7 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1236,4 +1236,7 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port); int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port); int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); +int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); +int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, + int enable); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3-71-gd317 From 90dfdc7ceb577c0d7b7635def3c62039a091e50d Mon Sep 17 00:00:00 2001 From: David Daney Date: Wed, 28 May 2014 23:52:12 +0200 Subject: MIPS: Add functions for hypervisor call Introduce kvm_hypercall[0-3]. Define three new hypercalls for MIPS: GET_CLOCK_FREQ, EXIT_VM, and CONSOLE_OUTPUT. [andreas.herrmann: * Properly define hypercalls and HC numbers for MIPS in kvm_para.h header files] Signed-off-by: David Daney Signed-off-by: Andreas Herrmann Cc: linux-mips@linux-mips.org Cc: James Hogan Cc: kvm@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/7005/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/kvm_para.h | 109 ++++++++++++++++++++++++++++++++++ arch/mips/include/uapi/asm/kvm_para.h | 6 +- include/uapi/linux/kvm_para.h | 3 + 3 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 arch/mips/include/asm/kvm_para.h (limited to 'include') diff --git a/arch/mips/include/asm/kvm_para.h b/arch/mips/include/asm/kvm_para.h new file mode 100644 index 000000000000..5a9aa918abe6 --- /dev/null +++ b/arch/mips/include/asm/kvm_para.h @@ -0,0 +1,109 @@ +#ifndef _ASM_MIPS_KVM_PARA_H +#define _ASM_MIPS_KVM_PARA_H + +#include + +#define KVM_HYPERCALL ".word 0x42000028" + +/* + * Hypercalls for KVM. + * + * Hypercall number is passed in v0. + * Return value will be placed in v0. + * Up to 3 arguments are passed in a0, a1, and a2. + */ +static inline unsigned long kvm_hypercall0(unsigned long num) +{ + register unsigned long n asm("v0"); + register unsigned long r asm("v0"); + + n = num; + __asm__ __volatile__( + KVM_HYPERCALL + : "=r" (r) : "r" (n) : "memory" + ); + + return r; +} + +static inline unsigned long kvm_hypercall1(unsigned long num, + unsigned long arg0) +{ + register unsigned long n asm("v0"); + register unsigned long r asm("v0"); + register unsigned long a0 asm("a0"); + + n = num; + a0 = arg0; + __asm__ __volatile__( + KVM_HYPERCALL + : "=r" (r) : "r" (n), "r" (a0) : "memory" + ); + + return r; +} + +static inline unsigned long kvm_hypercall2(unsigned long num, + unsigned long arg0, unsigned long arg1) +{ + register unsigned long n asm("v0"); + register unsigned long r asm("v0"); + register unsigned long a0 asm("a0"); + register unsigned long a1 asm("a1"); + + n = num; + a0 = arg0; + a1 = arg1; + __asm__ __volatile__( + KVM_HYPERCALL + : "=r" (r) : "r" (n), "r" (a0), "r" (a1) : "memory" + ); + + return r; +} + +static inline unsigned long kvm_hypercall3(unsigned long num, + unsigned long arg0, unsigned long arg1, unsigned long arg2) +{ + register unsigned long n asm("v0"); + register unsigned long r asm("v0"); + register unsigned long a0 asm("a0"); + register unsigned long a1 asm("a1"); + register unsigned long a2 asm("a2"); + + n = num; + a0 = arg0; + a1 = arg1; + a2 = arg2; + __asm__ __volatile__( + KVM_HYPERCALL + : "=r" (r) : "r" (n), "r" (a0), "r" (a1), "r" (a2) : "memory" + ); + + return r; +} + +static inline bool kvm_check_and_clear_guest_paused(void) +{ + return false; +} + +static inline unsigned int kvm_arch_para_features(void) +{ + return 0; +} + +#ifdef CONFIG_MIPS_PARAVIRT +static inline bool kvm_para_available(void) +{ + return true; +} +#else +static inline bool kvm_para_available(void) +{ + return false; +} +#endif + + +#endif /* _ASM_MIPS_KVM_PARA_H */ diff --git a/arch/mips/include/uapi/asm/kvm_para.h b/arch/mips/include/uapi/asm/kvm_para.h index 14fab8f0b957..7e16d7c42e65 100644 --- a/arch/mips/include/uapi/asm/kvm_para.h +++ b/arch/mips/include/uapi/asm/kvm_para.h @@ -1 +1,5 @@ -#include +#ifndef _UAPI_ASM_MIPS_KVM_PARA_H +#define _UAPI_ASM_MIPS_KVM_PARA_H + + +#endif /* _UAPI_ASM_MIPS_KVM_PARA_H */ diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h index 2841f86eae0b..bf6cd7d5cac2 100644 --- a/include/uapi/linux/kvm_para.h +++ b/include/uapi/linux/kvm_para.h @@ -20,6 +20,9 @@ #define KVM_HC_FEATURES 3 #define KVM_HC_PPC_MAP_MAGIC_PAGE 4 #define KVM_HC_KICK_CPU 5 +#define KVM_HC_MIPS_GET_CLOCK_FREQ 6 +#define KVM_HC_MIPS_EXIT_VM 7 +#define KVM_HC_MIPS_CONSOLE_OUTPUT 8 /* * hypercalls use architecture specific -- cgit v1.2.3-71-gd317 From 3e19ce762b537dd9aeefdd0849ba5f2f01ff83cf Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 25 Feb 2014 17:44:21 -0500 Subject: rpc: xdr_truncate_encode This will be used in the server side in a few cases: - when certain operations (read, readdir, readlink) fail after encoding a partial response. - when we run out of space after encoding a partial response. - in readlink, where we initially reserve PAGE_SIZE bytes for data, then truncate to the actual size. Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/xdr.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 15f9204ee70b..e7bb2e3bd0fb 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -215,6 +215,7 @@ typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); +extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index dd97ba3c4456..352f3b35bbe5 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -508,6 +508,72 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) } EXPORT_SYMBOL_GPL(xdr_reserve_space); +/** + * xdr_truncate_encode - truncate an encode buffer + * @xdr: pointer to xdr_stream + * @len: new length of buffer + * + * Truncates the xdr stream, so that xdr->buf->len == len, + * and xdr->p points at offset len from the start of the buffer, and + * head, tail, and page lengths are adjusted to correspond. + * + * If this means moving xdr->p to a different buffer, we assume that + * that the end pointer should be set to the end of the current page, + * except in the case of the head buffer when we assume the head + * buffer's current length represents the end of the available buffer. + * + * This is *not* safe to use on a buffer that already has inlined page + * cache pages (as in a zero-copy server read reply), except for the + * simple case of truncating from one position in the tail to another. + * + */ +void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) +{ + struct xdr_buf *buf = xdr->buf; + struct kvec *head = buf->head; + struct kvec *tail = buf->tail; + int fraglen; + int new, old; + + if (len > buf->len) { + WARN_ON_ONCE(1); + return; + } + + fraglen = min_t(int, buf->len - len, tail->iov_len); + tail->iov_len -= fraglen; + buf->len -= fraglen; + if (tail->iov_len && buf->len == len) { + xdr->p = tail->iov_base + tail->iov_len; + /* xdr->end, xdr->iov should be set already */ + return; + } + WARN_ON_ONCE(fraglen); + fraglen = min_t(int, buf->len - len, buf->page_len); + buf->page_len -= fraglen; + buf->len -= fraglen; + + new = buf->page_base + buf->page_len; + old = new + fraglen; + xdr->page_ptr -= (old >> PAGE_SHIFT) - (new >> PAGE_SHIFT); + + if (buf->page_len && buf->len == len) { + xdr->p = page_address(*xdr->page_ptr); + xdr->end = (void *)xdr->p + PAGE_SIZE; + xdr->p = (void *)xdr->p + (new % PAGE_SIZE); + /* xdr->iov should already be NULL */ + return; + } + if (fraglen) + xdr->end = head->iov_base + head->iov_len; + /* (otherwise assume xdr->end is already set) */ + head->iov_len = len; + buf->len = len; + xdr->p = head->iov_base + head->iov_len; + xdr->iov = buf->head; +} +EXPORT_SYMBOL(xdr_truncate_encode); + /** * xdr_write_pages - Insert a list of pages into an XDR buffer for sending * @xdr: pointer to xdr_stream -- cgit v1.2.3-71-gd317 From 2825a7f90753012babe7ee292f4a1eadd3706f92 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 26 Aug 2013 16:04:46 -0400 Subject: nfsd4: allow encoding across page boundaries After this we can handle for example getattr of very large ACLs. Read, readdir, readlink are still special cases with their own limits. Also we can't handle a new operation starting close to the end of a page. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 4 +++ fs/nfsd/nfs4xdr.c | 60 +++++++++++++++++++++++++---------- include/linux/sunrpc/svc.h | 1 + include/linux/sunrpc/xdr.h | 1 + net/sunrpc/svc_xprt.c | 1 + net/sunrpc/xdr.c | 78 ++++++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 126 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 3ce431b9b577..5d8f9158701d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1264,6 +1264,10 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; /* Tail and page_len should be zero at this point: */ buf->len = buf->head[0].iov_len; + xdr->scratch.iov_len = 0; + xdr->page_ptr = buf->pages; + buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) + - 2 * RPC_MAX_AUTH_SIZE; } /* diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index bd529e523087..d3a576dbd99b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1624,6 +1624,7 @@ static int nfsd4_max_reply(u32 opnum) * the head and tail in another page: */ return 2 * PAGE_SIZE; + case OP_GETATTR: case OP_READ: return INT_MAX; default: @@ -2560,21 +2561,31 @@ out_resource: goto out; } +static void svcxdr_init_encode_from_buffer(struct xdr_stream *xdr, + struct xdr_buf *buf, __be32 *p, int bytes) +{ + xdr->scratch.iov_len = 0; + memset(buf, 0, sizeof(struct xdr_buf)); + buf->head[0].iov_base = p; + buf->head[0].iov_len = 0; + buf->len = 0; + xdr->buf = buf; + xdr->iov = buf->head; + xdr->p = p; + xdr->end = (void *)p + bytes; + buf->buflen = bytes; +} + __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, u32 *bmval, struct svc_rqst *rqstp, int ignore_crossmnt) { - struct xdr_buf dummy = { - .head[0] = { - .iov_base = *p, - }, - .buflen = words << 2, - }; + struct xdr_buf dummy; struct xdr_stream xdr; __be32 ret; - xdr_init_encode(&xdr, &dummy, NULL); + svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2); ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp, ignore_crossmnt); *p = xdr.p; @@ -3064,8 +3075,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr) return nfserr; - if (resp->xdr.buf->page_len) - return nfserr_resource; p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ if (!p) @@ -3075,6 +3084,9 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (xdr->end - xdr->p < 1) return nfserr_resource; + if (resp->xdr.buf->page_len) + return nfserr_resource; + maxcount = svc_max_payload(resp->rqstp); if (maxcount > read->rd_length) maxcount = read->rd_length; @@ -3119,6 +3131,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, - (char *)resp->xdr.buf->head[0].iov_base); resp->xdr.buf->page_len = maxcount; xdr->buf->len += maxcount; + xdr->page_ptr += v; + xdr->buf->buflen = maxcount + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ @@ -3145,6 +3159,11 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd if (nfserr) return nfserr; + + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + if (resp->xdr.buf->page_len) return nfserr_resource; if (!*resp->rqstp->rq_next_page) @@ -3154,10 +3173,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd maxcount = PAGE_SIZE; - p = xdr_reserve_space(xdr, 4); - if (!p) - return nfserr_resource; - if (xdr->end - xdr->p < 1) return nfserr_resource; @@ -3180,6 +3195,8 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd - (char *)resp->xdr.buf->head[0].iov_base; resp->xdr.buf->page_len = maxcount; xdr->buf->len += maxcount; + xdr->page_ptr += 1; + xdr->buf->buflen -= PAGE_SIZE; xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ @@ -3206,15 +3223,16 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 if (nfserr) return nfserr; - if (resp->xdr.buf->page_len) - return nfserr_resource; - if (!*resp->rqstp->rq_next_page) - return nfserr_resource; p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); if (!p) return nfserr_resource; + if (resp->xdr.buf->page_len) + return nfserr_resource; + if (!*resp->rqstp->rq_next_page) + return nfserr_resource; + /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ WRITE32(0); WRITE32(0); @@ -3266,6 +3284,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 xdr->iov = xdr->buf->tail; + xdr->page_ptr++; + xdr->buf->buflen -= PAGE_SIZE; + xdr->iov = xdr->buf->tail; + /* Use rest of head for padding and remaining ops: */ resp->xdr.buf->tail[0].iov_base = tailbase; resp->xdr.buf->tail[0].iov_len = 0; @@ -3800,6 +3822,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) !nfsd4_enc_ops[op->opnum]); encoder = nfsd4_enc_ops[op->opnum]; op->status = encoder(resp, op->status, &op->u); + xdr_commit_encode(xdr); + /* nfsd4_check_resp_size guarantees enough room for error status */ if (!op->status) { int space_needed = 0; @@ -3919,6 +3943,8 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + buf->tail[0].iov_len); + rqstp->rq_next_page = resp->xdr.page_ptr + 1; + p = resp->tagp; *p++ = htonl(resp->taglen); memcpy(p, resp->tag, resp->taglen); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index a0dbbd1e00e9..85cb6472a423 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -244,6 +244,7 @@ struct svc_rqst { struct page * rq_pages[RPCSVC_MAXPAGES]; struct page * *rq_respages; /* points into rq_pages */ struct page * *rq_next_page; /* next reply page to use */ + struct page * *rq_page_end; /* one past the last page */ struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index e7bb2e3bd0fb..b23d69ffd5ec 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -215,6 +215,7 @@ typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); +extern void xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 29772e01b179..b4737fbdec13 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -597,6 +597,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) } rqstp->rq_pages[i] = p; } + rqstp->rq_page_end = &rqstp->rq_pages[i]; rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ /* Make arg->head point to first page and arg->pages point to rest */ diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 352f3b35bbe5..2b546e8ce43d 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -462,6 +462,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) struct kvec *iov = buf->head; int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len; + xdr_set_scratch_buffer(xdr, NULL, 0); BUG_ON(scratch_len < 0); xdr->buf = buf; xdr->iov = iov; @@ -481,6 +482,74 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) } EXPORT_SYMBOL_GPL(xdr_init_encode); +/** + * xdr_commit_encode - Ensure all data is written to buffer + * @xdr: pointer to xdr_stream + * + * We handle encoding across page boundaries by giving the caller a + * temporary location to write to, then later copying the data into + * place; xdr_commit_encode does that copying. + * + * Normally the caller doesn't need to call this directly, as the + * following xdr_reserve_space will do it. But an explicit call may be + * required at the end of encoding, or any other time when the xdr_buf + * data might be read. + */ +void xdr_commit_encode(struct xdr_stream *xdr) +{ + int shift = xdr->scratch.iov_len; + void *page; + + if (shift == 0) + return; + page = page_address(*xdr->page_ptr); + memcpy(xdr->scratch.iov_base, page, shift); + memmove(page, page + shift, (void *)xdr->p - page); + xdr->scratch.iov_len = 0; +} +EXPORT_SYMBOL_GPL(xdr_commit_encode); + +__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes) +{ + static __be32 *p; + int space_left; + int frag1bytes, frag2bytes; + + if (nbytes > PAGE_SIZE) + return NULL; /* Bigger buffers require special handling */ + if (xdr->buf->len + nbytes > xdr->buf->buflen) + return NULL; /* Sorry, we're totally out of space */ + frag1bytes = (xdr->end - xdr->p) << 2; + frag2bytes = nbytes - frag1bytes; + if (xdr->iov) + xdr->iov->iov_len += frag1bytes; + else { + xdr->buf->page_len += frag1bytes; + xdr->page_ptr++; + } + xdr->iov = NULL; + /* + * If the last encode didn't end exactly on a page boundary, the + * next one will straddle boundaries. Encode into the next + * page, then copy it back later in xdr_commit_encode. We use + * the "scratch" iov to track any temporarily unused fragment of + * space at the end of the previous buffer: + */ + xdr->scratch.iov_base = xdr->p; + xdr->scratch.iov_len = frag1bytes; + p = page_address(*xdr->page_ptr); + /* + * Note this is where the next encode will start after we've + * shifted this one back: + */ + xdr->p = (void *)p + frag2bytes; + space_left = xdr->buf->buflen - xdr->buf->len; + xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE); + xdr->buf->page_len += frag2bytes; + xdr->buf->len += nbytes; + return p; +} + /** * xdr_reserve_space - Reserve buffer space for sending * @xdr: pointer to xdr_stream @@ -495,14 +564,18 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) __be32 *p = xdr->p; __be32 *q; + xdr_commit_encode(xdr); /* align nbytes on the next 32-bit boundary */ nbytes += 3; nbytes &= ~3; q = p + (nbytes >> 2); if (unlikely(q > xdr->end || q < p)) - return NULL; + return xdr_get_next_encode_buffer(xdr, nbytes); xdr->p = q; - xdr->iov->iov_len += nbytes; + if (xdr->iov) + xdr->iov->iov_len += nbytes; + else + xdr->buf->page_len += nbytes; xdr->buf->len += nbytes; return p; } @@ -539,6 +612,7 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) WARN_ON_ONCE(1); return; } + xdr_commit_encode(xdr); fraglen = min_t(int, buf->len - len, tail->iov_len); tail->iov_len -= fraglen; -- cgit v1.2.3-71-gd317 From db3f58a95beea6752d90fed03f9f198d282a3913 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 6 Mar 2014 13:22:18 -0500 Subject: rpc: define xdr_restrict_buflen With this xdr_reserve_space can help us enforce various limits. Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/xdr.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index b23d69ffd5ec..70c6b92e15a7 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -217,6 +217,7 @@ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern void xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); +extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 2b546e8ce43d..39928444c7fb 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -648,6 +648,35 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) } EXPORT_SYMBOL(xdr_truncate_encode); +/** + * xdr_restrict_buflen - decrease available buffer space + * @xdr: pointer to xdr_stream + * @newbuflen: new maximum number of bytes available + * + * Adjust our idea of how much space is available in the buffer. + * If we've already used too much space in the buffer, returns -1. + * If the available space is already smaller than newbuflen, returns 0 + * and does nothing. Otherwise, adjusts xdr->buf->buflen to newbuflen + * and ensures xdr->end is set at most offset newbuflen from the start + * of the buffer. + */ +int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen) +{ + struct xdr_buf *buf = xdr->buf; + int left_in_this_buf = (void *)xdr->end - (void *)xdr->p; + int end_offset = buf->len + left_in_this_buf; + + if (newbuflen < 0 || newbuflen < buf->len) + return -1; + if (newbuflen > buf->buflen) + return 0; + if (newbuflen < end_offset) + xdr->end = (void *)xdr->end + newbuflen - end_offset; + buf->buflen = newbuflen; + return 0; +} +EXPORT_SYMBOL(xdr_restrict_buflen); + /** * xdr_write_pages - Insert a list of pages into an XDR buffer for sending * @xdr: pointer to xdr_stream -- cgit v1.2.3-71-gd317 From a5cddc885b99458df963a75abbe0b40cbef56c48 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 12 May 2014 18:10:58 -0400 Subject: nfsd4: better reservation of head space for krb5 RPC_MAX_AUTH_SIZE is scattered around several places. Better to set it once in the auth code, where this kind of estimate should be made. And while we're at it we can leave it zero when we're not using krb5i or krb5p. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 4 ++-- fs/nfsd/nfs4state.c | 2 +- fs/nfsd/nfs4xdr.c | 5 +++-- include/linux/sunrpc/svc.h | 11 +++++------ net/sunrpc/auth_gss/svcauth_gss.c | 2 ++ net/sunrpc/svcauth.c | 2 ++ 6 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2d786b813c7e..16e71d033ea5 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1261,13 +1261,13 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, xdr->buf = buf; xdr->iov = head; xdr->p = head->iov_base + head->iov_len; - xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; + xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; /* Tail and page_len should be zero at this point: */ buf->len = buf->head[0].iov_len; xdr->scratch.iov_len = 0; xdr->page_ptr = buf->pages; buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) - - 2 * RPC_MAX_AUTH_SIZE; + - rqstp->rq_auth_slack; } /* diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 62b882dc48ec..d0a016a502be 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2288,7 +2288,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, session->se_fchannel.maxresp_sz; status = (seq->cachethis) ? nfserr_rep_too_big_to_cache : nfserr_rep_too_big; - if (xdr_restrict_buflen(xdr, buflen - 2 * RPC_MAX_AUTH_SIZE)) + if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack)) goto out_put_session; svc_reserve(rqstp, buflen); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3e347a1caec4..470fe8998c9b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1611,7 +1611,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_HEAD; struct nfsd4_op *op; bool cachethis = false; - int max_reply = 2 * RPC_MAX_AUTH_SIZE + 8; /* opcnt, status */ + int auth_slack= argp->rqstp->rq_auth_slack; + int max_reply = auth_slack + 8; /* opcnt, status */ int readcount = 0; int readbytes = 0; int i; @@ -1677,7 +1678,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) svc_reserve(argp->rqstp, max_reply + readbytes); argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; - if (readcount > 1 || max_reply > PAGE_SIZE - 2*RPC_MAX_AUTH_SIZE) + if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) argp->rqstp->rq_splice_ok = false; DECODE_TAIL; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 85cb6472a423..1bc7cd05b22e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -260,7 +260,10 @@ struct svc_rqst { void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ void * rq_auth_data; /* flavor-specific data */ - + int rq_auth_slack; /* extra space xdr code + * should leave in head + * for krb5i, krb5p. + */ int rq_reserved; /* space on socket outq * reserved for this request */ @@ -456,11 +459,7 @@ char * svc_print_addr(struct svc_rqst *, char *, size_t); */ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) { - int added_space = 0; - - if (rqstp->rq_authop->flavour) - added_space = RPC_MAX_AUTH_SIZE; - svc_reserve(rqstp, space + added_space); + svc_reserve(rqstp, space + rqstp->rq_auth_slack); } #endif /* SUNRPC_SVC_H */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 0f73f4507746..4ce5eccec1f6 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1503,6 +1503,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) if (unwrap_integ_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; + rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE; break; case RPC_GSS_SVC_PRIVACY: /* placeholders for length and seq. number: */ @@ -1511,6 +1512,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) if (unwrap_priv_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; + rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE * 2; break; default: goto auth_err; diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 2af7b0cba43a..79c0f3459b5c 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -54,6 +54,8 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) } spin_unlock(&authtab_lock); + rqstp->rq_auth_slack = 0; + rqstp->rq_authop = aops; return aops->accept(rqstp, authp); } -- cgit v1.2.3-71-gd317 From 073ea2ae85629d4074596e2616588b13d4c665f7 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 7 May 2014 20:44:51 +0900 Subject: drm/exynos: dp: Use DPCD defines of drm_dp_helper.h Use DPCD defines of drm_dp_helper.h; thus, duplicated DPCD defines of exynos_dp_core.h can be removed. Also, DP_TEST_EDID_CHECKSUM define is added to drm_dp_helper.h. There is no functional change. Signed-off-by: Jingoo Han Reviewed-by: Sean Paul Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_dp_core.c | 98 ++++++++++++++++----------------- drivers/gpu/drm/exynos/exynos_dp_core.h | 59 ++------------------ include/drm/drm_dp_helper.h | 2 + 3 files changed, 55 insertions(+), 104 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.c b/drivers/gpu/drm/exynos/exynos_dp_core.c index 2b30c55ab050..ff63901e14c7 100644 --- a/drivers/gpu/drm/exynos/exynos_dp_core.c +++ b/drivers/gpu/drm/exynos/exynos_dp_core.c @@ -144,15 +144,15 @@ static int exynos_dp_read_edid(struct exynos_dp_device *dp) return -EIO; } - exynos_dp_read_byte_from_dpcd(dp, DPCD_ADDR_TEST_REQUEST, + exynos_dp_read_byte_from_dpcd(dp, DP_TEST_REQUEST, &test_vector); - if (test_vector & DPCD_TEST_EDID_READ) { + if (test_vector & DP_TEST_LINK_EDID_READ) { exynos_dp_write_byte_to_dpcd(dp, - DPCD_ADDR_TEST_EDID_CHECKSUM, + DP_TEST_EDID_CHECKSUM, edid[EDID_BLOCK_LENGTH + EDID_CHECKSUM]); exynos_dp_write_byte_to_dpcd(dp, - DPCD_ADDR_TEST_RESPONSE, - DPCD_TEST_EDID_CHECKSUM_WRITE); + DP_TEST_RESPONSE, + DP_TEST_EDID_CHECKSUM_WRITE); } } else { dev_info(dp->dev, "EDID data does not include any extensions.\n"); @@ -174,15 +174,15 @@ static int exynos_dp_read_edid(struct exynos_dp_device *dp) } exynos_dp_read_byte_from_dpcd(dp, - DPCD_ADDR_TEST_REQUEST, + DP_TEST_REQUEST, &test_vector); - if (test_vector & DPCD_TEST_EDID_READ) { + if (test_vector & DP_TEST_LINK_EDID_READ) { exynos_dp_write_byte_to_dpcd(dp, - DPCD_ADDR_TEST_EDID_CHECKSUM, + DP_TEST_EDID_CHECKSUM, edid[EDID_CHECKSUM]); exynos_dp_write_byte_to_dpcd(dp, - DPCD_ADDR_TEST_RESPONSE, - DPCD_TEST_EDID_CHECKSUM_WRITE); + DP_TEST_RESPONSE, + DP_TEST_EDID_CHECKSUM_WRITE); } } @@ -196,8 +196,8 @@ static int exynos_dp_handle_edid(struct exynos_dp_device *dp) int i; int retval; - /* Read DPCD DPCD_ADDR_DPCD_REV~RECEIVE_PORT1_CAP_1 */ - retval = exynos_dp_read_bytes_from_dpcd(dp, DPCD_ADDR_DPCD_REV, + /* Read DPCD DP_DPCD_REV~RECEIVE_PORT1_CAP_1 */ + retval = exynos_dp_read_bytes_from_dpcd(dp, DP_DPCD_REV, 12, buf); if (retval) return retval; @@ -217,14 +217,14 @@ static void exynos_dp_enable_rx_to_enhanced_mode(struct exynos_dp_device *dp, { u8 data; - exynos_dp_read_byte_from_dpcd(dp, DPCD_ADDR_LANE_COUNT_SET, &data); + exynos_dp_read_byte_from_dpcd(dp, DP_LANE_COUNT_SET, &data); if (enable) - exynos_dp_write_byte_to_dpcd(dp, DPCD_ADDR_LANE_COUNT_SET, - DPCD_ENHANCED_FRAME_EN | + exynos_dp_write_byte_to_dpcd(dp, DP_LANE_COUNT_SET, + DP_LANE_COUNT_ENHANCED_FRAME_EN | DPCD_LANE_COUNT_SET(data)); else - exynos_dp_write_byte_to_dpcd(dp, DPCD_ADDR_LANE_COUNT_SET, + exynos_dp_write_byte_to_dpcd(dp, DP_LANE_COUNT_SET, DPCD_LANE_COUNT_SET(data)); } @@ -233,7 +233,7 @@ static int exynos_dp_is_enhanced_mode_available(struct exynos_dp_device *dp) u8 data; int retval; - exynos_dp_read_byte_from_dpcd(dp, DPCD_ADDR_MAX_LANE_COUNT, &data); + exynos_dp_read_byte_from_dpcd(dp, DP_MAX_LANE_COUNT, &data); retval = DPCD_ENHANCED_FRAME_CAP(data); return retval; @@ -253,8 +253,8 @@ static void exynos_dp_training_pattern_dis(struct exynos_dp_device *dp) exynos_dp_set_training_pattern(dp, DP_NONE); exynos_dp_write_byte_to_dpcd(dp, - DPCD_ADDR_TRAINING_PATTERN_SET, - DPCD_TRAINING_PATTERN_DISABLED); + DP_TRAINING_PATTERN_SET, + DP_TRAINING_PATTERN_DISABLE); } static void exynos_dp_set_lane_lane_pre_emphasis(struct exynos_dp_device *dp, @@ -298,7 +298,7 @@ static int exynos_dp_link_start(struct exynos_dp_device *dp) /* Setup RX configuration */ buf[0] = dp->link_train.link_rate; buf[1] = dp->link_train.lane_count; - retval = exynos_dp_write_bytes_to_dpcd(dp, DPCD_ADDR_LINK_BW_SET, + retval = exynos_dp_write_bytes_to_dpcd(dp, DP_LINK_BW_SET, 2, buf); if (retval) return retval; @@ -325,16 +325,16 @@ static int exynos_dp_link_start(struct exynos_dp_device *dp) /* Set RX training pattern */ retval = exynos_dp_write_byte_to_dpcd(dp, - DPCD_ADDR_TRAINING_PATTERN_SET, - DPCD_SCRAMBLING_DISABLED | DPCD_TRAINING_PATTERN_1); + DP_TRAINING_PATTERN_SET, + DP_LINK_SCRAMBLING_DISABLE | DP_TRAINING_PATTERN_1); if (retval) return retval; for (lane = 0; lane < lane_count; lane++) - buf[lane] = DPCD_PRE_EMPHASIS_PATTERN2_LEVEL0 | - DPCD_VOLTAGE_SWING_PATTERN1_LEVEL0; + buf[lane] = DP_TRAIN_PRE_EMPHASIS_0 | + DP_TRAIN_VOLTAGE_SWING_400; - retval = exynos_dp_write_bytes_to_dpcd(dp, DPCD_ADDR_TRAINING_LANE0_SET, + retval = exynos_dp_write_bytes_to_dpcd(dp, DP_TRAINING_LANE0_SET, lane_count, buf); return retval; @@ -355,7 +355,7 @@ static int exynos_dp_clock_recovery_ok(u8 link_status[2], int lane_count) for (lane = 0; lane < lane_count; lane++) { lane_status = exynos_dp_get_lane_status(link_status, lane); - if ((lane_status & DPCD_LANE_CR_DONE) == 0) + if ((lane_status & DP_LANE_CR_DONE) == 0) return -EINVAL; } return 0; @@ -367,13 +367,13 @@ static int exynos_dp_channel_eq_ok(u8 link_status[2], u8 link_align, int lane; u8 lane_status; - if ((link_align & DPCD_INTERLANE_ALIGN_DONE) == 0) + if ((link_align & DP_INTERLANE_ALIGN_DONE) == 0) return -EINVAL; for (lane = 0; lane < lane_count; lane++) { lane_status = exynos_dp_get_lane_status(link_status, lane); - lane_status &= DPCD_CHANNEL_EQ_BITS; - if (lane_status != DPCD_CHANNEL_EQ_BITS) + lane_status &= DP_CHANNEL_EQ_BITS; + if (lane_status != DP_CHANNEL_EQ_BITS) return -EINVAL; } @@ -471,9 +471,9 @@ static void exynos_dp_get_adjust_training_lane(struct exynos_dp_device *dp, DPCD_PRE_EMPHASIS_SET(pre_emphasis); if (voltage_swing == VOLTAGE_LEVEL_3) - training_lane |= DPCD_MAX_SWING_REACHED; + training_lane |= DP_TRAIN_MAX_SWING_REACHED; if (pre_emphasis == PRE_EMPHASIS_LEVEL_3) - training_lane |= DPCD_MAX_PRE_EMPHASIS_REACHED; + training_lane |= DP_TRAIN_MAX_PRE_EMPHASIS_REACHED; dp->link_train.training_lane[lane] = training_lane; } @@ -490,12 +490,12 @@ static int exynos_dp_process_clock_recovery(struct exynos_dp_device *dp) lane_count = dp->link_train.lane_count; retval = exynos_dp_read_bytes_from_dpcd(dp, - DPCD_ADDR_LANE0_1_STATUS, 2, link_status); + DP_LANE0_1_STATUS, 2, link_status); if (retval) return retval; retval = exynos_dp_read_bytes_from_dpcd(dp, - DPCD_ADDR_ADJUST_REQUEST_LANE0_1, 2, adjust_request); + DP_ADJUST_REQUEST_LANE0_1, 2, adjust_request); if (retval) return retval; @@ -504,9 +504,9 @@ static int exynos_dp_process_clock_recovery(struct exynos_dp_device *dp) exynos_dp_set_training_pattern(dp, TRAINING_PTN2); retval = exynos_dp_write_byte_to_dpcd(dp, - DPCD_ADDR_TRAINING_PATTERN_SET, - DPCD_SCRAMBLING_DISABLED | - DPCD_TRAINING_PATTERN_2); + DP_TRAINING_PATTERN_SET, + DP_LINK_SCRAMBLING_DISABLE | + DP_TRAINING_PATTERN_2); if (retval) return retval; @@ -546,7 +546,7 @@ static int exynos_dp_process_clock_recovery(struct exynos_dp_device *dp) dp->link_train.training_lane[lane], lane); retval = exynos_dp_write_bytes_to_dpcd(dp, - DPCD_ADDR_TRAINING_LANE0_SET, lane_count, + DP_TRAINING_LANE0_SET, lane_count, dp->link_train.training_lane); if (retval) return retval; @@ -565,7 +565,7 @@ static int exynos_dp_process_equalizer_training(struct exynos_dp_device *dp) lane_count = dp->link_train.lane_count; retval = exynos_dp_read_bytes_from_dpcd(dp, - DPCD_ADDR_LANE0_1_STATUS, 2, link_status); + DP_LANE0_1_STATUS, 2, link_status); if (retval) return retval; @@ -575,12 +575,12 @@ static int exynos_dp_process_equalizer_training(struct exynos_dp_device *dp) } retval = exynos_dp_read_bytes_from_dpcd(dp, - DPCD_ADDR_ADJUST_REQUEST_LANE0_1, 2, adjust_request); + DP_ADJUST_REQUEST_LANE0_1, 2, adjust_request); if (retval) return retval; retval = exynos_dp_read_byte_from_dpcd(dp, - DPCD_ADDR_LANE_ALIGN_STATUS_UPDATED, &link_align); + DP_LANE_ALIGN_STATUS_UPDATED, &link_align); if (retval) return retval; @@ -622,7 +622,7 @@ static int exynos_dp_process_equalizer_training(struct exynos_dp_device *dp) exynos_dp_set_lane_link_training(dp, dp->link_train.training_lane[lane], lane); - retval = exynos_dp_write_bytes_to_dpcd(dp, DPCD_ADDR_TRAINING_LANE0_SET, + retval = exynos_dp_write_bytes_to_dpcd(dp, DP_TRAINING_LANE0_SET, lane_count, dp->link_train.training_lane); return retval; @@ -637,7 +637,7 @@ static void exynos_dp_get_max_rx_bandwidth(struct exynos_dp_device *dp, * For DP rev.1.1, Maximum link rate of Main Link lanes * 0x06 = 1.62 Gbps, 0x0a = 2.7 Gbps */ - exynos_dp_read_byte_from_dpcd(dp, DPCD_ADDR_MAX_LINK_RATE, &data); + exynos_dp_read_byte_from_dpcd(dp, DP_MAX_LINK_RATE, &data); *bandwidth = data; } @@ -650,7 +650,7 @@ static void exynos_dp_get_max_rx_lane_count(struct exynos_dp_device *dp, * For DP rev.1.1, Maximum number of Main Link lanes * 0x01 = 1 lane, 0x02 = 2 lanes, 0x04 = 4 lanes */ - exynos_dp_read_byte_from_dpcd(dp, DPCD_ADDR_MAX_LANE_COUNT, &data); + exynos_dp_read_byte_from_dpcd(dp, DP_MAX_LANE_COUNT, &data); *lane_count = DPCD_MAX_LANE_COUNT(data); } @@ -822,20 +822,20 @@ static void exynos_dp_enable_scramble(struct exynos_dp_device *dp, bool enable) exynos_dp_enable_scrambling(dp); exynos_dp_read_byte_from_dpcd(dp, - DPCD_ADDR_TRAINING_PATTERN_SET, + DP_TRAINING_PATTERN_SET, &data); exynos_dp_write_byte_to_dpcd(dp, - DPCD_ADDR_TRAINING_PATTERN_SET, - (u8)(data & ~DPCD_SCRAMBLING_DISABLED)); + DP_TRAINING_PATTERN_SET, + (u8)(data & ~DP_LINK_SCRAMBLING_DISABLE)); } else { exynos_dp_disable_scrambling(dp); exynos_dp_read_byte_from_dpcd(dp, - DPCD_ADDR_TRAINING_PATTERN_SET, + DP_TRAINING_PATTERN_SET, &data); exynos_dp_write_byte_to_dpcd(dp, - DPCD_ADDR_TRAINING_PATTERN_SET, - (u8)(data | DPCD_SCRAMBLING_DISABLED)); + DP_TRAINING_PATTERN_SET, + (u8)(data | DP_LINK_SCRAMBLING_DISABLE)); } } diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.h b/drivers/gpu/drm/exynos/exynos_dp_core.h index 56fa43eb7133..02cc4f9ab903 100644 --- a/drivers/gpu/drm/exynos/exynos_dp_core.h +++ b/drivers/gpu/drm/exynos/exynos_dp_core.h @@ -14,6 +14,7 @@ #define _EXYNOS_DP_CORE_H #include +#include #include #define DP_TIMEOUT_LOOP_COUNT 100 @@ -262,69 +263,17 @@ void exynos_dp_disable_scrambling(struct exynos_dp_device *dp); #define EDID_EXTENSION_FLAG 0x7e #define EDID_CHECKSUM 0x7f -/* Definition for DPCD Register */ -#define DPCD_ADDR_DPCD_REV 0x0000 -#define DPCD_ADDR_MAX_LINK_RATE 0x0001 -#define DPCD_ADDR_MAX_LANE_COUNT 0x0002 -#define DPCD_ADDR_LINK_BW_SET 0x0100 -#define DPCD_ADDR_LANE_COUNT_SET 0x0101 -#define DPCD_ADDR_TRAINING_PATTERN_SET 0x0102 -#define DPCD_ADDR_TRAINING_LANE0_SET 0x0103 -#define DPCD_ADDR_LANE0_1_STATUS 0x0202 -#define DPCD_ADDR_LANE_ALIGN_STATUS_UPDATED 0x0204 -#define DPCD_ADDR_ADJUST_REQUEST_LANE0_1 0x0206 -#define DPCD_ADDR_ADJUST_REQUEST_LANE2_3 0x0207 -#define DPCD_ADDR_TEST_REQUEST 0x0218 -#define DPCD_ADDR_TEST_RESPONSE 0x0260 -#define DPCD_ADDR_TEST_EDID_CHECKSUM 0x0261 -#define DPCD_ADDR_SINK_POWER_STATE 0x0600 - -/* DPCD_ADDR_MAX_LANE_COUNT */ +/* DP_MAX_LANE_COUNT */ #define DPCD_ENHANCED_FRAME_CAP(x) (((x) >> 7) & 0x1) #define DPCD_MAX_LANE_COUNT(x) ((x) & 0x1f) -/* DPCD_ADDR_LANE_COUNT_SET */ -#define DPCD_ENHANCED_FRAME_EN (0x1 << 7) +/* DP_LANE_COUNT_SET */ #define DPCD_LANE_COUNT_SET(x) ((x) & 0x1f) -/* DPCD_ADDR_TRAINING_PATTERN_SET */ -#define DPCD_SCRAMBLING_DISABLED (0x1 << 5) -#define DPCD_SCRAMBLING_ENABLED (0x0 << 5) -#define DPCD_TRAINING_PATTERN_2 (0x2 << 0) -#define DPCD_TRAINING_PATTERN_1 (0x1 << 0) -#define DPCD_TRAINING_PATTERN_DISABLED (0x0 << 0) - -/* DPCD_ADDR_TRAINING_LANE0_SET */ -#define DPCD_MAX_PRE_EMPHASIS_REACHED (0x1 << 5) +/* DP_TRAINING_LANE0_SET */ #define DPCD_PRE_EMPHASIS_SET(x) (((x) & 0x3) << 3) #define DPCD_PRE_EMPHASIS_GET(x) (((x) >> 3) & 0x3) -#define DPCD_PRE_EMPHASIS_PATTERN2_LEVEL0 (0x0 << 3) -#define DPCD_MAX_SWING_REACHED (0x1 << 2) #define DPCD_VOLTAGE_SWING_SET(x) (((x) & 0x3) << 0) #define DPCD_VOLTAGE_SWING_GET(x) (((x) >> 0) & 0x3) -#define DPCD_VOLTAGE_SWING_PATTERN1_LEVEL0 (0x0 << 0) - -/* DPCD_ADDR_LANE0_1_STATUS */ -#define DPCD_LANE_SYMBOL_LOCKED (0x1 << 2) -#define DPCD_LANE_CHANNEL_EQ_DONE (0x1 << 1) -#define DPCD_LANE_CR_DONE (0x1 << 0) -#define DPCD_CHANNEL_EQ_BITS (DPCD_LANE_CR_DONE| \ - DPCD_LANE_CHANNEL_EQ_DONE|\ - DPCD_LANE_SYMBOL_LOCKED) - -/* DPCD_ADDR_LANE_ALIGN__STATUS_UPDATED */ -#define DPCD_LINK_STATUS_UPDATED (0x1 << 7) -#define DPCD_DOWNSTREAM_PORT_STATUS_CHANGED (0x1 << 6) -#define DPCD_INTERLANE_ALIGN_DONE (0x1 << 0) - -/* DPCD_ADDR_TEST_REQUEST */ -#define DPCD_TEST_EDID_READ (0x1 << 2) - -/* DPCD_ADDR_TEST_RESPONSE */ -#define DPCD_TEST_EDID_CHECKSUM_WRITE (0x1 << 2) - -/* DPCD_ADDR_SINK_POWER_STATE */ -#define DPCD_SET_POWER_STATE_D0 (0x1 << 0) -#define DPCD_SET_POWER_STATE_D4 (0x2 << 0) #endif /* _EXYNOS_DP_CORE_H */ diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index efcde2c38cc1..488b41635dec 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -310,6 +310,8 @@ # define DP_TEST_NAK (1 << 1) # define DP_TEST_EDID_CHECKSUM_WRITE (1 << 2) +#define DP_TEST_EDID_CHECKSUM 0x261 + #define DP_TEST_SINK 0x270 #define DP_TEST_SINK_START (1 << 0) -- cgit v1.2.3-71-gd317 From c8865b64b05b2f4eeefd369373e9c8aeb069e7a1 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Sun, 25 May 2014 17:47:26 +0300 Subject: cpumask: Utility function to set n'th cpu - local cpu first This function sets the n'th cpu - local cpu's first. For example: in a 16 cores server with even cpu's local, will get the following values: cpumask_set_cpu_local_first(0, numa, cpumask) => cpu 0 is set cpumask_set_cpu_local_first(1, numa, cpumask) => cpu 2 is set ... cpumask_set_cpu_local_first(7, numa, cpumask) => cpu 14 is set cpumask_set_cpu_local_first(8, numa, cpumask) => cpu 1 is set cpumask_set_cpu_local_first(9, numa, cpumask) => cpu 3 is set ... cpumask_set_cpu_local_first(15, numa, cpumask) => cpu 15 is set Curently this function will be used by multi queue networking devices to calculate the irq affinity mask, such that as many local cpu's as possible will be utilized to handle the mq device irq's. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/cpumask.h | 2 ++ lib/cpumask.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d08e4d2a9b92..3551d667ef9f 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -257,6 +257,8 @@ static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } +int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp); + /** * cpumask_clear_cpu - clear a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) diff --git a/lib/cpumask.c b/lib/cpumask.c index b810b753c607..14049a96f04a 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -163,4 +163,68 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) { memblock_free_early(__pa(mask), cpumask_size()); } + +/** + * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first + * + * @i: index number + * @numa_node: local numa_node + * @dstp: cpumask with the relevant cpu bit set according to the policy + * + * This function sets the cpumask according to a numa aware policy. + * cpumask could be used as an affinity hint for the IRQ related to a + * queue. When the policy is to spread queues across cores - local cores + * first. + * + * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set + * the cpu bit and need to re-call the function. + */ +int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +{ + cpumask_var_t mask; + int cpu; + int ret = 0; + + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + i %= num_online_cpus(); + + if (!cpumask_of_node(numa_node)) { + /* Use all online cpu's for non numa aware system */ + cpumask_copy(mask, cpu_online_mask); + } else { + int n; + + cpumask_and(mask, + cpumask_of_node(numa_node), cpu_online_mask); + + n = cpumask_weight(mask); + if (i >= n) { + i -= n; + + /* If index > number of local cpu's, mask out local + * cpu's + */ + cpumask_andnot(mask, cpu_online_mask, mask); + } + } + + for_each_cpu(cpu, mask) { + if (--i < 0) + goto out; + } + + ret = -EAGAIN; + +out: + free_cpumask_var(mask); + + if (!ret) + cpumask_set_cpu(cpu, dstp); + + return ret; +} +EXPORT_SYMBOL(cpumask_set_cpu_local_first); + #endif -- cgit v1.2.3-71-gd317 From 70a640d0dae3a9b1b222ce673eb5d92c263ddd61 Mon Sep 17 00:00:00 2001 From: Yuval Atias Date: Sun, 25 May 2014 17:47:27 +0300 Subject: net/mlx4_en: Use affinity hint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The “affinity hint” mechanism is used by the user space daemon, irqbalancer, to indicate a preferred CPU mask for irqs. Irqbalancer can use this hint to balance the irqs between the cpus indicated by the mask. We wish the HCA to preferentially map the IRQs it uses to numa cores close to it. To accomplish this, we use cpumask_set_cpu_local_first(), that sets the affinity hint according the following policy: First it maps IRQs to “close” numa cores. If these are exhausted, the remaining IRQs are mapped to “far” numa cores. Signed-off-by: Yuval Atias Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 6 +++++- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 30 ++++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/eq.c | 13 ++++++++++- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + include/linux/mlx4/device.h | 2 +- 6 files changed, 50 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 199c7896f081..58b1f239ac2b 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1897,7 +1897,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) i, j, dev->pdev->bus->name); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, NULL, - &ibdev->eq_table[eq])) { + &ibdev->eq_table[eq], NULL)) { /* Use legacy (same as mlx4_en driver) */ pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); ibdev->eq_table[eq] = diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 636963db598a..ea2cd72e5368 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -118,11 +118,15 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, if (cq->is_tx == RX) { if (mdev->dev->caps.comp_pool) { if (!cq->vector) { + struct mlx4_en_rx_ring *ring = + priv->rx_ring[cq->ring]; + sprintf(name, "%s-%d", priv->dev->name, cq->ring); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(mdev->dev, name, rmap, - &cq->vector)) { + &cq->vector, + ring->affinity_mask)) { cq->vector = (cq->ring + 1 + priv->port) % mdev->dev->caps.num_comp_vectors; mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQ's\n", diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 58209bd0c94c..05d135572abc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1526,6 +1526,32 @@ static void mlx4_en_linkstate(struct work_struct *work) mutex_unlock(&mdev->state_lock); } +static void mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) +{ + struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx]; + int numa_node = priv->mdev->dev->numa_node; + + if (numa_node == -1) + return; + + if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL)) { + en_err(priv, "Failed to allocate core mask\n"); + return; + } + + if (cpumask_set_cpu_local_first(ring_idx, numa_node, + ring->affinity_mask)) { + en_err(priv, "Failed setting affinity hint\n"); + free_cpumask_var(ring->affinity_mask); + ring->affinity_mask = NULL; + } +} + +static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) +{ + free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask); + priv->rx_ring[ring_idx]->affinity_mask = NULL; +} int mlx4_en_start_port(struct net_device *dev) { @@ -1567,6 +1593,8 @@ int mlx4_en_start_port(struct net_device *dev) mlx4_en_cq_init_lock(cq); + mlx4_en_init_affinity_hint(priv, i); + err = mlx4_en_activate_cq(priv, cq, i); if (err) { en_err(priv, "Failed activating Rx CQ\n"); @@ -1847,6 +1875,8 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) msleep(1); mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); mlx4_en_deactivate_cq(priv, cq); + + mlx4_en_free_affinity_hint(priv, i); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index d954ec1eac17..f91659e5fa13 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1376,7 +1376,7 @@ int mlx4_test_interrupts(struct mlx4_dev *dev) EXPORT_SYMBOL(mlx4_test_interrupts); int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, - int *vector) + int *vector, cpumask_var_t cpu_hint_mask) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1411,6 +1411,15 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, } mlx4_assign_irq_notifier(priv, dev, priv->eq_table.eq[vec].irq); + if (cpu_hint_mask) { + err = irq_set_affinity_hint( + priv->eq_table.eq[vec].irq, + cpu_hint_mask); + if (err) { + mlx4_warn(dev, "Failed setting affinity hint\n"); + /*we dont want to break here*/ + } + } eq_set_ci(&priv->eq_table.eq[vec], 1); } @@ -1441,6 +1450,8 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec) irq_set_affinity_notifier( priv->eq_table.eq[vec].irq, NULL); + irq_set_affinity_hint(priv->eq_table.eq[vec].irq, + NULL); free_irq(priv->eq_table.eq[vec].irq, &priv->eq_table.eq[vec]); priv->msix_ctl.pool_bm &= ~(1ULL << i); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index b5db1bf361dc..0e15295bedd6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -313,6 +313,7 @@ struct mlx4_en_rx_ring { unsigned long csum_ok; unsigned long csum_none; int hwtstamp_rx_filter; + cpumask_var_t affinity_mask; }; struct mlx4_en_cq { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ca38871a585c..b9b70e00e3c1 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1161,7 +1161,7 @@ int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); int mlx4_test_interrupts(struct mlx4_dev *dev); int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, - int *vector); + int *vector, cpumask_t *cpu_hint_mask); void mlx4_release_eq(struct mlx4_dev *dev, int vec); int mlx4_get_phys_port_id(struct mlx4_dev *dev); -- cgit v1.2.3-71-gd317 From ee39facbf82e73e468c504d2b40e83e2d223c28c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 1 Jun 2014 21:58:02 -0700 Subject: net: Revert mlx4 cpumask changes. This reverts commit 70a640d0dae3a9b1b222ce673eb5d92c263ddd61 ("net/mlx4_en: Use affinity hint") and commit c8865b64b05b2f4eeefd369373e9c8aeb069e7a1 ("cpumask: Utility function to set n'th cpu - local cpu first") because these changes break the build when SMP is disabled amongst other things. Reported-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851.c | 50 +++++++++++++++------------- include/linux/cpumask.h | 2 -- lib/cpumask.c | 64 ------------------------------------ 3 files changed, 28 insertions(+), 88 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index 66d4ab703f45..e72918970a58 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -1441,30 +1441,32 @@ static int ks8851_probe(struct spi_device *spi) } } - ks->vdd_io = devm_regulator_get(&spi->dev, "vdd-io"); + ks->vdd_io = devm_regulator_get_optional(&spi->dev, "vdd-io"); if (IS_ERR(ks->vdd_io)) { ret = PTR_ERR(ks->vdd_io); - goto err_reg_io; - } - - ret = regulator_enable(ks->vdd_io); - if (ret) { - dev_err(&spi->dev, "regulator vdd_io enable fail: %d\n", - ret); - goto err_reg_io; + if (ret == -EPROBE_DEFER) + goto err_reg_io; + } else { + ret = regulator_enable(ks->vdd_io); + if (ret) { + dev_err(&spi->dev, "regulator vdd_io enable fail: %d\n", + ret); + goto err_reg_io; + } } - ks->vdd_reg = devm_regulator_get(&spi->dev, "vdd"); + ks->vdd_reg = devm_regulator_get_optional(&spi->dev, "vdd"); if (IS_ERR(ks->vdd_reg)) { ret = PTR_ERR(ks->vdd_reg); - goto err_reg; - } - - ret = regulator_enable(ks->vdd_reg); - if (ret) { - dev_err(&spi->dev, "regulator vdd enable fail: %d\n", - ret); - goto err_reg; + if (ret == -EPROBE_DEFER) + goto err_reg; + } else { + ret = regulator_enable(ks->vdd_reg); + if (ret) { + dev_err(&spi->dev, "regulator vdd enable fail: %d\n", + ret); + goto err_reg; + } } if (gpio_is_valid(gpio)) { @@ -1570,9 +1572,11 @@ err_irq: if (gpio_is_valid(gpio)) gpio_set_value(gpio, 0); err_id: - regulator_disable(ks->vdd_reg); + if (!IS_ERR(ks->vdd_reg)) + regulator_disable(ks->vdd_reg); err_reg: - regulator_disable(ks->vdd_io); + if (!IS_ERR(ks->vdd_io)) + regulator_disable(ks->vdd_io); err_reg_io: err_gpio: free_netdev(ndev); @@ -1590,8 +1594,10 @@ static int ks8851_remove(struct spi_device *spi) free_irq(spi->irq, priv); if (gpio_is_valid(priv->gpio)) gpio_set_value(priv->gpio, 0); - regulator_disable(priv->vdd_reg); - regulator_disable(priv->vdd_io); + if (!IS_ERR(priv->vdd_reg)) + regulator_disable(priv->vdd_reg); + if (!IS_ERR(priv->vdd_io)) + regulator_disable(priv->vdd_io); free_netdev(priv->netdev); return 0; diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 3551d667ef9f..d08e4d2a9b92 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -257,8 +257,6 @@ static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } -int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp); - /** * cpumask_clear_cpu - clear a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) diff --git a/lib/cpumask.c b/lib/cpumask.c index 14049a96f04a..b810b753c607 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -163,68 +163,4 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) { memblock_free_early(__pa(mask), cpumask_size()); } - -/** - * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first - * - * @i: index number - * @numa_node: local numa_node - * @dstp: cpumask with the relevant cpu bit set according to the policy - * - * This function sets the cpumask according to a numa aware policy. - * cpumask could be used as an affinity hint for the IRQ related to a - * queue. When the policy is to spread queues across cores - local cores - * first. - * - * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set - * the cpu bit and need to re-call the function. - */ -int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) -{ - cpumask_var_t mask; - int cpu; - int ret = 0; - - if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; - - i %= num_online_cpus(); - - if (!cpumask_of_node(numa_node)) { - /* Use all online cpu's for non numa aware system */ - cpumask_copy(mask, cpu_online_mask); - } else { - int n; - - cpumask_and(mask, - cpumask_of_node(numa_node), cpu_online_mask); - - n = cpumask_weight(mask); - if (i >= n) { - i -= n; - - /* If index > number of local cpu's, mask out local - * cpu's - */ - cpumask_andnot(mask, cpu_online_mask, mask); - } - } - - for_each_cpu(cpu, mask) { - if (--i < 0) - goto out; - } - - ret = -EAGAIN; - -out: - free_cpumask_var(mask); - - if (!ret) - cpumask_set_cpu(cpu, dstp); - - return ret; -} -EXPORT_SYMBOL(cpumask_set_cpu_local_first); - #endif -- cgit v1.2.3-71-gd317 From 3480593131e0b781287dae0139bf7ccee7cba7ff Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 29 May 2014 10:22:50 +0200 Subject: net: filter: get rid of BPF_S_* enum This patch finally allows us to get rid of the BPF_S_* enum. Currently, the code performs unnecessary encode and decode workarounds in seccomp and filter migration itself when a filter is being attached in order to overcome BPF_S_* encoding which is not used anymore by the new interpreter resp. JIT compilers. Keeping it around would mean that also in future we would need to extend and maintain this enum and related encoders/decoders. We can get rid of all that and save us these operations during filter attaching. Naturally, also JIT compilers need to be updated by this. Before JIT conversion is being done, each compiler checks if A is being loaded at startup to obtain information if it needs to emit instructions to clear A first. Since BPF extensions are a subset of BPF_LD | BPF_{W,H,B} | BPF_ABS variants, case statements for extensions can be removed at that point. To ease and minimalize code changes in the classic JITs, we have introduced bpf_anc_helper(). Tested with test_bpf on x86_64 (JIT, int), s390x (JIT, int), arm (JIT, int), i368 (int), ppc64 (JIT, int); for sparc we unfortunately didn't have access, but changes are analogous to the rest. Joint work with Alexei Starovoitov. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: Mircea Gherzan Cc: Kees Cook Acked-by: Chema Gonzalez Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 139 ++++++++-------- arch/powerpc/net/bpf_jit_64.S | 2 +- arch/powerpc/net/bpf_jit_comp.c | 157 +++++++++--------- arch/s390/net/bpf_jit_comp.c | 163 +++++++++---------- arch/sparc/net/bpf_jit_comp.c | 154 +++++++++--------- include/linux/filter.h | 108 +++++-------- kernel/seccomp.c | 83 +++++----- net/core/filter.c | 341 +++++++++++++++------------------------- 8 files changed, 498 insertions(+), 649 deletions(-) (limited to 'include') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 6f879c319a9d..fb5503ce016f 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -136,7 +136,7 @@ static u16 saved_regs(struct jit_ctx *ctx) u16 ret = 0; if ((ctx->skf->len > 1) || - (ctx->skf->insns[0].code == BPF_S_RET_A)) + (ctx->skf->insns[0].code == (BPF_RET | BPF_A))) ret |= 1 << r_A; #ifdef CONFIG_FRAME_POINTER @@ -164,18 +164,10 @@ static inline int mem_words_used(struct jit_ctx *ctx) static inline bool is_load_to_a(u16 inst) { switch (inst) { - case BPF_S_LD_W_LEN: - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: - case BPF_S_ANC_CPU: - case BPF_S_ANC_IFINDEX: - case BPF_S_ANC_MARK: - case BPF_S_ANC_PROTOCOL: - case BPF_S_ANC_RXHASH: - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - case BPF_S_ANC_QUEUE: + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: return true; default: return false; @@ -215,7 +207,7 @@ static void build_prologue(struct jit_ctx *ctx) emit(ARM_MOV_I(r_X, 0), ctx); /* do not leak kernel data to userspace */ - if ((first_inst != BPF_S_RET_K) && !(is_load_to_a(first_inst))) + if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst))) emit(ARM_MOV_I(r_A, 0), ctx); /* stack space for the BPF_MEM words */ @@ -480,36 +472,39 @@ static int build_body(struct jit_ctx *ctx) u32 k; for (i = 0; i < prog->len; i++) { + u16 code; + inst = &(prog->insns[i]); /* K as an immediate value operand */ k = inst->k; + code = bpf_anc_helper(inst); /* compute offsets only in the fake pass */ if (ctx->target == NULL) ctx->offsets[i] = ctx->idx * 4; - switch (inst->code) { - case BPF_S_LD_IMM: + switch (code) { + case BPF_LD | BPF_IMM: emit_mov_i(r_A, k, ctx); break; - case BPF_S_LD_W_LEN: + case BPF_LD | BPF_W | BPF_LEN: ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); emit(ARM_LDR_I(r_A, r_skb, offsetof(struct sk_buff, len)), ctx); break; - case BPF_S_LD_MEM: + case BPF_LD | BPF_MEM: /* A = scratch[k] */ ctx->seen |= SEEN_MEM_WORD(k); emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); break; - case BPF_S_LD_W_ABS: + case BPF_LD | BPF_W | BPF_ABS: load_order = 2; goto load; - case BPF_S_LD_H_ABS: + case BPF_LD | BPF_H | BPF_ABS: load_order = 1; goto load; - case BPF_S_LD_B_ABS: + case BPF_LD | BPF_B | BPF_ABS: load_order = 0; load: /* the interpreter will deal with the negative K */ @@ -552,31 +547,31 @@ load_common: emit_err_ret(ARM_COND_NE, ctx); emit(ARM_MOV_R(r_A, ARM_R0), ctx); break; - case BPF_S_LD_W_IND: + case BPF_LD | BPF_W | BPF_IND: load_order = 2; goto load_ind; - case BPF_S_LD_H_IND: + case BPF_LD | BPF_H | BPF_IND: load_order = 1; goto load_ind; - case BPF_S_LD_B_IND: + case BPF_LD | BPF_B | BPF_IND: load_order = 0; load_ind: OP_IMM3(ARM_ADD, r_off, r_X, k, ctx); goto load_common; - case BPF_S_LDX_IMM: + case BPF_LDX | BPF_IMM: ctx->seen |= SEEN_X; emit_mov_i(r_X, k, ctx); break; - case BPF_S_LDX_W_LEN: + case BPF_LDX | BPF_W | BPF_LEN: ctx->seen |= SEEN_X | SEEN_SKB; emit(ARM_LDR_I(r_X, r_skb, offsetof(struct sk_buff, len)), ctx); break; - case BPF_S_LDX_MEM: + case BPF_LDX | BPF_MEM: ctx->seen |= SEEN_X | SEEN_MEM_WORD(k); emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); break; - case BPF_S_LDX_B_MSH: + case BPF_LDX | BPF_B | BPF_MSH: /* x = ((*(frame + k)) & 0xf) << 2; */ ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL; /* the interpreter should deal with the negative K */ @@ -606,113 +601,113 @@ load_ind: emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx); emit(ARM_LSL_I(r_X, r_X, 2), ctx); break; - case BPF_S_ST: + case BPF_ST: ctx->seen |= SEEN_MEM_WORD(k); emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); break; - case BPF_S_STX: + case BPF_STX: update_on_xread(ctx); ctx->seen |= SEEN_MEM_WORD(k); emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); break; - case BPF_S_ALU_ADD_K: + case BPF_ALU | BPF_ADD | BPF_K: /* A += K */ OP_IMM3(ARM_ADD, r_A, r_A, k, ctx); break; - case BPF_S_ALU_ADD_X: + case BPF_ALU | BPF_ADD | BPF_X: update_on_xread(ctx); emit(ARM_ADD_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_SUB_K: + case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ OP_IMM3(ARM_SUB, r_A, r_A, k, ctx); break; - case BPF_S_ALU_SUB_X: + case BPF_ALU | BPF_SUB | BPF_X: update_on_xread(ctx); emit(ARM_SUB_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_MUL_K: + case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ emit_mov_i(r_scratch, k, ctx); emit(ARM_MUL(r_A, r_A, r_scratch), ctx); break; - case BPF_S_ALU_MUL_X: + case BPF_ALU | BPF_MUL | BPF_X: update_on_xread(ctx); emit(ARM_MUL(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_DIV_K: + case BPF_ALU | BPF_DIV | BPF_K: if (k == 1) break; emit_mov_i(r_scratch, k, ctx); emit_udiv(r_A, r_A, r_scratch, ctx); break; - case BPF_S_ALU_DIV_X: + case BPF_ALU | BPF_DIV | BPF_X: update_on_xread(ctx); emit(ARM_CMP_I(r_X, 0), ctx); emit_err_ret(ARM_COND_EQ, ctx); emit_udiv(r_A, r_A, r_X, ctx); break; - case BPF_S_ALU_OR_K: + case BPF_ALU | BPF_OR | BPF_K: /* A |= K */ OP_IMM3(ARM_ORR, r_A, r_A, k, ctx); break; - case BPF_S_ALU_OR_X: + case BPF_ALU | BPF_OR | BPF_X: update_on_xread(ctx); emit(ARM_ORR_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_XOR_K: + case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K; */ OP_IMM3(ARM_EOR, r_A, r_A, k, ctx); break; - case BPF_S_ANC_ALU_XOR_X: - case BPF_S_ALU_XOR_X: + case BPF_ANC | SKF_AD_ALU_XOR_X: + case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */ update_on_xread(ctx); emit(ARM_EOR_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_AND_K: + case BPF_ALU | BPF_AND | BPF_K: /* A &= K */ OP_IMM3(ARM_AND, r_A, r_A, k, ctx); break; - case BPF_S_ALU_AND_X: + case BPF_ALU | BPF_AND | BPF_X: update_on_xread(ctx); emit(ARM_AND_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_LSH_K: + case BPF_ALU | BPF_LSH | BPF_K: if (unlikely(k > 31)) return -1; emit(ARM_LSL_I(r_A, r_A, k), ctx); break; - case BPF_S_ALU_LSH_X: + case BPF_ALU | BPF_LSH | BPF_X: update_on_xread(ctx); emit(ARM_LSL_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_RSH_K: + case BPF_ALU | BPF_RSH | BPF_K: if (unlikely(k > 31)) return -1; emit(ARM_LSR_I(r_A, r_A, k), ctx); break; - case BPF_S_ALU_RSH_X: + case BPF_ALU | BPF_RSH | BPF_X: update_on_xread(ctx); emit(ARM_LSR_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_NEG: + case BPF_ALU | BPF_NEG: /* A = -A */ emit(ARM_RSB_I(r_A, r_A, 0), ctx); break; - case BPF_S_JMP_JA: + case BPF_JMP | BPF_JA: /* pc += K */ emit(ARM_B(b_imm(i + k + 1, ctx)), ctx); break; - case BPF_S_JMP_JEQ_K: + case BPF_JMP | BPF_JEQ | BPF_K: /* pc += (A == K) ? pc->jt : pc->jf */ condt = ARM_COND_EQ; goto cmp_imm; - case BPF_S_JMP_JGT_K: + case BPF_JMP | BPF_JGT | BPF_K: /* pc += (A > K) ? pc->jt : pc->jf */ condt = ARM_COND_HI; goto cmp_imm; - case BPF_S_JMP_JGE_K: + case BPF_JMP | BPF_JGE | BPF_K: /* pc += (A >= K) ? pc->jt : pc->jf */ condt = ARM_COND_HS; cmp_imm: @@ -731,22 +726,22 @@ cond_jump: _emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1, ctx)), ctx); break; - case BPF_S_JMP_JEQ_X: + case BPF_JMP | BPF_JEQ | BPF_X: /* pc += (A == X) ? pc->jt : pc->jf */ condt = ARM_COND_EQ; goto cmp_x; - case BPF_S_JMP_JGT_X: + case BPF_JMP | BPF_JGT | BPF_X: /* pc += (A > X) ? pc->jt : pc->jf */ condt = ARM_COND_HI; goto cmp_x; - case BPF_S_JMP_JGE_X: + case BPF_JMP | BPF_JGE | BPF_X: /* pc += (A >= X) ? pc->jt : pc->jf */ condt = ARM_COND_CS; cmp_x: update_on_xread(ctx); emit(ARM_CMP_R(r_A, r_X), ctx); goto cond_jump; - case BPF_S_JMP_JSET_K: + case BPF_JMP | BPF_JSET | BPF_K: /* pc += (A & K) ? pc->jt : pc->jf */ condt = ARM_COND_NE; /* not set iff all zeroes iff Z==1 iff EQ */ @@ -759,16 +754,16 @@ cmp_x: emit(ARM_TST_I(r_A, imm12), ctx); } goto cond_jump; - case BPF_S_JMP_JSET_X: + case BPF_JMP | BPF_JSET | BPF_X: /* pc += (A & X) ? pc->jt : pc->jf */ update_on_xread(ctx); condt = ARM_COND_NE; emit(ARM_TST_R(r_A, r_X), ctx); goto cond_jump; - case BPF_S_RET_A: + case BPF_RET | BPF_A: emit(ARM_MOV_R(ARM_R0, r_A), ctx); goto b_epilogue; - case BPF_S_RET_K: + case BPF_RET | BPF_K: if ((k == 0) && (ctx->ret0_fp_idx < 0)) ctx->ret0_fp_idx = i; emit_mov_i(ARM_R0, k, ctx); @@ -776,17 +771,17 @@ b_epilogue: if (i != ctx->skf->len - 1) emit(ARM_B(b_imm(prog->len, ctx)), ctx); break; - case BPF_S_MISC_TAX: + case BPF_MISC | BPF_TAX: /* X = A */ ctx->seen |= SEEN_X; emit(ARM_MOV_R(r_X, r_A), ctx); break; - case BPF_S_MISC_TXA: + case BPF_MISC | BPF_TXA: /* A = X */ update_on_xread(ctx); emit(ARM_MOV_R(r_A, r_X), ctx); break; - case BPF_S_ANC_PROTOCOL: + case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol) */ ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, @@ -795,7 +790,7 @@ b_epilogue: emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx); emit_swap16(r_A, r_scratch, ctx); break; - case BPF_S_ANC_CPU: + case BPF_ANC | SKF_AD_CPU: /* r_scratch = current_thread_info() */ OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx); /* A = current_thread_info()->cpu */ @@ -803,7 +798,7 @@ b_epilogue: off = offsetof(struct thread_info, cpu); emit(ARM_LDR_I(r_A, r_scratch, off), ctx); break; - case BPF_S_ANC_IFINDEX: + case BPF_ANC | SKF_AD_IFINDEX: /* A = skb->dev->ifindex */ ctx->seen |= SEEN_SKB; off = offsetof(struct sk_buff, dev); @@ -817,30 +812,30 @@ b_epilogue: off = offsetof(struct net_device, ifindex); emit(ARM_LDR_I(r_A, r_scratch, off), ctx); break; - case BPF_S_ANC_MARK: + case BPF_ANC | SKF_AD_MARK: ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); off = offsetof(struct sk_buff, mark); emit(ARM_LDR_I(r_A, r_skb, off), ctx); break; - case BPF_S_ANC_RXHASH: + case BPF_ANC | SKF_AD_RXHASH: ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); off = offsetof(struct sk_buff, hash); emit(ARM_LDR_I(r_A, r_skb, off), ctx); break; - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: + case BPF_ANC | SKF_AD_VLAN_TAG: + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); off = offsetof(struct sk_buff, vlan_tci); emit(ARM_LDRH_I(r_A, r_skb, off), ctx); - if (inst->code == BPF_S_ANC_VLAN_TAG) + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) OP_IMM3(ARM_AND, r_A, r_A, VLAN_VID_MASK, ctx); else OP_IMM3(ARM_AND, r_A, r_A, VLAN_TAG_PRESENT, ctx); break; - case BPF_S_ANC_QUEUE: + case BPF_ANC | SKF_AD_QUEUE: ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S index e76eba74d9da..8f87d9217122 100644 --- a/arch/powerpc/net/bpf_jit_64.S +++ b/arch/powerpc/net/bpf_jit_64.S @@ -78,7 +78,7 @@ sk_load_byte_positive_offset: blr /* - * BPF_S_LDX_B_MSH: ldxb 4*([offset]&0xf) + * BPF_LDX | BPF_B | BPF_MSH: ldxb 4*([offset]&0xf) * r_addr is the offset value */ .globl sk_load_byte_msh diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 808ce1cae21a..6dcdadefd8d0 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -79,19 +79,11 @@ static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image, } switch (filter[0].code) { - case BPF_S_RET_K: - case BPF_S_LD_W_LEN: - case BPF_S_ANC_PROTOCOL: - case BPF_S_ANC_IFINDEX: - case BPF_S_ANC_MARK: - case BPF_S_ANC_RXHASH: - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - case BPF_S_ANC_CPU: - case BPF_S_ANC_QUEUE: - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: + case BPF_RET | BPF_K: + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: /* first instruction sets A register (or is RET 'constant') */ break; default: @@ -144,6 +136,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, for (i = 0; i < flen; i++) { unsigned int K = filter[i].k; + u16 code = bpf_anc_helper(&filter[i]); /* * addrs[] maps a BPF bytecode address into a real offset from @@ -151,35 +144,35 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, */ addrs[i] = ctx->idx * 4; - switch (filter[i].code) { + switch (code) { /*** ALU ops ***/ - case BPF_S_ALU_ADD_X: /* A += X; */ + case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */ ctx->seen |= SEEN_XREG; PPC_ADD(r_A, r_A, r_X); break; - case BPF_S_ALU_ADD_K: /* A += K; */ + case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */ if (!K) break; PPC_ADDI(r_A, r_A, IMM_L(K)); if (K >= 32768) PPC_ADDIS(r_A, r_A, IMM_HA(K)); break; - case BPF_S_ALU_SUB_X: /* A -= X; */ + case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */ ctx->seen |= SEEN_XREG; PPC_SUB(r_A, r_A, r_X); break; - case BPF_S_ALU_SUB_K: /* A -= K */ + case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ if (!K) break; PPC_ADDI(r_A, r_A, IMM_L(-K)); if (K >= 32768) PPC_ADDIS(r_A, r_A, IMM_HA(-K)); break; - case BPF_S_ALU_MUL_X: /* A *= X; */ + case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */ ctx->seen |= SEEN_XREG; PPC_MUL(r_A, r_A, r_X); break; - case BPF_S_ALU_MUL_K: /* A *= K */ + case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ if (K < 32768) PPC_MULI(r_A, r_A, K); else { @@ -187,7 +180,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_MUL(r_A, r_A, r_scratch1); } break; - case BPF_S_ALU_MOD_X: /* A %= X; */ + case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */ ctx->seen |= SEEN_XREG; PPC_CMPWI(r_X, 0); if (ctx->pc_ret0 != -1) { @@ -201,13 +194,13 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_MUL(r_scratch1, r_X, r_scratch1); PPC_SUB(r_A, r_A, r_scratch1); break; - case BPF_S_ALU_MOD_K: /* A %= K; */ + case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */ PPC_LI32(r_scratch2, K); PPC_DIVWU(r_scratch1, r_A, r_scratch2); PPC_MUL(r_scratch1, r_scratch2, r_scratch1); PPC_SUB(r_A, r_A, r_scratch1); break; - case BPF_S_ALU_DIV_X: /* A /= X; */ + case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */ ctx->seen |= SEEN_XREG; PPC_CMPWI(r_X, 0); if (ctx->pc_ret0 != -1) { @@ -223,17 +216,17 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, } PPC_DIVWU(r_A, r_A, r_X); break; - case BPF_S_ALU_DIV_K: /* A /= K */ + case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ if (K == 1) break; PPC_LI32(r_scratch1, K); PPC_DIVWU(r_A, r_A, r_scratch1); break; - case BPF_S_ALU_AND_X: + case BPF_ALU | BPF_AND | BPF_X: ctx->seen |= SEEN_XREG; PPC_AND(r_A, r_A, r_X); break; - case BPF_S_ALU_AND_K: + case BPF_ALU | BPF_AND | BPF_K: if (!IMM_H(K)) PPC_ANDI(r_A, r_A, K); else { @@ -241,51 +234,51 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_AND(r_A, r_A, r_scratch1); } break; - case BPF_S_ALU_OR_X: + case BPF_ALU | BPF_OR | BPF_X: ctx->seen |= SEEN_XREG; PPC_OR(r_A, r_A, r_X); break; - case BPF_S_ALU_OR_K: + case BPF_ALU | BPF_OR | BPF_K: if (IMM_L(K)) PPC_ORI(r_A, r_A, IMM_L(K)); if (K >= 65536) PPC_ORIS(r_A, r_A, IMM_H(K)); break; - case BPF_S_ANC_ALU_XOR_X: - case BPF_S_ALU_XOR_X: /* A ^= X */ + case BPF_ANC | SKF_AD_ALU_XOR_X: + case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */ ctx->seen |= SEEN_XREG; PPC_XOR(r_A, r_A, r_X); break; - case BPF_S_ALU_XOR_K: /* A ^= K */ + case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ if (IMM_L(K)) PPC_XORI(r_A, r_A, IMM_L(K)); if (K >= 65536) PPC_XORIS(r_A, r_A, IMM_H(K)); break; - case BPF_S_ALU_LSH_X: /* A <<= X; */ + case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ ctx->seen |= SEEN_XREG; PPC_SLW(r_A, r_A, r_X); break; - case BPF_S_ALU_LSH_K: + case BPF_ALU | BPF_LSH | BPF_K: if (K == 0) break; else PPC_SLWI(r_A, r_A, K); break; - case BPF_S_ALU_RSH_X: /* A >>= X; */ + case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ ctx->seen |= SEEN_XREG; PPC_SRW(r_A, r_A, r_X); break; - case BPF_S_ALU_RSH_K: /* A >>= K; */ + case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ if (K == 0) break; else PPC_SRWI(r_A, r_A, K); break; - case BPF_S_ALU_NEG: + case BPF_ALU | BPF_NEG: PPC_NEG(r_A, r_A); break; - case BPF_S_RET_K: + case BPF_RET | BPF_K: PPC_LI32(r_ret, K); if (!K) { if (ctx->pc_ret0 == -1) @@ -312,7 +305,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_BLR(); } break; - case BPF_S_RET_A: + case BPF_RET | BPF_A: PPC_MR(r_ret, r_A); if (i != flen - 1) { if (ctx->seen) @@ -321,53 +314,53 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_BLR(); } break; - case BPF_S_MISC_TAX: /* X = A */ + case BPF_MISC | BPF_TAX: /* X = A */ PPC_MR(r_X, r_A); break; - case BPF_S_MISC_TXA: /* A = X */ + case BPF_MISC | BPF_TXA: /* A = X */ ctx->seen |= SEEN_XREG; PPC_MR(r_A, r_X); break; /*** Constant loads/M[] access ***/ - case BPF_S_LD_IMM: /* A = K */ + case BPF_LD | BPF_IMM: /* A = K */ PPC_LI32(r_A, K); break; - case BPF_S_LDX_IMM: /* X = K */ + case BPF_LDX | BPF_IMM: /* X = K */ PPC_LI32(r_X, K); break; - case BPF_S_LD_MEM: /* A = mem[K] */ + case BPF_LD | BPF_MEM: /* A = mem[K] */ PPC_MR(r_A, r_M + (K & 0xf)); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; - case BPF_S_LDX_MEM: /* X = mem[K] */ + case BPF_LDX | BPF_MEM: /* X = mem[K] */ PPC_MR(r_X, r_M + (K & 0xf)); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; - case BPF_S_ST: /* mem[K] = A */ + case BPF_ST: /* mem[K] = A */ PPC_MR(r_M + (K & 0xf), r_A); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; - case BPF_S_STX: /* mem[K] = X */ + case BPF_STX: /* mem[K] = X */ PPC_MR(r_M + (K & 0xf), r_X); ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf)); break; - case BPF_S_LD_W_LEN: /* A = skb->len; */ + case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len)); break; - case BPF_S_LDX_W_LEN: /* X = skb->len; */ + case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len)); break; /*** Ancillary info loads ***/ - case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ + case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff, protocol)); break; - case BPF_S_ANC_IFINDEX: + case BPF_ANC | SKF_AD_IFINDEX: PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, dev)); PPC_CMPDI(r_scratch1, 0); @@ -384,33 +377,33 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_LWZ_OFFS(r_A, r_scratch1, offsetof(struct net_device, ifindex)); break; - case BPF_S_ANC_MARK: + case BPF_ANC | SKF_AD_MARK: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, mark)); break; - case BPF_S_ANC_RXHASH: + case BPF_ANC | SKF_AD_RXHASH: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, hash)); break; - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: + case BPF_ANC | SKF_AD_VLAN_TAG: + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, vlan_tci)); - if (filter[i].code == BPF_S_ANC_VLAN_TAG) + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) PPC_ANDI(r_A, r_A, VLAN_VID_MASK); else PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT); break; - case BPF_S_ANC_QUEUE: + case BPF_ANC | SKF_AD_QUEUE: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, queue_mapping)); break; - case BPF_S_ANC_CPU: + case BPF_ANC | SKF_AD_CPU: #ifdef CONFIG_SMP /* * PACA ptr is r13: @@ -426,13 +419,13 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, break; /*** Absolute loads from packet header/data ***/ - case BPF_S_LD_W_ABS: + case BPF_LD | BPF_W | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, sk_load_word); goto common_load; - case BPF_S_LD_H_ABS: + case BPF_LD | BPF_H | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, sk_load_half); goto common_load; - case BPF_S_LD_B_ABS: + case BPF_LD | BPF_B | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, sk_load_byte); common_load: /* Load from [K]. */ @@ -449,13 +442,13 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, break; /*** Indirect loads from packet header/data ***/ - case BPF_S_LD_W_IND: + case BPF_LD | BPF_W | BPF_IND: func = sk_load_word; goto common_load_ind; - case BPF_S_LD_H_IND: + case BPF_LD | BPF_H | BPF_IND: func = sk_load_half; goto common_load_ind; - case BPF_S_LD_B_IND: + case BPF_LD | BPF_B | BPF_IND: func = sk_load_byte; common_load_ind: /* @@ -473,31 +466,31 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_BCC(COND_LT, exit_addr); break; - case BPF_S_LDX_B_MSH: + case BPF_LDX | BPF_B | BPF_MSH: func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh); goto common_load; break; /*** Jump and branches ***/ - case BPF_S_JMP_JA: + case BPF_JMP | BPF_JA: if (K != 0) PPC_JMP(addrs[i + 1 + K]); break; - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGT_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: true_cond = COND_GT; goto cond_branch; - case BPF_S_JMP_JGE_K: - case BPF_S_JMP_JGE_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: true_cond = COND_GE; goto cond_branch; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JEQ_X: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: true_cond = COND_EQ; goto cond_branch; - case BPF_S_JMP_JSET_K: - case BPF_S_JMP_JSET_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: true_cond = COND_NE; /* Fall through */ cond_branch: @@ -508,20 +501,20 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, break; } - switch (filter[i].code) { - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JEQ_X: + switch (code) { + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JEQ | BPF_X: ctx->seen |= SEEN_XREG; PPC_CMPLW(r_A, r_X); break; - case BPF_S_JMP_JSET_X: + case BPF_JMP | BPF_JSET | BPF_X: ctx->seen |= SEEN_XREG; PPC_AND_DOT(r_scratch1, r_A, r_X); break; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGE_K: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: if (K < 32768) PPC_CMPLWI(r_A, K); else { @@ -529,7 +522,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_CMPLW(r_A, r_scratch1); } break; - case BPF_S_JMP_JSET_K: + case BPF_JMP | BPF_JSET | BPF_K: if (K < 32768) /* PPC_ANDI is /only/ dot-form */ PPC_ANDI(r_scratch1, r_A, K); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index e9f8fa9337fe..a2cbd875543a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -269,27 +269,17 @@ static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter) EMIT4(0xa7c80000); /* Clear A if the first register does not set it. */ switch (filter[0].code) { - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: - case BPF_S_LD_W_LEN: - case BPF_S_LD_W_IND: - case BPF_S_LD_H_IND: - case BPF_S_LD_B_IND: - case BPF_S_LD_IMM: - case BPF_S_LD_MEM: - case BPF_S_MISC_TXA: - case BPF_S_ANC_PROTOCOL: - case BPF_S_ANC_PKTTYPE: - case BPF_S_ANC_IFINDEX: - case BPF_S_ANC_MARK: - case BPF_S_ANC_QUEUE: - case BPF_S_ANC_HATYPE: - case BPF_S_ANC_RXHASH: - case BPF_S_ANC_CPU: - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - case BPF_S_RET_K: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LD | BPF_W | BPF_IND: + case BPF_LD | BPF_H | BPF_IND: + case BPF_LD | BPF_B | BPF_IND: + case BPF_LD | BPF_IMM: + case BPF_LD | BPF_MEM: + case BPF_MISC | BPF_TXA: + case BPF_RET | BPF_K: /* first instruction sets A register */ break; default: /* A = 0 */ @@ -304,15 +294,18 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, unsigned int K; int offset; unsigned int mask; + u16 code; K = filter->k; - switch (filter->code) { - case BPF_S_ALU_ADD_X: /* A += X */ + code = bpf_anc_helper(filter); + + switch (code) { + case BPF_ALU | BPF_ADD | BPF_X: /* A += X */ jit->seen |= SEEN_XREG; /* ar %r5,%r12 */ EMIT2(0x1a5c); break; - case BPF_S_ALU_ADD_K: /* A += K */ + case BPF_ALU | BPF_ADD | BPF_K: /* A += K */ if (!K) break; if (K <= 16383) @@ -325,12 +318,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* a %r5,(%r13) */ EMIT4_DISP(0x5a50d000, EMIT_CONST(K)); break; - case BPF_S_ALU_SUB_X: /* A -= X */ + case BPF_ALU | BPF_SUB | BPF_X: /* A -= X */ jit->seen |= SEEN_XREG; /* sr %r5,%r12 */ EMIT2(0x1b5c); break; - case BPF_S_ALU_SUB_K: /* A -= K */ + case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ if (!K) break; if (K <= 16384) @@ -343,12 +336,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* s %r5,(%r13) */ EMIT4_DISP(0x5b50d000, EMIT_CONST(K)); break; - case BPF_S_ALU_MUL_X: /* A *= X */ + case BPF_ALU | BPF_MUL | BPF_X: /* A *= X */ jit->seen |= SEEN_XREG; /* msr %r5,%r12 */ EMIT4(0xb252005c); break; - case BPF_S_ALU_MUL_K: /* A *= K */ + case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ if (K <= 16383) /* mhi %r5,K */ EMIT4_IMM(0xa75c0000, K); @@ -359,7 +352,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* ms %r5,(%r13) */ EMIT4_DISP(0x7150d000, EMIT_CONST(K)); break; - case BPF_S_ALU_DIV_X: /* A /= X */ + case BPF_ALU | BPF_DIV | BPF_X: /* A /= X */ jit->seen |= SEEN_XREG | SEEN_RET0; /* ltr %r12,%r12 */ EMIT2(0x12cc); @@ -370,7 +363,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* dlr %r4,%r12 */ EMIT4(0xb997004c); break; - case BPF_S_ALU_DIV_K: /* A /= K */ + case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ if (K == 1) break; /* lhi %r4,0 */ @@ -378,7 +371,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* dl %r4,(%r13) */ EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K)); break; - case BPF_S_ALU_MOD_X: /* A %= X */ + case BPF_ALU | BPF_MOD | BPF_X: /* A %= X */ jit->seen |= SEEN_XREG | SEEN_RET0; /* ltr %r12,%r12 */ EMIT2(0x12cc); @@ -391,7 +384,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* lr %r5,%r4 */ EMIT2(0x1854); break; - case BPF_S_ALU_MOD_K: /* A %= K */ + case BPF_ALU | BPF_MOD | BPF_K: /* A %= K */ if (K == 1) { /* lhi %r5,0 */ EMIT4(0xa7580000); @@ -404,12 +397,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* lr %r5,%r4 */ EMIT2(0x1854); break; - case BPF_S_ALU_AND_X: /* A &= X */ + case BPF_ALU | BPF_AND | BPF_X: /* A &= X */ jit->seen |= SEEN_XREG; /* nr %r5,%r12 */ EMIT2(0x145c); break; - case BPF_S_ALU_AND_K: /* A &= K */ + case BPF_ALU | BPF_AND | BPF_K: /* A &= K */ if (test_facility(21)) /* nilf %r5, */ EMIT6_IMM(0xc05b0000, K); @@ -417,12 +410,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* n %r5,(%r13) */ EMIT4_DISP(0x5450d000, EMIT_CONST(K)); break; - case BPF_S_ALU_OR_X: /* A |= X */ + case BPF_ALU | BPF_OR | BPF_X: /* A |= X */ jit->seen |= SEEN_XREG; /* or %r5,%r12 */ EMIT2(0x165c); break; - case BPF_S_ALU_OR_K: /* A |= K */ + case BPF_ALU | BPF_OR | BPF_K: /* A |= K */ if (test_facility(21)) /* oilf %r5, */ EMIT6_IMM(0xc05d0000, K); @@ -430,55 +423,55 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* o %r5,(%r13) */ EMIT4_DISP(0x5650d000, EMIT_CONST(K)); break; - case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ - case BPF_S_ALU_XOR_X: + case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */ + case BPF_ALU | BPF_XOR | BPF_X: jit->seen |= SEEN_XREG; /* xr %r5,%r12 */ EMIT2(0x175c); break; - case BPF_S_ALU_XOR_K: /* A ^= K */ + case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ if (!K) break; /* x %r5,(%r13) */ EMIT4_DISP(0x5750d000, EMIT_CONST(K)); break; - case BPF_S_ALU_LSH_X: /* A <<= X; */ + case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ jit->seen |= SEEN_XREG; /* sll %r5,0(%r12) */ EMIT4(0x8950c000); break; - case BPF_S_ALU_LSH_K: /* A <<= K */ + case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */ if (K == 0) break; /* sll %r5,K */ EMIT4_DISP(0x89500000, K); break; - case BPF_S_ALU_RSH_X: /* A >>= X; */ + case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ jit->seen |= SEEN_XREG; /* srl %r5,0(%r12) */ EMIT4(0x8850c000); break; - case BPF_S_ALU_RSH_K: /* A >>= K; */ + case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ if (K == 0) break; /* srl %r5,K */ EMIT4_DISP(0x88500000, K); break; - case BPF_S_ALU_NEG: /* A = -A */ + case BPF_ALU | BPF_NEG: /* A = -A */ /* lnr %r5,%r5 */ EMIT2(0x1155); break; - case BPF_S_JMP_JA: /* ip += K */ + case BPF_JMP | BPF_JA: /* ip += K */ offset = addrs[i + K] + jit->start - jit->prg; EMIT4_PCREL(0xa7f40000, offset); break; - case BPF_S_JMP_JGT_K: /* ip += (A > K) ? jt : jf */ + case BPF_JMP | BPF_JGT | BPF_K: /* ip += (A > K) ? jt : jf */ mask = 0x200000; /* jh */ goto kbranch; - case BPF_S_JMP_JGE_K: /* ip += (A >= K) ? jt : jf */ + case BPF_JMP | BPF_JGE | BPF_K: /* ip += (A >= K) ? jt : jf */ mask = 0xa00000; /* jhe */ goto kbranch; - case BPF_S_JMP_JEQ_K: /* ip += (A == K) ? jt : jf */ + case BPF_JMP | BPF_JEQ | BPF_K: /* ip += (A == K) ? jt : jf */ mask = 0x800000; /* je */ kbranch: /* Emit compare if the branch targets are different */ if (filter->jt != filter->jf) { @@ -511,7 +504,7 @@ branch: if (filter->jt == filter->jf) { EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset); } break; - case BPF_S_JMP_JSET_K: /* ip += (A & K) ? jt : jf */ + case BPF_JMP | BPF_JSET | BPF_K: /* ip += (A & K) ? jt : jf */ mask = 0x700000; /* jnz */ /* Emit test if the branch targets are different */ if (filter->jt != filter->jf) { @@ -525,13 +518,13 @@ branch: if (filter->jt == filter->jf) { EMIT4_IMM(0xa7510000, K); } goto branch; - case BPF_S_JMP_JGT_X: /* ip += (A > X) ? jt : jf */ + case BPF_JMP | BPF_JGT | BPF_X: /* ip += (A > X) ? jt : jf */ mask = 0x200000; /* jh */ goto xbranch; - case BPF_S_JMP_JGE_X: /* ip += (A >= X) ? jt : jf */ + case BPF_JMP | BPF_JGE | BPF_X: /* ip += (A >= X) ? jt : jf */ mask = 0xa00000; /* jhe */ goto xbranch; - case BPF_S_JMP_JEQ_X: /* ip += (A == X) ? jt : jf */ + case BPF_JMP | BPF_JEQ | BPF_X: /* ip += (A == X) ? jt : jf */ mask = 0x800000; /* je */ xbranch: /* Emit compare if the branch targets are different */ if (filter->jt != filter->jf) { @@ -540,7 +533,7 @@ xbranch: /* Emit compare if the branch targets are different */ EMIT2(0x195c); } goto branch; - case BPF_S_JMP_JSET_X: /* ip += (A & X) ? jt : jf */ + case BPF_JMP | BPF_JSET | BPF_X: /* ip += (A & X) ? jt : jf */ mask = 0x700000; /* jnz */ /* Emit test if the branch targets are different */ if (filter->jt != filter->jf) { @@ -551,15 +544,15 @@ xbranch: /* Emit compare if the branch targets are different */ EMIT2(0x144c); } goto branch; - case BPF_S_LD_W_ABS: /* A = *(u32 *) (skb->data+K) */ + case BPF_LD | BPF_W | BPF_ABS: /* A = *(u32 *) (skb->data+K) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD; offset = jit->off_load_word; goto load_abs; - case BPF_S_LD_H_ABS: /* A = *(u16 *) (skb->data+K) */ + case BPF_LD | BPF_H | BPF_ABS: /* A = *(u16 *) (skb->data+K) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF; offset = jit->off_load_half; goto load_abs; - case BPF_S_LD_B_ABS: /* A = *(u8 *) (skb->data+K) */ + case BPF_LD | BPF_B | BPF_ABS: /* A = *(u8 *) (skb->data+K) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE; offset = jit->off_load_byte; load_abs: if ((int) K < 0) @@ -573,19 +566,19 @@ call_fn: /* lg %r1,(%r13) */ /* jnz */ EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg)); break; - case BPF_S_LD_W_IND: /* A = *(u32 *) (skb->data+K+X) */ + case BPF_LD | BPF_W | BPF_IND: /* A = *(u32 *) (skb->data+K+X) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD; offset = jit->off_load_iword; goto call_fn; - case BPF_S_LD_H_IND: /* A = *(u16 *) (skb->data+K+X) */ + case BPF_LD | BPF_H | BPF_IND: /* A = *(u16 *) (skb->data+K+X) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF; offset = jit->off_load_ihalf; goto call_fn; - case BPF_S_LD_B_IND: /* A = *(u8 *) (skb->data+K+X) */ + case BPF_LD | BPF_B | BPF_IND: /* A = *(u8 *) (skb->data+K+X) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE; offset = jit->off_load_ibyte; goto call_fn; - case BPF_S_LDX_B_MSH: + case BPF_LDX | BPF_B | BPF_MSH: /* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */ jit->seen |= SEEN_RET0; if ((int) K < 0) { @@ -596,17 +589,17 @@ call_fn: /* lg %r1,(%r13) */ jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH; offset = jit->off_load_bmsh; goto call_fn; - case BPF_S_LD_W_LEN: /* A = skb->len; */ + case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); /* l %r5,(%r2) */ EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len)); break; - case BPF_S_LDX_W_LEN: /* X = skb->len; */ + case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ jit->seen |= SEEN_XREG; /* l %r12,(%r2) */ EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len)); break; - case BPF_S_LD_IMM: /* A = K */ + case BPF_LD | BPF_IMM: /* A = K */ if (K <= 16383) /* lhi %r5,K */ EMIT4_IMM(0xa7580000, K); @@ -617,7 +610,7 @@ call_fn: /* lg %r1,(%r13) */ /* l %r5,(%r13) */ EMIT4_DISP(0x5850d000, EMIT_CONST(K)); break; - case BPF_S_LDX_IMM: /* X = K */ + case BPF_LDX | BPF_IMM: /* X = K */ jit->seen |= SEEN_XREG; if (K <= 16383) /* lhi %r12, */ @@ -629,29 +622,29 @@ call_fn: /* lg %r1,(%r13) */ /* l %r12,(%r13) */ EMIT4_DISP(0x58c0d000, EMIT_CONST(K)); break; - case BPF_S_LD_MEM: /* A = mem[K] */ + case BPF_LD | BPF_MEM: /* A = mem[K] */ jit->seen |= SEEN_MEM; /* l %r5,(%r15) */ EMIT4_DISP(0x5850f000, (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); break; - case BPF_S_LDX_MEM: /* X = mem[K] */ + case BPF_LDX | BPF_MEM: /* X = mem[K] */ jit->seen |= SEEN_XREG | SEEN_MEM; /* l %r12,(%r15) */ EMIT4_DISP(0x58c0f000, (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); break; - case BPF_S_MISC_TAX: /* X = A */ + case BPF_MISC | BPF_TAX: /* X = A */ jit->seen |= SEEN_XREG; /* lr %r12,%r5 */ EMIT2(0x18c5); break; - case BPF_S_MISC_TXA: /* A = X */ + case BPF_MISC | BPF_TXA: /* A = X */ jit->seen |= SEEN_XREG; /* lr %r5,%r12 */ EMIT2(0x185c); break; - case BPF_S_RET_K: + case BPF_RET | BPF_K: if (K == 0) { jit->seen |= SEEN_RET0; if (last) @@ -671,33 +664,33 @@ call_fn: /* lg %r1,(%r13) */ EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); } break; - case BPF_S_RET_A: + case BPF_RET | BPF_A: /* llgfr %r2,%r5 */ EMIT4(0xb9160025); /* j */ EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); break; - case BPF_S_ST: /* mem[K] = A */ + case BPF_ST: /* mem[K] = A */ jit->seen |= SEEN_MEM; /* st %r5,(%r15) */ EMIT4_DISP(0x5050f000, (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); break; - case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ + case BPF_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ jit->seen |= SEEN_XREG | SEEN_MEM; /* st %r12,(%r15) */ EMIT4_DISP(0x50c0f000, (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); break; - case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ + case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); /* lhi %r5,0 */ EMIT4(0xa7580000); /* icm %r5,3,(%r2) */ EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol)); break; - case BPF_S_ANC_IFINDEX: /* if (!skb->dev) return 0; - * A = skb->dev->ifindex */ + case BPF_ANC | SKF_AD_IFINDEX: /* if (!skb->dev) return 0; + * A = skb->dev->ifindex */ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); jit->seen |= SEEN_RET0; /* lg %r1,(%r2) */ @@ -709,20 +702,20 @@ call_fn: /* lg %r1,(%r13) */ /* l %r5,(%r1) */ EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex)); break; - case BPF_S_ANC_MARK: /* A = skb->mark */ + case BPF_ANC | SKF_AD_MARK: /* A = skb->mark */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); /* l %r5,(%r2) */ EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark)); break; - case BPF_S_ANC_QUEUE: /* A = skb->queue_mapping */ + case BPF_ANC | SKF_AD_QUEUE: /* A = skb->queue_mapping */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); /* lhi %r5,0 */ EMIT4(0xa7580000); /* icm %r5,3,(%r2) */ EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping)); break; - case BPF_S_ANC_HATYPE: /* if (!skb->dev) return 0; - * A = skb->dev->type */ + case BPF_ANC | SKF_AD_HATYPE: /* if (!skb->dev) return 0; + * A = skb->dev->type */ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); jit->seen |= SEEN_RET0; /* lg %r1,(%r2) */ @@ -736,20 +729,20 @@ call_fn: /* lg %r1,(%r13) */ /* icm %r5,3,(%r1) */ EMIT4_DISP(0xbf531000, offsetof(struct net_device, type)); break; - case BPF_S_ANC_RXHASH: /* A = skb->hash */ + case BPF_ANC | SKF_AD_RXHASH: /* A = skb->hash */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); /* l %r5,(%r2) */ EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash)); break; - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: + case BPF_ANC | SKF_AD_VLAN_TAG: + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); /* lhi %r5,0 */ EMIT4(0xa7580000); /* icm %r5,3,(%r2) */ EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, vlan_tci)); - if (filter->code == BPF_S_ANC_VLAN_TAG) { + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) { /* nill %r5,0xefff */ EMIT4_IMM(0xa5570000, ~VLAN_TAG_PRESENT); } else { @@ -759,7 +752,7 @@ call_fn: /* lg %r1,(%r13) */ EMIT4_DISP(0x88500000, 12); } break; - case BPF_S_ANC_PKTTYPE: + case BPF_ANC | SKF_AD_PKTTYPE: if (pkt_type_offset < 0) goto out; /* lhi %r5,0 */ @@ -769,7 +762,7 @@ call_fn: /* lg %r1,(%r13) */ /* srl %r5,5 */ EMIT4_DISP(0x88500000, 5); break; - case BPF_S_ANC_CPU: /* A = smp_processor_id() */ + case BPF_ANC | SKF_AD_CPU: /* A = smp_processor_id() */ #ifdef CONFIG_SMP /* l %r5, */ EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr)); diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index a82c6b2a9780..c88cf147deed 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -415,20 +415,11 @@ void bpf_jit_compile(struct sk_filter *fp) emit_reg_move(O7, r_saved_O7); switch (filter[0].code) { - case BPF_S_RET_K: - case BPF_S_LD_W_LEN: - case BPF_S_ANC_PROTOCOL: - case BPF_S_ANC_PKTTYPE: - case BPF_S_ANC_IFINDEX: - case BPF_S_ANC_MARK: - case BPF_S_ANC_RXHASH: - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - case BPF_S_ANC_CPU: - case BPF_S_ANC_QUEUE: - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: + case BPF_RET | BPF_K: + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: /* The first instruction sets the A register (or is * a "RET 'constant'") */ @@ -445,59 +436,60 @@ void bpf_jit_compile(struct sk_filter *fp) unsigned int t_offset; unsigned int f_offset; u32 t_op, f_op; + u16 code = bpf_anc_helper(&filter[i]); int ilen; - switch (filter[i].code) { - case BPF_S_ALU_ADD_X: /* A += X; */ + switch (code) { + case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */ emit_alu_X(ADD); break; - case BPF_S_ALU_ADD_K: /* A += K; */ + case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */ emit_alu_K(ADD, K); break; - case BPF_S_ALU_SUB_X: /* A -= X; */ + case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */ emit_alu_X(SUB); break; - case BPF_S_ALU_SUB_K: /* A -= K */ + case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ emit_alu_K(SUB, K); break; - case BPF_S_ALU_AND_X: /* A &= X */ + case BPF_ALU | BPF_AND | BPF_X: /* A &= X */ emit_alu_X(AND); break; - case BPF_S_ALU_AND_K: /* A &= K */ + case BPF_ALU | BPF_AND | BPF_K: /* A &= K */ emit_alu_K(AND, K); break; - case BPF_S_ALU_OR_X: /* A |= X */ + case BPF_ALU | BPF_OR | BPF_X: /* A |= X */ emit_alu_X(OR); break; - case BPF_S_ALU_OR_K: /* A |= K */ + case BPF_ALU | BPF_OR | BPF_K: /* A |= K */ emit_alu_K(OR, K); break; - case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ - case BPF_S_ALU_XOR_X: + case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */ + case BPF_ALU | BPF_XOR | BPF_X: emit_alu_X(XOR); break; - case BPF_S_ALU_XOR_K: /* A ^= K */ + case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ emit_alu_K(XOR, K); break; - case BPF_S_ALU_LSH_X: /* A <<= X */ + case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X */ emit_alu_X(SLL); break; - case BPF_S_ALU_LSH_K: /* A <<= K */ + case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */ emit_alu_K(SLL, K); break; - case BPF_S_ALU_RSH_X: /* A >>= X */ + case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X */ emit_alu_X(SRL); break; - case BPF_S_ALU_RSH_K: /* A >>= K */ + case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K */ emit_alu_K(SRL, K); break; - case BPF_S_ALU_MUL_X: /* A *= X; */ + case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */ emit_alu_X(MUL); break; - case BPF_S_ALU_MUL_K: /* A *= K */ + case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ emit_alu_K(MUL, K); break; - case BPF_S_ALU_DIV_K: /* A /= K with K != 0*/ + case BPF_ALU | BPF_DIV | BPF_K: /* A /= K with K != 0*/ if (K == 1) break; emit_write_y(G0); @@ -512,7 +504,7 @@ void bpf_jit_compile(struct sk_filter *fp) #endif emit_alu_K(DIV, K); break; - case BPF_S_ALU_DIV_X: /* A /= X; */ + case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */ emit_cmpi(r_X, 0); if (pc_ret0 > 0) { t_offset = addrs[pc_ret0 - 1]; @@ -544,10 +536,10 @@ void bpf_jit_compile(struct sk_filter *fp) #endif emit_alu_X(DIV); break; - case BPF_S_ALU_NEG: + case BPF_ALU | BPF_NEG: emit_neg(); break; - case BPF_S_RET_K: + case BPF_RET | BPF_K: if (!K) { if (pc_ret0 == -1) pc_ret0 = i; @@ -556,7 +548,7 @@ void bpf_jit_compile(struct sk_filter *fp) emit_loadimm(K, r_A); } /* Fallthrough */ - case BPF_S_RET_A: + case BPF_RET | BPF_A: if (seen_or_pass0) { if (i != flen - 1) { emit_jump(cleanup_addr); @@ -573,18 +565,18 @@ void bpf_jit_compile(struct sk_filter *fp) emit_jmpl(r_saved_O7, 8, G0); emit_reg_move(r_A, O0); /* delay slot */ break; - case BPF_S_MISC_TAX: + case BPF_MISC | BPF_TAX: seen |= SEEN_XREG; emit_reg_move(r_A, r_X); break; - case BPF_S_MISC_TXA: + case BPF_MISC | BPF_TXA: seen |= SEEN_XREG; emit_reg_move(r_X, r_A); break; - case BPF_S_ANC_CPU: + case BPF_ANC | SKF_AD_CPU: emit_load_cpu(r_A); break; - case BPF_S_ANC_PROTOCOL: + case BPF_ANC | SKF_AD_PROTOCOL: emit_skb_load16(protocol, r_A); break; #if 0 @@ -592,38 +584,38 @@ void bpf_jit_compile(struct sk_filter *fp) * a bit field even though we very much * know what we are doing here. */ - case BPF_S_ANC_PKTTYPE: + case BPF_ANC | SKF_AD_PKTTYPE: __emit_skb_load8(pkt_type, r_A); emit_alu_K(SRL, 5); break; #endif - case BPF_S_ANC_IFINDEX: + case BPF_ANC | SKF_AD_IFINDEX: emit_skb_loadptr(dev, r_A); emit_cmpi(r_A, 0); emit_branch(BNE_PTR, cleanup_addr + 4); emit_nop(); emit_load32(r_A, struct net_device, ifindex, r_A); break; - case BPF_S_ANC_MARK: + case BPF_ANC | SKF_AD_MARK: emit_skb_load32(mark, r_A); break; - case BPF_S_ANC_QUEUE: + case BPF_ANC | SKF_AD_QUEUE: emit_skb_load16(queue_mapping, r_A); break; - case BPF_S_ANC_HATYPE: + case BPF_ANC | SKF_AD_HATYPE: emit_skb_loadptr(dev, r_A); emit_cmpi(r_A, 0); emit_branch(BNE_PTR, cleanup_addr + 4); emit_nop(); emit_load16(r_A, struct net_device, type, r_A); break; - case BPF_S_ANC_RXHASH: + case BPF_ANC | SKF_AD_RXHASH: emit_skb_load32(hash, r_A); break; - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: + case BPF_ANC | SKF_AD_VLAN_TAG: + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: emit_skb_load16(vlan_tci, r_A); - if (filter[i].code == BPF_S_ANC_VLAN_TAG) { + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) { emit_andi(r_A, VLAN_VID_MASK, r_A); } else { emit_loadimm(VLAN_TAG_PRESENT, r_TMP); @@ -631,44 +623,44 @@ void bpf_jit_compile(struct sk_filter *fp) } break; - case BPF_S_LD_IMM: + case BPF_LD | BPF_IMM: emit_loadimm(K, r_A); break; - case BPF_S_LDX_IMM: + case BPF_LDX | BPF_IMM: emit_loadimm(K, r_X); break; - case BPF_S_LD_MEM: + case BPF_LD | BPF_MEM: emit_ldmem(K * 4, r_A); break; - case BPF_S_LDX_MEM: + case BPF_LDX | BPF_MEM: emit_ldmem(K * 4, r_X); break; - case BPF_S_ST: + case BPF_ST: emit_stmem(K * 4, r_A); break; - case BPF_S_STX: + case BPF_STX: emit_stmem(K * 4, r_X); break; #define CHOOSE_LOAD_FUNC(K, func) \ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) - case BPF_S_LD_W_ABS: + case BPF_LD | BPF_W | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_word); common_load: seen |= SEEN_DATAREF; emit_loadimm(K, r_OFF); emit_call(func); break; - case BPF_S_LD_H_ABS: + case BPF_LD | BPF_H | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_half); goto common_load; - case BPF_S_LD_B_ABS: + case BPF_LD | BPF_B | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte); goto common_load; - case BPF_S_LDX_B_MSH: + case BPF_LDX | BPF_B | BPF_MSH: func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte_msh); goto common_load; - case BPF_S_LD_W_IND: + case BPF_LD | BPF_W | BPF_IND: func = bpf_jit_load_word; common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; if (K) { @@ -683,13 +675,13 @@ common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; } emit_call(func); break; - case BPF_S_LD_H_IND: + case BPF_LD | BPF_H | BPF_IND: func = bpf_jit_load_half; goto common_load_ind; - case BPF_S_LD_B_IND: + case BPF_LD | BPF_B | BPF_IND: func = bpf_jit_load_byte; goto common_load_ind; - case BPF_S_JMP_JA: + case BPF_JMP | BPF_JA: emit_jump(addrs[i + K]); emit_nop(); break; @@ -700,14 +692,14 @@ common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; f_op = FOP; \ goto cond_branch - COND_SEL(BPF_S_JMP_JGT_K, BGU, BLEU); - COND_SEL(BPF_S_JMP_JGE_K, BGEU, BLU); - COND_SEL(BPF_S_JMP_JEQ_K, BE, BNE); - COND_SEL(BPF_S_JMP_JSET_K, BNE, BE); - COND_SEL(BPF_S_JMP_JGT_X, BGU, BLEU); - COND_SEL(BPF_S_JMP_JGE_X, BGEU, BLU); - COND_SEL(BPF_S_JMP_JEQ_X, BE, BNE); - COND_SEL(BPF_S_JMP_JSET_X, BNE, BE); + COND_SEL(BPF_JMP | BPF_JGT | BPF_K, BGU, BLEU); + COND_SEL(BPF_JMP | BPF_JGE | BPF_K, BGEU, BLU); + COND_SEL(BPF_JMP | BPF_JEQ | BPF_K, BE, BNE); + COND_SEL(BPF_JMP | BPF_JSET | BPF_K, BNE, BE); + COND_SEL(BPF_JMP | BPF_JGT | BPF_X, BGU, BLEU); + COND_SEL(BPF_JMP | BPF_JGE | BPF_X, BGEU, BLU); + COND_SEL(BPF_JMP | BPF_JEQ | BPF_X, BE, BNE); + COND_SEL(BPF_JMP | BPF_JSET | BPF_X, BNE, BE); cond_branch: f_offset = addrs[i + filter[i].jf]; t_offset = addrs[i + filter[i].jt]; @@ -719,20 +711,20 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; break; } - switch (filter[i].code) { - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JEQ_X: + switch (code) { + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JEQ | BPF_X: seen |= SEEN_XREG; emit_cmp(r_A, r_X); break; - case BPF_S_JMP_JSET_X: + case BPF_JMP | BPF_JSET | BPF_X: seen |= SEEN_XREG; emit_btst(r_A, r_X); break; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGE_K: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: if (is_simm13(K)) { emit_cmpi(r_A, K); } else { @@ -740,7 +732,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; emit_cmp(r_A, r_TMP); } break; - case BPF_S_JMP_JSET_K: + case BPF_JMP | BPF_JSET | BPF_K: if (is_simm13(K)) { emit_btsti(r_A, K); } else { diff --git a/include/linux/filter.h b/include/linux/filter.h index 625f4de9bdf2..49ef7a298c92 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -197,7 +197,6 @@ int sk_detach_filter(struct sock *sk); int sk_chk_filter(struct sock_filter *filter, unsigned int flen); int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); -void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to); void sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); @@ -205,6 +204,41 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); void bpf_int_jit_compile(struct sk_filter *fp); +#define BPF_ANC BIT(15) + +static inline u16 bpf_anc_helper(const struct sock_filter *ftest) +{ + BUG_ON(ftest->code & BPF_ANC); + + switch (ftest->code) { + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: +#define BPF_ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ + return BPF_ANC | SKF_AD_##CODE + switch (ftest->k) { + BPF_ANCILLARY(PROTOCOL); + BPF_ANCILLARY(PKTTYPE); + BPF_ANCILLARY(IFINDEX); + BPF_ANCILLARY(NLATTR); + BPF_ANCILLARY(NLATTR_NEST); + BPF_ANCILLARY(MARK); + BPF_ANCILLARY(QUEUE); + BPF_ANCILLARY(HATYPE); + BPF_ANCILLARY(RXHASH); + BPF_ANCILLARY(CPU); + BPF_ANCILLARY(ALU_XOR_X); + BPF_ANCILLARY(VLAN_TAG); + BPF_ANCILLARY(VLAN_TAG_PRESENT); + BPF_ANCILLARY(PAY_OFFSET); + BPF_ANCILLARY(RANDOM); + } + /* Fallthrough. */ + default: + return ftest->code; + } +} + #ifdef CONFIG_BPF_JIT #include #include @@ -224,86 +258,20 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, } #else #include + static inline void bpf_jit_compile(struct sk_filter *fp) { } + static inline void bpf_jit_free(struct sk_filter *fp) { kfree(fp); } -#endif +#endif /* CONFIG_BPF_JIT */ static inline int bpf_tell_extensions(void) { return SKF_AD_MAX; } -enum { - BPF_S_RET_K = 1, - BPF_S_RET_A, - BPF_S_ALU_ADD_K, - BPF_S_ALU_ADD_X, - BPF_S_ALU_SUB_K, - BPF_S_ALU_SUB_X, - BPF_S_ALU_MUL_K, - BPF_S_ALU_MUL_X, - BPF_S_ALU_DIV_X, - BPF_S_ALU_MOD_K, - BPF_S_ALU_MOD_X, - BPF_S_ALU_AND_K, - BPF_S_ALU_AND_X, - BPF_S_ALU_OR_K, - BPF_S_ALU_OR_X, - BPF_S_ALU_XOR_K, - BPF_S_ALU_XOR_X, - BPF_S_ALU_LSH_K, - BPF_S_ALU_LSH_X, - BPF_S_ALU_RSH_K, - BPF_S_ALU_RSH_X, - BPF_S_ALU_NEG, - BPF_S_LD_W_ABS, - BPF_S_LD_H_ABS, - BPF_S_LD_B_ABS, - BPF_S_LD_W_LEN, - BPF_S_LD_W_IND, - BPF_S_LD_H_IND, - BPF_S_LD_B_IND, - BPF_S_LD_IMM, - BPF_S_LDX_W_LEN, - BPF_S_LDX_B_MSH, - BPF_S_LDX_IMM, - BPF_S_MISC_TAX, - BPF_S_MISC_TXA, - BPF_S_ALU_DIV_K, - BPF_S_LD_MEM, - BPF_S_LDX_MEM, - BPF_S_ST, - BPF_S_STX, - BPF_S_JMP_JA, - BPF_S_JMP_JEQ_K, - BPF_S_JMP_JEQ_X, - BPF_S_JMP_JGE_K, - BPF_S_JMP_JGE_X, - BPF_S_JMP_JGT_K, - BPF_S_JMP_JGT_X, - BPF_S_JMP_JSET_K, - BPF_S_JMP_JSET_X, - /* Ancillary data */ - BPF_S_ANC_PROTOCOL, - BPF_S_ANC_PKTTYPE, - BPF_S_ANC_IFINDEX, - BPF_S_ANC_NLATTR, - BPF_S_ANC_NLATTR_NEST, - BPF_S_ANC_MARK, - BPF_S_ANC_QUEUE, - BPF_S_ANC_HATYPE, - BPF_S_ANC_RXHASH, - BPF_S_ANC_CPU, - BPF_S_ANC_ALU_XOR_X, - BPF_S_ANC_VLAN_TAG, - BPF_S_ANC_VLAN_TAG_PRESENT, - BPF_S_ANC_PAY_OFFSET, - BPF_S_ANC_RANDOM, -}; - #endif /* __LINUX_FILTER_H__ */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 1036b6f2fded..44e69483b383 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -103,60 +103,59 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) u32 k = ftest->k; switch (code) { - case BPF_S_LD_W_ABS: + case BPF_LD | BPF_W | BPF_ABS: ftest->code = BPF_LDX | BPF_W | BPF_ABS; /* 32-bit aligned and not out of bounds. */ if (k >= sizeof(struct seccomp_data) || k & 3) return -EINVAL; continue; - case BPF_S_LD_W_LEN: + case BPF_LD | BPF_W | BPF_LEN: ftest->code = BPF_LD | BPF_IMM; ftest->k = sizeof(struct seccomp_data); continue; - case BPF_S_LDX_W_LEN: + case BPF_LDX | BPF_W | BPF_LEN: ftest->code = BPF_LDX | BPF_IMM; ftest->k = sizeof(struct seccomp_data); continue; /* Explicitly include allowed calls. */ - case BPF_S_RET_K: - case BPF_S_RET_A: - case BPF_S_ALU_ADD_K: - case BPF_S_ALU_ADD_X: - case BPF_S_ALU_SUB_K: - case BPF_S_ALU_SUB_X: - case BPF_S_ALU_MUL_K: - case BPF_S_ALU_MUL_X: - case BPF_S_ALU_DIV_X: - case BPF_S_ALU_AND_K: - case BPF_S_ALU_AND_X: - case BPF_S_ALU_OR_K: - case BPF_S_ALU_OR_X: - case BPF_S_ALU_XOR_K: - case BPF_S_ALU_XOR_X: - case BPF_S_ALU_LSH_K: - case BPF_S_ALU_LSH_X: - case BPF_S_ALU_RSH_K: - case BPF_S_ALU_RSH_X: - case BPF_S_ALU_NEG: - case BPF_S_LD_IMM: - case BPF_S_LDX_IMM: - case BPF_S_MISC_TAX: - case BPF_S_MISC_TXA: - case BPF_S_ALU_DIV_K: - case BPF_S_LD_MEM: - case BPF_S_LDX_MEM: - case BPF_S_ST: - case BPF_S_STX: - case BPF_S_JMP_JA: - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JEQ_X: - case BPF_S_JMP_JGE_K: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JSET_K: - case BPF_S_JMP_JSET_X: - sk_decode_filter(ftest, ftest); + case BPF_RET | BPF_K: + case BPF_RET | BPF_A: + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_LSH | BPF_X: + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_X: + case BPF_ALU | BPF_NEG: + case BPF_LD | BPF_IMM: + case BPF_LDX | BPF_IMM: + case BPF_MISC | BPF_TAX: + case BPF_MISC | BPF_TXA: + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: + case BPF_ST: + case BPF_STX: + case BPF_JMP | BPF_JA: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: continue; default: return -EINVAL; diff --git a/net/core/filter.c b/net/core/filter.c index 2c2d35d9d101..328aaf6ff4d1 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -536,11 +536,13 @@ load_word: * Output: * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness */ + ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp); if (likely(ptr != NULL)) { BPF_R0 = get_unaligned_be32(ptr); CONT; } + return 0; LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + K)) */ off = K; @@ -550,6 +552,7 @@ load_half: BPF_R0 = get_unaligned_be16(ptr); CONT; } + return 0; LD_ABS_B: /* BPF_R0 = *(u8 *) (ctx + K) */ off = K; @@ -559,6 +562,7 @@ load_byte: BPF_R0 = *(u8 *)ptr; CONT; } + return 0; LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + X + K)) */ off = K + X; @@ -1136,44 +1140,46 @@ err: */ static int check_load_and_stores(struct sock_filter *filter, int flen) { - u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */ + u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */ int pc, ret = 0; BUILD_BUG_ON(BPF_MEMWORDS > 16); + masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL); if (!masks) return -ENOMEM; + memset(masks, 0xff, flen * sizeof(*masks)); for (pc = 0; pc < flen; pc++) { memvalid &= masks[pc]; switch (filter[pc].code) { - case BPF_S_ST: - case BPF_S_STX: + case BPF_ST: + case BPF_STX: memvalid |= (1 << filter[pc].k); break; - case BPF_S_LD_MEM: - case BPF_S_LDX_MEM: + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: if (!(memvalid & (1 << filter[pc].k))) { ret = -EINVAL; goto error; } break; - case BPF_S_JMP_JA: - /* a jump must set masks on target */ + case BPF_JMP | BPF_JA: + /* A jump must set masks on target */ masks[pc + 1 + filter[pc].k] &= memvalid; memvalid = ~0; break; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JEQ_X: - case BPF_S_JMP_JGE_K: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JSET_X: - case BPF_S_JMP_JSET_K: - /* a jump must set masks on targets */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + /* A jump must set masks on targets */ masks[pc + 1 + filter[pc].jt] &= memvalid; masks[pc + 1 + filter[pc].jf] &= memvalid; memvalid = ~0; @@ -1185,6 +1191,72 @@ error: return ret; } +static bool chk_code_allowed(u16 code_to_probe) +{ + static const bool codes[] = { + /* 32 bit ALU operations */ + [BPF_ALU | BPF_ADD | BPF_K] = true, + [BPF_ALU | BPF_ADD | BPF_X] = true, + [BPF_ALU | BPF_SUB | BPF_K] = true, + [BPF_ALU | BPF_SUB | BPF_X] = true, + [BPF_ALU | BPF_MUL | BPF_K] = true, + [BPF_ALU | BPF_MUL | BPF_X] = true, + [BPF_ALU | BPF_DIV | BPF_K] = true, + [BPF_ALU | BPF_DIV | BPF_X] = true, + [BPF_ALU | BPF_MOD | BPF_K] = true, + [BPF_ALU | BPF_MOD | BPF_X] = true, + [BPF_ALU | BPF_AND | BPF_K] = true, + [BPF_ALU | BPF_AND | BPF_X] = true, + [BPF_ALU | BPF_OR | BPF_K] = true, + [BPF_ALU | BPF_OR | BPF_X] = true, + [BPF_ALU | BPF_XOR | BPF_K] = true, + [BPF_ALU | BPF_XOR | BPF_X] = true, + [BPF_ALU | BPF_LSH | BPF_K] = true, + [BPF_ALU | BPF_LSH | BPF_X] = true, + [BPF_ALU | BPF_RSH | BPF_K] = true, + [BPF_ALU | BPF_RSH | BPF_X] = true, + [BPF_ALU | BPF_NEG] = true, + /* Load instructions */ + [BPF_LD | BPF_W | BPF_ABS] = true, + [BPF_LD | BPF_H | BPF_ABS] = true, + [BPF_LD | BPF_B | BPF_ABS] = true, + [BPF_LD | BPF_W | BPF_LEN] = true, + [BPF_LD | BPF_W | BPF_IND] = true, + [BPF_LD | BPF_H | BPF_IND] = true, + [BPF_LD | BPF_B | BPF_IND] = true, + [BPF_LD | BPF_IMM] = true, + [BPF_LD | BPF_MEM] = true, + [BPF_LDX | BPF_W | BPF_LEN] = true, + [BPF_LDX | BPF_B | BPF_MSH] = true, + [BPF_LDX | BPF_IMM] = true, + [BPF_LDX | BPF_MEM] = true, + /* Store instructions */ + [BPF_ST] = true, + [BPF_STX] = true, + /* Misc instructions */ + [BPF_MISC | BPF_TAX] = true, + [BPF_MISC | BPF_TXA] = true, + /* Return instructions */ + [BPF_RET | BPF_K] = true, + [BPF_RET | BPF_A] = true, + /* Jump instructions */ + [BPF_JMP | BPF_JA] = true, + [BPF_JMP | BPF_JEQ | BPF_K] = true, + [BPF_JMP | BPF_JEQ | BPF_X] = true, + [BPF_JMP | BPF_JGE | BPF_K] = true, + [BPF_JMP | BPF_JGE | BPF_X] = true, + [BPF_JMP | BPF_JGT | BPF_K] = true, + [BPF_JMP | BPF_JGT | BPF_X] = true, + [BPF_JMP | BPF_JSET | BPF_K] = true, + [BPF_JMP | BPF_JSET | BPF_X] = true, + }; + + if (code_to_probe >= ARRAY_SIZE(codes)) + return false; + + return codes[code_to_probe]; +} + /** * sk_chk_filter - verify socket filter code * @filter: filter to verify @@ -1201,154 +1273,76 @@ error: */ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) { - /* - * Valid instructions are initialized to non-0. - * Invalid instructions are initialized to 0. - */ - static const u8 codes[] = { - [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K, - [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X, - [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K, - [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X, - [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K, - [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X, - [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X, - [BPF_ALU|BPF_MOD|BPF_K] = BPF_S_ALU_MOD_K, - [BPF_ALU|BPF_MOD|BPF_X] = BPF_S_ALU_MOD_X, - [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K, - [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X, - [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K, - [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X, - [BPF_ALU|BPF_XOR|BPF_K] = BPF_S_ALU_XOR_K, - [BPF_ALU|BPF_XOR|BPF_X] = BPF_S_ALU_XOR_X, - [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K, - [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X, - [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K, - [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X, - [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG, - [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS, - [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS, - [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS, - [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN, - [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND, - [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND, - [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND, - [BPF_LD|BPF_IMM] = BPF_S_LD_IMM, - [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN, - [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH, - [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM, - [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX, - [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA, - [BPF_RET|BPF_K] = BPF_S_RET_K, - [BPF_RET|BPF_A] = BPF_S_RET_A, - [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K, - [BPF_LD|BPF_MEM] = BPF_S_LD_MEM, - [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM, - [BPF_ST] = BPF_S_ST, - [BPF_STX] = BPF_S_STX, - [BPF_JMP|BPF_JA] = BPF_S_JMP_JA, - [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K, - [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X, - [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K, - [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X, - [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K, - [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X, - [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K, - [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X, - }; - int pc; bool anc_found; + int pc; if (flen == 0 || flen > BPF_MAXINSNS) return -EINVAL; - /* check the filter code now */ + /* Check the filter code now */ for (pc = 0; pc < flen; pc++) { struct sock_filter *ftest = &filter[pc]; - u16 code = ftest->code; - if (code >= ARRAY_SIZE(codes)) - return -EINVAL; - code = codes[code]; - if (!code) + /* May we actually operate on this code? */ + if (!chk_code_allowed(ftest->code)) return -EINVAL; + /* Some instructions need special checks */ - switch (code) { - case BPF_S_ALU_DIV_K: - case BPF_S_ALU_MOD_K: - /* check for division by zero */ + switch (ftest->code) { + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_K: + /* Check for division by zero */ if (ftest->k == 0) return -EINVAL; break; - case BPF_S_LD_MEM: - case BPF_S_LDX_MEM: - case BPF_S_ST: - case BPF_S_STX: - /* check for invalid memory addresses */ + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: + case BPF_ST: + case BPF_STX: + /* Check for invalid memory addresses */ if (ftest->k >= BPF_MEMWORDS) return -EINVAL; break; - case BPF_S_JMP_JA: - /* - * Note, the large ftest->k might cause loops. + case BPF_JMP | BPF_JA: + /* Note, the large ftest->k might cause loops. * Compare this with conditional jumps below, * where offsets are limited. --ANK (981016) */ - if (ftest->k >= (unsigned int)(flen-pc-1)) + if (ftest->k >= (unsigned int)(flen - pc - 1)) return -EINVAL; break; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JEQ_X: - case BPF_S_JMP_JGE_K: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JSET_X: - case BPF_S_JMP_JSET_K: - /* for conditionals both must be safe */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + /* Both conditionals must be safe */ if (pc + ftest->jt + 1 >= flen || pc + ftest->jf + 1 >= flen) return -EINVAL; break; - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: anc_found = false; -#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ - code = BPF_S_ANC_##CODE; \ - anc_found = true; \ - break - switch (ftest->k) { - ANCILLARY(PROTOCOL); - ANCILLARY(PKTTYPE); - ANCILLARY(IFINDEX); - ANCILLARY(NLATTR); - ANCILLARY(NLATTR_NEST); - ANCILLARY(MARK); - ANCILLARY(QUEUE); - ANCILLARY(HATYPE); - ANCILLARY(RXHASH); - ANCILLARY(CPU); - ANCILLARY(ALU_XOR_X); - ANCILLARY(VLAN_TAG); - ANCILLARY(VLAN_TAG_PRESENT); - ANCILLARY(PAY_OFFSET); - ANCILLARY(RANDOM); - } - - /* ancillary operation unknown or unsupported */ + if (bpf_anc_helper(ftest) & BPF_ANC) + anc_found = true; + /* Ancillary operation unknown or unsupported */ if (anc_found == false && ftest->k >= SKF_AD_OFF) return -EINVAL; } - ftest->code = code; } - /* last instruction must be a RET code */ + /* Last instruction must be a RET code */ switch (filter[flen - 1].code) { - case BPF_S_RET_K: - case BPF_S_RET_A: + case BPF_RET | BPF_K: + case BPF_RET | BPF_A: return check_load_and_stores(filter, flen); } + return -EINVAL; } EXPORT_SYMBOL(sk_chk_filter); @@ -1448,7 +1442,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, { struct sock_filter *old_prog; struct sk_filter *old_fp; - int i, err, new_len, old_len = fp->len; + int err, new_len, old_len = fp->len; /* We are free to overwrite insns et al right here as it * won't be used at this point in time anymore internally @@ -1458,13 +1452,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, BUILD_BUG_ON(sizeof(struct sock_filter) != sizeof(struct sock_filter_int)); - /* For now, we need to unfiddle BPF_S_* identifiers in place. - * This can sooner or later on be subject to removal, e.g. when - * JITs have been converted. - */ - for (i = 0; i < fp->len; i++) - sk_decode_filter(&fp->insns[i], &fp->insns[i]); - /* Conversion cannot happen on overlapping memory areas, * so we need to keep the user BPF around until the 2nd * pass. At this time, the user BPF is stored in fp->insns. @@ -1706,84 +1693,6 @@ int sk_detach_filter(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_detach_filter); -void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) -{ - static const u16 decodes[] = { - [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K, - [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X, - [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K, - [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X, - [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K, - [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X, - [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X, - [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K, - [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X, - [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K, - [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X, - [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K, - [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X, - [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K, - [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X, - [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K, - [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X, - [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K, - [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X, - [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG, - [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS, - [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS, - [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_RANDOM] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, - [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, - [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, - [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND, - [BPF_S_LD_IMM] = BPF_LD|BPF_IMM, - [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN, - [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH, - [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM, - [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX, - [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA, - [BPF_S_RET_K] = BPF_RET|BPF_K, - [BPF_S_RET_A] = BPF_RET|BPF_A, - [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K, - [BPF_S_LD_MEM] = BPF_LD|BPF_MEM, - [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM, - [BPF_S_ST] = BPF_ST, - [BPF_S_STX] = BPF_STX, - [BPF_S_JMP_JA] = BPF_JMP|BPF_JA, - [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K, - [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X, - [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K, - [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X, - [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K, - [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X, - [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K, - [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X, - }; - u16 code; - - code = filt->code; - - to->code = decodes[code]; - to->jt = filt->jt; - to->jf = filt->jf; - to->k = filt->k; -} - int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) { -- cgit v1.2.3-71-gd317 From f8f6d679aaa78b989d9aee8d2935066fbdca2a30 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 29 May 2014 10:22:51 +0200 Subject: net: filter: improve filter block macros Commit 9739eef13c92 ("net: filter: make BPF conversion more readable") started to introduce helper macros similar to BPF_STMT()/BPF_JUMP() macros from classic BPF. However, quite some statements in the filter conversion functions remained in the old style which gives a mixture of block macros and non block macros in the code. This patch makes the block macros itself more readable by using explicit member initialization, and converts the remaining ones where possible to remain in a more consistent state. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 255 +++++++++++++++++++++++++++++++++++++++---------- net/core/filter.c | 196 ++++++++++++++----------------------- 2 files changed, 277 insertions(+), 174 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 49ef7a298c92..f0c2ad43b4af 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -76,56 +76,211 @@ enum { /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 -/* bpf_add|sub|...: a += x, bpf_mov: a = x */ -#define BPF_ALU64_REG(op, a, x) \ - ((struct sock_filter_int) {BPF_ALU64|BPF_OP(op)|BPF_X, a, x, 0, 0}) -#define BPF_ALU32_REG(op, a, x) \ - ((struct sock_filter_int) {BPF_ALU|BPF_OP(op)|BPF_X, a, x, 0, 0}) - -/* bpf_add|sub|...: a += imm, bpf_mov: a = imm */ -#define BPF_ALU64_IMM(op, a, imm) \ - ((struct sock_filter_int) {BPF_ALU64|BPF_OP(op)|BPF_K, a, 0, 0, imm}) -#define BPF_ALU32_IMM(op, a, imm) \ - ((struct sock_filter_int) {BPF_ALU|BPF_OP(op)|BPF_K, a, 0, 0, imm}) - -/* R0 = *(uint *) (skb->data + off) */ -#define BPF_LD_ABS(size, off) \ - ((struct sock_filter_int) {BPF_LD|BPF_SIZE(size)|BPF_ABS, 0, 0, 0, off}) - -/* R0 = *(uint *) (skb->data + x + off) */ -#define BPF_LD_IND(size, x, off) \ - ((struct sock_filter_int) {BPF_LD|BPF_SIZE(size)|BPF_IND, 0, x, 0, off}) - -/* a = *(uint *) (x + off) */ -#define BPF_LDX_MEM(sz, a, x, off) \ - ((struct sock_filter_int) {BPF_LDX|BPF_SIZE(sz)|BPF_MEM, a, x, off, 0}) - -/* if (a 'op' x) goto pc+off */ -#define BPF_JMP_REG(op, a, x, off) \ - ((struct sock_filter_int) {BPF_JMP|BPF_OP(op)|BPF_X, a, x, off, 0}) - -/* if (a 'op' imm) goto pc+off */ -#define BPF_JMP_IMM(op, a, imm, off) \ - ((struct sock_filter_int) {BPF_JMP|BPF_OP(op)|BPF_K, a, 0, off, imm}) - -#define BPF_EXIT_INSN() \ - ((struct sock_filter_int) {BPF_JMP|BPF_EXIT, 0, 0, 0, 0}) - -static inline int size_to_bpf(int size) -{ - switch (size) { - case 1: - return BPF_B; - case 2: - return BPF_H; - case 4: - return BPF_W; - case 8: - return BPF_DW; - default: - return -EINVAL; - } -} +/* Helper macros for filter block array initializers. */ + +/* ALU ops on registers, bpf_add|sub|...: A += X */ + +#define BPF_ALU64_REG(OP, A, X) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ + .a_reg = A, \ + .x_reg = X, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_ALU32_REG(OP, A, X) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ + .a_reg = A, \ + .x_reg = X, \ + .off = 0, \ + .imm = 0 }) + +/* ALU ops on immediates, bpf_add|sub|...: A += IMM */ + +#define BPF_ALU64_IMM(OP, A, IMM) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ + .a_reg = A, \ + .x_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_ALU32_IMM(OP, A, IMM) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ + .a_reg = A, \ + .x_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ + +#define BPF_ENDIAN(TYPE, A, LEN) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU | BPF_END | BPF_SRC(TYPE), \ + .a_reg = A, \ + .x_reg = 0, \ + .off = 0, \ + .imm = LEN }) + +/* Short form of mov, A = X */ + +#define BPF_MOV64_REG(A, X) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .a_reg = A, \ + .x_reg = X, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_MOV32_REG(A, X) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .a_reg = A, \ + .x_reg = X, \ + .off = 0, \ + .imm = 0 }) + +/* Short form of mov, A = IMM */ + +#define BPF_MOV64_IMM(A, IMM) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_K, \ + .a_reg = A, \ + .x_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_MOV32_IMM(A, IMM) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU | BPF_MOV | BPF_K, \ + .a_reg = A, \ + .x_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Short form of mov based on type, BPF_X: A = X, BPF_K: A = IMM */ + +#define BPF_MOV64_RAW(TYPE, A, X, IMM) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE), \ + .a_reg = A, \ + .x_reg = X, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_MOV32_RAW(TYPE, A, X, IMM) \ + ((struct sock_filter_int) { \ + .code = BPF_ALU | BPF_MOV | BPF_SRC(TYPE), \ + .a_reg = A, \ + .x_reg = X, \ + .off = 0, \ + .imm = IMM }) + +/* Direct packet access, R0 = *(uint *) (skb->data + OFF) */ + +#define BPF_LD_ABS(SIZE, OFF) \ + ((struct sock_filter_int) { \ + .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ + .a_reg = 0, \ + .x_reg = 0, \ + .off = 0, \ + .imm = OFF }) + +/* Indirect packet access, R0 = *(uint *) (skb->data + X + OFF) */ + +#define BPF_LD_IND(SIZE, X, OFF) \ + ((struct sock_filter_int) { \ + .code = BPF_LD | BPF_SIZE(SIZE) | BPF_IND, \ + .a_reg = 0, \ + .x_reg = X, \ + .off = 0, \ + .imm = OFF }) + +/* Memory store, A = *(uint *) (X + OFF), and vice versa */ + +#define BPF_LDX_MEM(SIZE, A, X, OFF) \ + ((struct sock_filter_int) { \ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ + .a_reg = A, \ + .x_reg = X, \ + .off = OFF, \ + .imm = 0 }) + +#define BPF_STX_MEM(SIZE, A, X, OFF) \ + ((struct sock_filter_int) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ + .a_reg = A, \ + .x_reg = X, \ + .off = OFF, \ + .imm = 0 }) + +/* Conditional jumps against registers, if (A 'op' X) goto pc + OFF */ + +#define BPF_JMP_REG(OP, A, X, OFF) \ + ((struct sock_filter_int) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ + .a_reg = A, \ + .x_reg = X, \ + .off = OFF, \ + .imm = 0 }) + +/* Conditional jumps against immediates, if (A 'op' IMM) goto pc + OFF */ + +#define BPF_JMP_IMM(OP, A, IMM, OFF) \ + ((struct sock_filter_int) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ + .a_reg = A, \ + .x_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Function call */ + +#define BPF_EMIT_CALL(FUNC) \ + ((struct sock_filter_int) { \ + .code = BPF_JMP | BPF_CALL, \ + .a_reg = 0, \ + .x_reg = 0, \ + .off = 0, \ + .imm = ((FUNC) - __bpf_call_base) }) + +/* Raw code statement block */ + +#define BPF_RAW_INSN(CODE, A, X, OFF, IMM) \ + ((struct sock_filter_int) { \ + .code = CODE, \ + .a_reg = A, \ + .x_reg = X, \ + .off = OFF, \ + .imm = IMM }) + +/* Program exit */ + +#define BPF_EXIT_INSN() \ + ((struct sock_filter_int) { \ + .code = BPF_JMP | BPF_EXIT, \ + .a_reg = 0, \ + .x_reg = 0, \ + .off = 0, \ + .imm = 0 }) + +#define bytes_to_bpf_size(bytes) \ +({ \ + int bpf_size = -EINVAL; \ + \ + if (bytes == sizeof(u8)) \ + bpf_size = BPF_B; \ + else if (bytes == sizeof(u16)) \ + bpf_size = BPF_H; \ + else if (bytes == sizeof(u32)) \ + bpf_size = BPF_W; \ + else if (bytes == sizeof(u64)) \ + bpf_size = BPF_DW; \ + \ + bpf_size; \ +}) /* Macro to invoke filter function. */ #define SK_RUN_FILTER(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) diff --git a/net/core/filter.c b/net/core/filter.c index 328aaf6ff4d1..842f8393121d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -672,14 +672,10 @@ static bool convert_bpf_extensions(struct sock_filter *fp, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); /* A = *(u16 *) (ctx + offsetof(protocol)) */ - *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, protocol)); - insn++; - + *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, protocol)); /* A = ntohs(A) [emitting a nop or swap16] */ - insn->code = BPF_ALU | BPF_END | BPF_FROM_BE; - insn->a_reg = BPF_REG_A; - insn->imm = 16; + *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16); break; case SKF_AD_OFF + SKF_AD_PKTTYPE: @@ -688,37 +684,27 @@ static bool convert_bpf_extensions(struct sock_filter *fp, if (insn->off < 0) return false; insn++; - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX); break; case SKF_AD_OFF + SKF_AD_IFINDEX: case SKF_AD_OFF + SKF_AD_HATYPE: - *insn = BPF_LDX_MEM(size_to_bpf(FIELD_SIZEOF(struct sk_buff, dev)), - BPF_REG_TMP, BPF_REG_CTX, - offsetof(struct sk_buff, dev)); - insn++; - - /* if (tmp != 0) goto pc+1 */ - *insn = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1); - insn++; - - *insn = BPF_EXIT_INSN(); - insn++; - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); - - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_TMP; - - if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) { - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->off = offsetof(struct net_device, ifindex); - } else { - insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->off = offsetof(struct net_device, type); - } + BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0); + + *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)), + BPF_REG_TMP, BPF_REG_CTX, + offsetof(struct sk_buff, dev)); + /* if (tmp != 0) goto pc + 1 */ + *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1); + *insn++ = BPF_EXIT_INSN(); + if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP, + offsetof(struct net_device, ifindex)); + else + *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP, + offsetof(struct net_device, type)); break; case SKF_AD_OFF + SKF_AD_MARK: @@ -745,22 +731,17 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_VLAN_TAG: case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); - - /* A = *(u16 *) (ctx + offsetof(vlan_tci)) */ - *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, vlan_tci)); - insn++; - BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + /* A = *(u16 *) (ctx + offsetof(vlan_tci)) */ + *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, vlan_tci)); if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, ~VLAN_TAG_PRESENT); } else { /* A >>= 12 */ - *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12); - insn++; - + *insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12); /* A &= 1 */ *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1); } @@ -772,34 +753,27 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_CPU: case SKF_AD_OFF + SKF_AD_RANDOM: /* arg1 = ctx */ - *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG1, BPF_REG_CTX); - insn++; - + *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX); /* arg2 = A */ - *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG2, BPF_REG_A); - insn++; - + *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A); /* arg3 = X */ - *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG3, BPF_REG_X); - insn++; - + *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X); /* Emit call(ctx, arg2=A, arg3=X) */ - insn->code = BPF_JMP | BPF_CALL; switch (fp->k) { case SKF_AD_OFF + SKF_AD_PAY_OFFSET: - insn->imm = __skb_get_pay_offset - __bpf_call_base; + *insn = BPF_EMIT_CALL(__skb_get_pay_offset); break; case SKF_AD_OFF + SKF_AD_NLATTR: - insn->imm = __skb_get_nlattr - __bpf_call_base; + *insn = BPF_EMIT_CALL(__skb_get_nlattr); break; case SKF_AD_OFF + SKF_AD_NLATTR_NEST: - insn->imm = __skb_get_nlattr_nest - __bpf_call_base; + *insn = BPF_EMIT_CALL(__skb_get_nlattr_nest); break; case SKF_AD_OFF + SKF_AD_CPU: - insn->imm = __get_raw_cpu_id - __bpf_call_base; + *insn = BPF_EMIT_CALL(__get_raw_cpu_id); break; case SKF_AD_OFF + SKF_AD_RANDOM: - insn->imm = __get_random_u32 - __bpf_call_base; + *insn = BPF_EMIT_CALL(__get_random_u32); break; } break; @@ -871,9 +845,8 @@ do_pass: new_insn = new_prog; fp = prog; - if (new_insn) { - *new_insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_CTX, BPF_REG_ARG1); - } + if (new_insn) + *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); new_insn++; for (i = 0; i < len; fp++, i++) { @@ -921,17 +894,16 @@ do_pass: convert_bpf_extensions(fp, &insn)) break; - insn->code = fp->code; - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_X; - insn->imm = fp->k; + *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); break; - /* Jump opcodes map as-is, but offsets need adjustment. */ - case BPF_JMP | BPF_JA: - target = i + fp->k + 1; - insn->code = fp->code; -#define EMIT_JMP \ + /* Jump transformation cannot use BPF block macros + * everywhere as offset calculation and target updates + * require a bit more work than the rest, i.e. jump + * opcodes map as-is, but offsets need adjustment. + */ + +#define BPF_EMIT_JMP \ do { \ if (target >= len || target < 0) \ goto err; \ @@ -940,7 +912,10 @@ do_pass: insn->off -= insn - tmp_insns; \ } while (0) - EMIT_JMP; + case BPF_JMP | BPF_JA: + target = i + fp->k + 1; + insn->code = fp->code; + BPF_EMIT_JMP; break; case BPF_JMP | BPF_JEQ | BPF_K: @@ -956,10 +931,7 @@ do_pass: * immediate into tmp register and use it * in compare insn. */ - insn->code = BPF_ALU | BPF_MOV | BPF_K; - insn->a_reg = BPF_REG_TMP; - insn->imm = fp->k; - insn++; + *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k); insn->a_reg = BPF_REG_A; insn->x_reg = BPF_REG_TMP; @@ -975,7 +947,7 @@ do_pass: if (fp->jf == 0) { insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; target = i + fp->jt + 1; - EMIT_JMP; + BPF_EMIT_JMP; break; } @@ -983,116 +955,94 @@ do_pass: if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) { insn->code = BPF_JMP | BPF_JNE | bpf_src; target = i + fp->jf + 1; - EMIT_JMP; + BPF_EMIT_JMP; break; } /* Other jumps are mapped into two insns: Jxx and JA. */ target = i + fp->jt + 1; insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; - EMIT_JMP; + BPF_EMIT_JMP; insn++; insn->code = BPF_JMP | BPF_JA; target = i + fp->jf + 1; - EMIT_JMP; + BPF_EMIT_JMP; break; /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */ case BPF_LDX | BPF_MSH | BPF_B: /* tmp = A */ - *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_TMP, BPF_REG_A); - insn++; - + *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A); /* A = BPF_R0 = *(u8 *) (skb->data + K) */ - *insn = BPF_LD_ABS(BPF_B, fp->k); - insn++; - + *insn++ = BPF_LD_ABS(BPF_B, fp->k); /* A &= 0xf */ - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); - insn++; - + *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); /* A <<= 2 */ - *insn = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); - insn++; - + *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); /* X = A */ - *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A); - insn++; - + *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); /* A = tmp */ - *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_TMP); + *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); break; /* RET_K, RET_A are remaped into 2 insns. */ case BPF_RET | BPF_A: case BPF_RET | BPF_K: - insn->code = BPF_ALU | BPF_MOV | - (BPF_RVAL(fp->code) == BPF_K ? - BPF_K : BPF_X); - insn->a_reg = 0; - insn->x_reg = BPF_REG_A; - insn->imm = fp->k; - insn++; - + *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ? + BPF_K : BPF_X, BPF_REG_0, + BPF_REG_A, fp->k); *insn = BPF_EXIT_INSN(); break; /* Store to stack. */ case BPF_ST: case BPF_STX: - insn->code = BPF_STX | BPF_MEM | BPF_W; - insn->a_reg = BPF_REG_FP; - insn->x_reg = fp->code == BPF_ST ? - BPF_REG_A : BPF_REG_X; - insn->off = -(BPF_MEMWORDS - fp->k) * 4; + *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) == + BPF_ST ? BPF_REG_A : BPF_REG_X, + -(BPF_MEMWORDS - fp->k) * 4); break; /* Load from stack. */ case BPF_LD | BPF_MEM: case BPF_LDX | BPF_MEM: - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - BPF_REG_A : BPF_REG_X; - insn->x_reg = BPF_REG_FP; - insn->off = -(BPF_MEMWORDS - fp->k) * 4; + *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? + BPF_REG_A : BPF_REG_X, BPF_REG_FP, + -(BPF_MEMWORDS - fp->k) * 4); break; /* A = K or X = K */ case BPF_LD | BPF_IMM: case BPF_LDX | BPF_IMM: - insn->code = BPF_ALU | BPF_MOV | BPF_K; - insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - BPF_REG_A : BPF_REG_X; - insn->imm = fp->k; + *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ? + BPF_REG_A : BPF_REG_X, fp->k); break; /* X = A */ case BPF_MISC | BPF_TAX: - *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A); + *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); break; /* A = X */ case BPF_MISC | BPF_TXA: - *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_X); + *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X); break; /* A = skb->len or X = skb->len */ case BPF_LD | BPF_W | BPF_LEN: case BPF_LDX | BPF_W | BPF_LEN: - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - BPF_REG_A : BPF_REG_X; - insn->x_reg = BPF_REG_CTX; - insn->off = offsetof(struct sk_buff, len); + *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? + BPF_REG_A : BPF_REG_X, BPF_REG_CTX, + offsetof(struct sk_buff, len)); break; - /* access seccomp_data fields */ + /* Access seccomp_data fields. */ case BPF_LDX | BPF_ABS | BPF_W: /* A = *(u32 *) (ctx + K) */ *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k); break; + /* Unkown instruction. */ default: goto err; } @@ -1101,7 +1051,6 @@ do_pass: if (new_prog) memcpy(new_insn, tmp_insns, sizeof(*insn) * (insn - tmp_insns)); - new_insn += insn - tmp_insns; } @@ -1116,7 +1065,6 @@ do_pass: new_flen = new_insn - new_prog; if (pass > 2) goto err; - goto do_pass; } -- cgit v1.2.3-71-gd317 From 96b2e73c5471542cb9c622c4360716684f8797ed Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Jun 2014 00:18:48 -0700 Subject: Revert "net/mlx4_en: Use affinity hint" This reverts commit 70a640d0dae3a9b1b222ce673eb5d92c263ddd61. Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 6 +----- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 30 -------------------------- drivers/net/ethernet/mellanox/mlx4/eq.c | 13 +---------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 - include/linux/mlx4/device.h | 2 +- 6 files changed, 4 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 58b1f239ac2b..199c7896f081 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1897,7 +1897,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) i, j, dev->pdev->bus->name); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, NULL, - &ibdev->eq_table[eq], NULL)) { + &ibdev->eq_table[eq])) { /* Use legacy (same as mlx4_en driver) */ pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); ibdev->eq_table[eq] = diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index ea2cd72e5368..636963db598a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -118,15 +118,11 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, if (cq->is_tx == RX) { if (mdev->dev->caps.comp_pool) { if (!cq->vector) { - struct mlx4_en_rx_ring *ring = - priv->rx_ring[cq->ring]; - sprintf(name, "%s-%d", priv->dev->name, cq->ring); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(mdev->dev, name, rmap, - &cq->vector, - ring->affinity_mask)) { + &cq->vector)) { cq->vector = (cq->ring + 1 + priv->port) % mdev->dev->caps.num_comp_vectors; mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQ's\n", diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 05d135572abc..58209bd0c94c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1526,32 +1526,6 @@ static void mlx4_en_linkstate(struct work_struct *work) mutex_unlock(&mdev->state_lock); } -static void mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) -{ - struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx]; - int numa_node = priv->mdev->dev->numa_node; - - if (numa_node == -1) - return; - - if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL)) { - en_err(priv, "Failed to allocate core mask\n"); - return; - } - - if (cpumask_set_cpu_local_first(ring_idx, numa_node, - ring->affinity_mask)) { - en_err(priv, "Failed setting affinity hint\n"); - free_cpumask_var(ring->affinity_mask); - ring->affinity_mask = NULL; - } -} - -static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) -{ - free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask); - priv->rx_ring[ring_idx]->affinity_mask = NULL; -} int mlx4_en_start_port(struct net_device *dev) { @@ -1593,8 +1567,6 @@ int mlx4_en_start_port(struct net_device *dev) mlx4_en_cq_init_lock(cq); - mlx4_en_init_affinity_hint(priv, i); - err = mlx4_en_activate_cq(priv, cq, i); if (err) { en_err(priv, "Failed activating Rx CQ\n"); @@ -1875,8 +1847,6 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) msleep(1); mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); mlx4_en_deactivate_cq(priv, cq); - - mlx4_en_free_affinity_hint(priv, i); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index f91659e5fa13..d954ec1eac17 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1376,7 +1376,7 @@ int mlx4_test_interrupts(struct mlx4_dev *dev) EXPORT_SYMBOL(mlx4_test_interrupts); int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, - int *vector, cpumask_var_t cpu_hint_mask) + int *vector) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1411,15 +1411,6 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, } mlx4_assign_irq_notifier(priv, dev, priv->eq_table.eq[vec].irq); - if (cpu_hint_mask) { - err = irq_set_affinity_hint( - priv->eq_table.eq[vec].irq, - cpu_hint_mask); - if (err) { - mlx4_warn(dev, "Failed setting affinity hint\n"); - /*we dont want to break here*/ - } - } eq_set_ci(&priv->eq_table.eq[vec], 1); } @@ -1450,8 +1441,6 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec) irq_set_affinity_notifier( priv->eq_table.eq[vec].irq, NULL); - irq_set_affinity_hint(priv->eq_table.eq[vec].irq, - NULL); free_irq(priv->eq_table.eq[vec].irq, &priv->eq_table.eq[vec]); priv->msix_ctl.pool_bm &= ~(1ULL << i); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 0e15295bedd6..b5db1bf361dc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -313,7 +313,6 @@ struct mlx4_en_rx_ring { unsigned long csum_ok; unsigned long csum_none; int hwtstamp_rx_filter; - cpumask_var_t affinity_mask; }; struct mlx4_en_cq { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b9b70e00e3c1..ca38871a585c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1161,7 +1161,7 @@ int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); int mlx4_test_interrupts(struct mlx4_dev *dev); int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, - int *vector, cpumask_t *cpu_hint_mask); + int *vector); void mlx4_release_eq(struct mlx4_dev *dev, int vec); int mlx4_get_phys_port_id(struct mlx4_dev *dev); -- cgit v1.2.3-71-gd317 From f220e62659e968c3bbe4d96d73008832d19b9f77 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 13 May 2014 12:47:42 +0200 Subject: drm/plane: Fix sparse warnings Include the drm_plane_helper.h header file to fix the following sparse warnings: CHECK drivers/gpu/drm/drm_plane_helper.c drivers/gpu/drm/drm_plane_helper.c:102:5: warning: symbol 'drm_primary_helper_update' was not declared. Should it be static? drivers/gpu/drm/drm_plane_helper.c:219:5: warning: symbol 'drm_primary_helper_disable' was not declared. Should it be static? drivers/gpu/drm/drm_plane_helper.c:233:6: warning: symbol 'drm_primary_helper_destroy' was not declared. Should it be static? drivers/gpu/drm/drm_plane_helper.c:241:30: warning: symbol 'drm_primary_helper_funcs' was not declared. Should it be static? drivers/gpu/drm/drm_plane_helper.c:259:18: warning: symbol 'drm_primary_helper_create_plane' was not declared. Should it be static? Doing that makes gcc complain as follows: CC drivers/gpu/drm/drm_plane_helper.o drivers/gpu/drm/drm_plane_helper.c:260:19: error: conflicting types for 'drm_primary_helper_create_plane' struct drm_plane *drm_primary_helper_create_plane(struct drm_device *dev, ^ In file included from drivers/gpu/drm/drm_plane_helper.c:29:0: include/drm/drm_plane_helper.h:42:19: note: previous declaration of 'drm_primary_helper_create_plane' was here struct drm_plane *drm_primary_helper_create_plane(struct drm_device *dev, ^ drivers/gpu/drm/drm_plane_helper.c: In function 'drm_primary_helper_create_plane': drivers/gpu/drm/drm_plane_helper.c:274:11: warning: assignment discards 'const' qualifier from pointer target type formats = safe_modeset_formats; ^ In file included from include/linux/linkage.h:6:0, from include/linux/kernel.h:6, from include/drm/drmP.h:45, from drivers/gpu/drm/drm_plane_helper.c:27: drivers/gpu/drm/drm_plane_helper.c: At top level: drivers/gpu/drm/drm_plane_helper.c:289:15: error: conflicting types for 'drm_primary_helper_create_plane' EXPORT_SYMBOL(drm_primary_helper_create_plane); ^ include/linux/export.h:57:21: note: in definition of macro '__EXPORT_SYMBOL' extern typeof(sym) sym; \ ^ drivers/gpu/drm/drm_plane_helper.c:289:1: note: in expansion of macro 'EXPORT_SYMBOL' EXPORT_SYMBOL(drm_primary_helper_create_plane); ^ In file included from drivers/gpu/drm/drm_plane_helper.c:29:0: include/drm/drm_plane_helper.h:42:19: note: previous declaration of 'drm_primary_helper_create_plane' was here struct drm_plane *drm_primary_helper_create_plane(struct drm_device *dev, ^ Which can easily be fixed by making the signatures of the implementation and the prototype match. Signed-off-by: Thierry Reding Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_plane_helper.c | 1 + include/drm/drm_plane_helper.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c index d966afa7ecae..f16bb92db577 100644 --- a/drivers/gpu/drm/drm_plane_helper.c +++ b/drivers/gpu/drm/drm_plane_helper.c @@ -26,6 +26,7 @@ #include #include #include +#include #define SUBPIXEL_MASK 0xffff diff --git a/include/drm/drm_plane_helper.h b/include/drm/drm_plane_helper.h index 09824becee3e..c5e7ab9503c8 100644 --- a/include/drm/drm_plane_helper.h +++ b/include/drm/drm_plane_helper.h @@ -42,7 +42,7 @@ extern int drm_primary_helper_disable(struct drm_plane *plane); extern void drm_primary_helper_destroy(struct drm_plane *plane); extern const struct drm_plane_funcs drm_primary_helper_funcs; extern struct drm_plane *drm_primary_helper_create_plane(struct drm_device *dev, - uint32_t *formats, + const uint32_t *formats, int num_formats); -- cgit v1.2.3-71-gd317 From 63ff05910d9652f8a0c4dbb6c7772cfca5364f12 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sun, 1 Jun 2014 16:06:53 +0200 Subject: include/scsi/osd_protocol.h: remove unnecessary __constant __constant_cpu_to_be16 converted to cpu_to_be16 This patch fixes checkpatch warnings: "WARNING: __constant_cpu_to_be16 should be cpu_to_be16" Signed-off-by: Fabian Frederick Ack-by: Boaz Harrosh Signed-off-by: Christoph Hellwig --- include/scsi/osd_protocol.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h index 25ac6283b9c7..a2594afe05c7 100644 --- a/include/scsi/osd_protocol.h +++ b/include/scsi/osd_protocol.h @@ -263,16 +263,16 @@ static inline struct osd_cdb_head *osd_cdb_head(struct osd_cdb *ocdb) * Ex name = FORMAT_OSD we have OSD_ACT_FORMAT_OSD && OSDv1_ACT_FORMAT_OSD */ #define OSD_ACT___(Name, Num) \ - OSD_ACT_##Name = __constant_cpu_to_be16(0x8880 + Num), \ - OSDv1_ACT_##Name = __constant_cpu_to_be16(0x8800 + Num), + OSD_ACT_##Name = cpu_to_be16(0x8880 + Num), \ + OSDv1_ACT_##Name = cpu_to_be16(0x8800 + Num), /* V2 only actions */ #define OSD_ACT_V2(Name, Num) \ - OSD_ACT_##Name = __constant_cpu_to_be16(0x8880 + Num), + OSD_ACT_##Name = cpu_to_be16(0x8880 + Num), #define OSD_ACT_V1_V2(Name, Num1, Num2) \ - OSD_ACT_##Name = __constant_cpu_to_be16(Num2), \ - OSDv1_ACT_##Name = __constant_cpu_to_be16(Num1), + OSD_ACT_##Name = cpu_to_be16(Num2), \ + OSDv1_ACT_##Name = cpu_to_be16(Num1), enum osd_service_actions { OSD_ACT_V2(OBJECT_STRUCTURE_CHECK, 0x00) -- cgit v1.2.3-71-gd317 From 670e5b8eaf85704742bc3cb1df51fdd3ce08fc15 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 28 May 2014 18:44:46 -0700 Subject: net: Add support for device specific address syncing This change provides a function to be used in order to break the ndo_set_rx_mode call into a set of address add and remove calls. The code is based on the implementation of dev_uc_sync/dev_mc_sync. Since they essentially do the same thing but with only one dev I simply named my functions __dev_uc_sync/__dev_mc_sync. I also implemented an unsync version of the functions as well to allow for cleanup on close. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 73 ++++++++++++++++++++++++++++++++++++++++ net/core/dev_addr_lists.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2db1610bf109..774e5391eb8e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3003,6 +3003,15 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); +int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*sync)(struct net_device *, const unsigned char *), + int (*unsync)(struct net_device *, + const unsigned char *)); +void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*unsync)(struct net_device *, + const unsigned char *)); void __hw_addr_init(struct netdev_hw_addr_list *list); /* Functions used for device addresses handling */ @@ -3023,6 +3032,38 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from); void dev_uc_flush(struct net_device *dev); void dev_uc_init(struct net_device *dev); +/** + * __dev_uc_sync - Synchonize device's unicast list + * @dev: device to sync + * @sync: function to call if address should be added + * @unsync: function to call if address should be removed + * + * Add newly added addresses to the interface, and release + * addresses that have been deleted. + **/ +static inline int __dev_uc_sync(struct net_device *dev, + int (*sync)(struct net_device *, + const unsigned char *), + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + return __hw_addr_sync_dev(&dev->uc, dev, sync, unsync); +} + +/** + * __dev_uc_unsync - Remove synchonized addresses from device + * @dev: device to sync + * @unsync: function to call if address should be removed + * + * Remove all addresses that were added to the device by dev_uc_sync(). + **/ +static inline void __dev_uc_unsync(struct net_device *dev, + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + __hw_addr_unsync_dev(&dev->uc, dev, unsync); +} + /* Functions used for multicast addresses handling */ int dev_mc_add(struct net_device *dev, const unsigned char *addr); int dev_mc_add_global(struct net_device *dev, const unsigned char *addr); @@ -3035,6 +3076,38 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from); void dev_mc_flush(struct net_device *dev); void dev_mc_init(struct net_device *dev); +/** + * __dev_mc_sync - Synchonize device's multicast list + * @dev: device to sync + * @sync: function to call if address should be added + * @unsync: function to call if address should be removed + * + * Add newly added addresses to the interface, and release + * addresses that have been deleted. + **/ +static inline int __dev_mc_sync(struct net_device *dev, + int (*sync)(struct net_device *, + const unsigned char *), + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + return __hw_addr_sync_dev(&dev->mc, dev, sync, unsync); +} + +/** + * __dev_mc_unsync - Remove synchonized addresses from device + * @dev: device to sync + * @unsync: function to call if address should be removed + * + * Remove all addresses that were added to the device by dev_mc_sync(). + **/ +static inline void __dev_mc_unsync(struct net_device *dev, + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + __hw_addr_unsync_dev(&dev->mc, dev, unsync); +} + /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); void __dev_set_rx_mode(struct net_device *dev); diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 329d5794e7dc..b6b230600b97 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -225,6 +225,91 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, } EXPORT_SYMBOL(__hw_addr_unsync); +/** + * __hw_addr_sync_dev - Synchonize device's multicast list + * @list: address list to syncronize + * @dev: device to sync + * @sync: function to call if address should be added + * @unsync: function to call if address should be removed + * + * This funciton is intended to be called from the ndo_set_rx_mode + * function of devices that require explicit address add/remove + * notifications. The unsync function may be NULL in which case + * the addresses requiring removal will simply be removed without + * any notification to the device. + **/ +int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*sync)(struct net_device *, const unsigned char *), + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + struct netdev_hw_addr *ha, *tmp; + int err; + + /* first go through and flush out any stale entries */ + list_for_each_entry_safe(ha, tmp, &list->list, list) { + if (!ha->sync_cnt || ha->refcount != 1) + continue; + + /* if unsync is defined and fails defer unsyncing address */ + if (unsync && unsync(dev, ha->addr)) + continue; + + ha->sync_cnt--; + __hw_addr_del_entry(list, ha, false, false); + } + + /* go through and sync new entries to the list */ + list_for_each_entry_safe(ha, tmp, &list->list, list) { + if (ha->sync_cnt) + continue; + + err = sync(dev, ha->addr); + if (err) + return err; + + ha->sync_cnt++; + ha->refcount++; + } + + return 0; +} +EXPORT_SYMBOL(__hw_addr_sync_dev); + +/** + * __hw_addr_unsync_dev - Remove synchonized addresses from device + * @list: address list to remove syncronized addresses from + * @dev: device to sync + * @unsync: function to call if address should be removed + * + * Remove all addresses that were added to the device by __hw_addr_sync_dev(). + * This function is intended to be called from the ndo_stop or ndo_open + * functions on devices that require explicit address add/remove + * notifications. If the unsync function pointer is NULL then this function + * can be used to just reset the sync_cnt for the addresses in the list. + **/ +void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + struct netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, &list->list, list) { + if (!ha->sync_cnt) + continue; + + /* if unsync is defined and fails defer unsyncing address */ + if (unsync && unsync(dev, ha->addr)) + continue; + + ha->sync_cnt--; + __hw_addr_del_entry(list, ha, false, false); + } +} +EXPORT_SYMBOL(__hw_addr_unsync_dev); + static void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; -- cgit v1.2.3-71-gd317 From 73f156a6e8c1074ac6327e0abd1169e95eb66463 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Jun 2014 05:26:03 -0700 Subject: inetpeer: get rid of ip_id_count Ideally, we would need to generate IP ID using a per destination IP generator. linux kernels used inet_peer cache for this purpose, but this had a huge cost on servers disabling MTU discovery. 1) each inet_peer struct consumes 192 bytes 2) inetpeer cache uses a binary tree of inet_peer structs, with a nominal size of ~66000 elements under load. 3) lookups in this tree are hitting a lot of cache lines, as tree depth is about 20. 4) If server deals with many tcp flows, we have a high probability of not finding the inet_peer, allocating a fresh one, inserting it in the tree with same initial ip_id_count, (cf secure_ip_id()) 5) We garbage collect inet_peer aggressively. IP ID generation do not have to be 'perfect' Goal is trying to avoid duplicates in a short period of time, so that reassembly units have a chance to complete reassembly of fragments belonging to one message before receiving other fragments with a recycled ID. We simply use an array of generators, and a Jenkin hash using the dst IP as a key. ipv6_select_ident() is put back into net/ipv6/ip6_output.c where it belongs (it is only used from this file) secure_ip_id() and secure_ipv6_id() no longer are needed. Rename ip_select_ident_more() to ip_select_ident_segs() to avoid unnecessary decrement/increment of the number of segments. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ppp/pptp.c | 2 +- include/net/inetpeer.h | 23 +++------------------ include/net/ip.h | 40 ++++++++++++++++++++---------------- include/net/ipv6.h | 2 -- include/net/secure_seq.h | 2 -- net/core/secure_seq.c | 25 ----------------------- net/ipv4/igmp.c | 4 ++-- net/ipv4/inetpeer.c | 18 ----------------- net/ipv4/ip_output.c | 7 +++---- net/ipv4/ip_tunnel_core.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/route.c | 45 +++++++++++++++-------------------------- net/ipv4/xfrm4_mode_tunnel.c | 2 +- net/ipv6/ip6_output.c | 12 +++++++++++ net/ipv6/output_core.c | 30 --------------------------- net/netfilter/ipvs/ip_vs_xmit.c | 2 +- 17 files changed, 65 insertions(+), 155 deletions(-) (limited to 'include') diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 01805319e1e0..1aff970be33e 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -281,7 +281,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) nf_reset(skb); skb->ip_summed = CHECKSUM_NONE; - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); ip_send_check(iph); ip_local_out(skb); diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 6efe73c79c52..823ec7bb9c67 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -41,14 +41,13 @@ struct inet_peer { struct rcu_head gc_rcu; }; /* - * Once inet_peer is queued for deletion (refcnt == -1), following fields - * are not available: rid, ip_id_count + * Once inet_peer is queued for deletion (refcnt == -1), following field + * is not available: rid * We can share memory with rcu_head to help keep inet_peer small. */ union { struct { atomic_t rid; /* Frag reception counter */ - atomic_t ip_id_count; /* IP ID for the next packet */ }; struct rcu_head rcu; struct inet_peer *gc_next; @@ -165,7 +164,7 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); void inetpeer_invalidate_tree(struct inet_peer_base *); /* - * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, + * temporary check to make sure we dont access rid, tcp_ts, * tcp_ts_stamp if no refcount is taken on inet_peer */ static inline void inet_peer_refcheck(const struct inet_peer *p) @@ -173,20 +172,4 @@ static inline void inet_peer_refcheck(const struct inet_peer *p) WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0); } - -/* can be called with or without local BH being disabled */ -static inline int inet_getid(struct inet_peer *p, int more) -{ - int old, new; - more++; - inet_peer_refcheck(p); - do { - old = atomic_read(&p->ip_id_count); - new = old + more; - if (!new) - new = 1; - } while (atomic_cmpxchg(&p->ip_id_count, old, new) != old); - return new; -} - #endif /* _NET_INETPEER_H */ diff --git a/include/net/ip.h b/include/net/ip.h index 2e4947895d75..0e795df05ec9 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -309,9 +309,19 @@ static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb) } } -void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more); +#define IP_IDENTS_SZ 2048u +extern atomic_t *ip_idents; -static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk) +static inline u32 ip_idents_reserve(u32 hash, int segs) +{ + atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ; + + return atomic_add_return(segs, id_ptr) - segs; +} + +void __ip_select_ident(struct iphdr *iph, int segs); + +static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs) { struct iphdr *iph = ip_hdr(skb); @@ -321,24 +331,20 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s * does not change, they drop every other packet in * a TCP stream using header compression. */ - iph->id = (sk && inet_sk(sk)->inet_daddr) ? - htons(inet_sk(sk)->inet_id++) : 0; - } else - __ip_select_ident(iph, dst, 0); -} - -static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more) -{ - struct iphdr *iph = ip_hdr(skb); - - if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { if (sk && inet_sk(sk)->inet_daddr) { iph->id = htons(inet_sk(sk)->inet_id); - inet_sk(sk)->inet_id += 1 + more; - } else + inet_sk(sk)->inet_id += segs; + } else { iph->id = 0; - } else - __ip_select_ident(iph, dst, more); + } + } else { + __ip_select_ident(iph, segs); + } +} + +static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk) +{ + ip_select_ident_segs(skb, sk, 1); } static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ba810d0546bc..574337fe72dd 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -668,8 +668,6 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } -void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt); - int ip6_dst_hoplimit(struct dst_entry *dst); static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index f257486f17be..3f36d45b714a 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -3,8 +3,6 @@ #include -__u32 secure_ip_id(__be32 daddr); -__u32 secure_ipv6_id(const __be32 daddr[4]); u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 897da56f3aff..ba71212f0251 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -85,31 +85,6 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET -__u32 secure_ip_id(__be32 daddr) -{ - u32 hash[MD5_DIGEST_WORDS]; - - net_secret_init(); - hash[0] = (__force __u32) daddr; - hash[1] = net_secret[13]; - hash[2] = net_secret[14]; - hash[3] = net_secret[15]; - - md5_transform(hash, net_secret); - - return hash[0]; -} - -__u32 secure_ipv6_id(const __be32 daddr[4]) -{ - __u32 hash[4]; - - net_secret_init(); - memcpy(hash, daddr, 16); - md5_transform(hash, net_secret); - - return hash[0]; -} __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 17d34e3c2ac3..6748d420f714 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -369,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) pip->saddr = fl4.saddr; pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); ((u8 *)&pip[1])[0] = IPOPT_RA; ((u8 *)&pip[1])[1] = 4; ((u8 *)&pip[1])[2] = 0; @@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->daddr = dst; iph->saddr = fl4.saddr; iph->protocol = IPPROTO_IGMP; - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; ((u8 *)&iph[1])[2] = 0; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index c98cf141f4ed..4ced1b9a97f0 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -26,20 +26,7 @@ * Theory of operations. * We keep one entry for each peer IP address. The nodes contains long-living * information about the peer which doesn't depend on routes. - * At this moment this information consists only of ID field for the next - * outgoing IP packet. This field is incremented with each packet as encoded - * in inet_getid() function (include/net/inetpeer.h). - * At the moment of writing this notes identifier of IP packets is generated - * to be unpredictable using this code only for packets subjected - * (actually or potentially) to defragmentation. I.e. DF packets less than - * PMTU in size when local fragmentation is disabled use a constant ID and do - * not use this code (see ip_select_ident() in include/net/ip.h). * - * Route cache entries hold references to our nodes. - * New cache entries get references via lookup by destination IP address in - * the avl tree. The reference is grabbed only when it's needed i.e. only - * when we try to output IP packet which needs an unpredictable ID (see - * __ip_select_ident() in net/ipv4/route.c). * Nodes are removed only when reference counter goes to 0. * When it's happened the node may be removed when a sufficient amount of * time has been passed since its last use. The less-recently-used entry can @@ -62,7 +49,6 @@ * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing * daddr: unchangeable - * ip_id_count: atomic value (no lock needed) */ static struct kmem_cache *peer_cachep __read_mostly; @@ -497,10 +483,6 @@ relookup: p->daddr = *daddr; atomic_set(&p->refcnt, 1); atomic_set(&p->rid, 0); - atomic_set(&p->ip_id_count, - (daddr->family == AF_INET) ? - secure_ip_id(daddr->addr.a4) : - secure_ipv6_id(daddr->addr.a6)); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; /* 60*HZ is arbitrary, but chosen enough high so that the first diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6e231ab58d65..8d3b6b0e9857 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - ip_select_ident(skb, &rt->dst, sk); + ip_select_ident(skb, sk); if (opt && opt->opt.optlen) { iph->ihl += opt->opt.optlen>>2; @@ -430,8 +430,7 @@ packet_routed: ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); } - ip_select_ident_more(skb, &rt->dst, sk, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); + ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1); /* TODO : should we use skb->sk here instead of sk ? */ skb->priority = sk->sk_priority; @@ -1379,7 +1378,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); - ip_select_ident(skb, &rt->dst, sk); + ip_select_ident(skb, sk); if (opt) { iph->ihl += opt->optlen>>2; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index bcf206c79005..847e69cbff7e 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -74,7 +74,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; - __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); + __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1); err = ip_local_out_sk(sk, skb); if (unlikely(net_xmit_eval(err))) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2bc9cc47f246..65bcaa789043 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1663,7 +1663,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) iph->protocol = IPPROTO_IPIP; iph->ihl = 5; iph->tot_len = htons(skb->len); - ip_select_ident(skb, skb_dst(skb), NULL); + ip_select_ident(skb, NULL); ip_send_check(iph); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a9dbe58bdfe7..2c65160565e1 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -389,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, iph->check = 0; iph->tot_len = htons(length); if (!iph->id) - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4154eb76b0ad..082239ffe34a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include @@ -456,39 +457,19 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, return neigh_create(&arp_tbl, pkey, dev); } -/* - * Peer allocation may fail only in serious out-of-memory conditions. However - * we still can generate some output. - * Random ID selection looks a bit dangerous because we have no chances to - * select ID being unique in a reasonable period of time. - * But broken packet identifier may be better than no packet at all. - */ -static void ip_select_fb_ident(struct iphdr *iph) -{ - static DEFINE_SPINLOCK(ip_fb_id_lock); - static u32 ip_fallback_id; - u32 salt; +atomic_t *ip_idents __read_mostly; +EXPORT_SYMBOL(ip_idents); - spin_lock_bh(&ip_fb_id_lock); - salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr); - iph->id = htons(salt & 0xFFFF); - ip_fallback_id = salt; - spin_unlock_bh(&ip_fb_id_lock); -} - -void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) +void __ip_select_ident(struct iphdr *iph, int segs) { - struct net *net = dev_net(dst->dev); - struct inet_peer *peer; + static u32 ip_idents_hashrnd __read_mostly; + u32 hash, id; - peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1); - if (peer) { - iph->id = htons(inet_getid(peer, more)); - inet_putpeer(peer); - return; - } + net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); - ip_select_fb_ident(iph); + hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd); + id = ip_idents_reserve(hash, segs); + iph->id = htons(id); } EXPORT_SYMBOL(__ip_select_ident); @@ -2711,6 +2692,12 @@ int __init ip_rt_init(void) { int rc = 0; + ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL); + if (!ip_idents) + panic("IP: failed to allocate ip_idents\n"); + + prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); + #ifdef CONFIG_IP_ROUTE_CLASSID ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); if (!ip_rt_acct) diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 05f2b484954f..91771a7c802f 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -58,12 +58,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); - ip_select_ident(skb, dst->child, NULL); top_iph->ttl = ip4_dst_hoplimit(dst->child); top_iph->saddr = x->props.saddr.a4; top_iph->daddr = x->id.daddr.a4; + ip_select_ident(skb, NULL); return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 85aaeca1f7f3..cb9df0eb4023 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -537,6 +537,18 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } +static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +{ + static u32 ip6_idents_hashrnd __read_mostly; + u32 hash, id; + + net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); + + hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); + id = ip_idents_reserve(hash, 1); + fhdr->identification = htonl(id); +} + int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 6313abd53c9d..6179ac186ab9 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -8,36 +8,6 @@ #include #include -void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) -{ - static atomic_t ipv6_fragmentation_id; - struct in6_addr addr; - int old, new; - -#if IS_ENABLED(CONFIG_IPV6) - struct inet_peer *peer; - struct net *net; - - net = dev_net(rt->dst.dev); - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); - if (peer) { - fhdr->identification = htonl(inet_getid(peer, 0)); - inet_putpeer(peer); - return; - } -#endif - do { - old = atomic_read(&ipv6_fragmentation_id); - new = old + 1; - if (!new) - new = 1; - } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); - - addr = rt->rt6i_dst.addr; - addr.s6_addr32[0] ^= (__force __be32)new; - fhdr->identification = htonl(secure_ipv6_id(addr.s6_addr32)); -} -EXPORT_SYMBOL(ipv6_select_ident); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 487b55e04337..73ba1cc7a88d 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -883,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->daddr = cp->daddr.ip; iph->saddr = saddr; iph->ttl = old_iph->ttl; - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); /* Another hack: avoid icmp_send in ip_fragment */ skb->ignore_df = 1; -- cgit v1.2.3-71-gd317 From 1b49dcf3d7c765ad18ca7167a0e441824eb1f7af Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 16 Mar 2014 14:51:35 -0700 Subject: virtio-scsi.h: Add virtio_scsi_cmd_req_pi + VIRTIO_SCSI_F_T10_PI bits This patch adds a virtio_scsi_cmd_req_pi header as recommened by Paolo that contains pi_bytesout + pi_bytesin elements used for signaling when protection information buffers (in bytes) are expected to preceed the data payload buffers. Also add new VIRTIO_SCSI_F_T10_PI feature bit to be used to signal host support. Cc: Paolo Bonzini Cc: Martin K. Petersen Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Sagi Grimberg Cc: H. Peter Anvin Acked-by: Rusty Russell Acked-by: Michael S. Tsirkin Signed-off-by: Nicholas Bellinger --- include/linux/virtio_scsi.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h index 4195b97a3def..de429d1f4357 100644 --- a/include/linux/virtio_scsi.h +++ b/include/linux/virtio_scsi.h @@ -35,11 +35,23 @@ struct virtio_scsi_cmd_req { u8 lun[8]; /* Logical Unit Number */ u64 tag; /* Command identifier */ u8 task_attr; /* Task attribute */ - u8 prio; + u8 prio; /* SAM command priority field */ u8 crn; u8 cdb[VIRTIO_SCSI_CDB_SIZE]; } __packed; +/* SCSI command request, followed by protection information */ +struct virtio_scsi_cmd_req_pi { + u8 lun[8]; /* Logical Unit Number */ + u64 tag; /* Command identifier */ + u8 task_attr; /* Task attribute */ + u8 prio; /* SAM command priority field */ + u8 crn; + u32 pi_bytesout; /* DataOUT PI Number of bytes */ + u32 pi_bytesin; /* DataIN PI Number of bytes */ + u8 cdb[VIRTIO_SCSI_CDB_SIZE]; +} __packed; + /* Response, followed by sense data and data-in */ struct virtio_scsi_cmd_resp { u32 sense_len; /* Sense data length */ @@ -97,6 +109,7 @@ struct virtio_scsi_config { #define VIRTIO_SCSI_F_INOUT 0 #define VIRTIO_SCSI_F_HOTPLUG 1 #define VIRTIO_SCSI_F_CHANGE 2 +#define VIRTIO_SCSI_F_T10_PI 3 /* Response codes */ #define VIRTIO_SCSI_S_OK 0 -- cgit v1.2.3-71-gd317 From 09b93088d75009807b72293f26e2634430ce5ba9 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 11 May 2014 15:15:11 +0300 Subject: IB: Add a QP creation flag to use GFP_NOIO allocations This addresses a problem where NFS client writes over IPoIB connected mode may deadlock on memory allocation/writeback. The problem is not directly memory reclamation. There is an indirect dependency between network filesystems writing back pages and ipoib_cm_tx_init() due to how a kworker is used. Page reclaim cannot make forward progress until ipoib_cm_tx_init() succeeds and it is stuck in page reclaim itself waiting for network transmission. Ordinarily this situation may be avoided by having the caller use GFP_NOFS but ipoib_cm_tx_init() does not have that information. To address this, take a general approach and add a new QP creation flag that tells the low-level hardware driver to use GFP_NOIO for the memory allocations related to the new QP. Use the new flag in the ipoib connected mode path, and if the driver doesn't support it, re-issue the QP creation without the flag. Signed-off-by: Mel Gorman Signed-off-by: Jiri Kosina Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 18 +++++++++++++++--- include/rdma/ib_verbs.h | 1 + 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 1377f85911c2..933efcea0d03 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1030,10 +1030,20 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ .cap.max_send_sge = 1, .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_RC, - .qp_context = tx + .qp_context = tx, + .create_flags = IB_QP_CREATE_USE_GFP_NOIO }; - return ib_create_qp(priv->pd, &attr); + struct ib_qp *tx_qp; + + tx_qp = ib_create_qp(priv->pd, &attr); + if (PTR_ERR(tx_qp) == -EINVAL) { + ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n", + priv->ca->name); + attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; + tx_qp = ib_create_qp(priv->pd, &attr); + } + return tx_qp; } static int ipoib_cm_send_req(struct net_device *dev, @@ -1104,12 +1114,14 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, struct ipoib_dev_priv *priv = netdev_priv(p->dev); int ret; - p->tx_ring = vzalloc(ipoib_sendq_size * sizeof *p->tx_ring); + p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring, + GFP_NOIO, PAGE_KERNEL); if (!p->tx_ring) { ipoib_warn(priv, "failed to allocate tx ring\n"); ret = -ENOMEM; goto err_tx; } + memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); p->qp = ipoib_cm_create_tx_qp(p->dev, p); if (IS_ERR(p->qp)) { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index acd825182977..d75b02f9c80d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -783,6 +783,7 @@ enum ib_qp_create_flags { IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, IB_QP_CREATE_NETIF_QP = 1 << 5, IB_QP_CREATE_SIGNATURE_EN = 1 << 6, + IB_QP_CREATE_USE_GFP_NOIO = 1 << 7, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, -- cgit v1.2.3-71-gd317 From 40f2287bd583f4df4c602c1a29a48df2730fb6d4 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Sun, 11 May 2014 15:15:12 +0300 Subject: IB/mlx4: Implement IB_QP_CREATE_USE_GFP_NOIO Modify the various routines used to allocate memory resources which serve QPs in mlx4 to get an input GFP directive. Have the Ethernet driver to use GFP_KERNEL in it's QP allocations as done prior to this commit, and the IB driver to use GFP_NOIO when the IB verbs IB_QP_CREATE_USE_GFP_NOIO QP creation flag is provided. Signed-off-by: Mel Gorman Signed-off-by: Jiri Kosina Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/cq.c | 6 ++--- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 + drivers/infiniband/hw/mlx4/qp.c | 30 ++++++++++++---------- drivers/infiniband/hw/mlx4/srq.c | 7 ++--- drivers/net/ethernet/mellanox/mlx4/alloc.c | 27 +++++++++---------- drivers/net/ethernet/mellanox/mlx4/cq.c | 4 +-- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 6 ++--- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 2 +- drivers/net/ethernet/mellanox/mlx4/icm.c | 7 ++--- drivers/net/ethernet/mellanox/mlx4/icm.h | 3 ++- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 4 +-- drivers/net/ethernet/mellanox/mlx4/mr.c | 17 ++++++------ drivers/net/ethernet/mellanox/mlx4/qp.c | 20 +++++++-------- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 4 +-- drivers/net/ethernet/mellanox/mlx4/srq.c | 4 +-- include/linux/mlx4/device.h | 10 +++++--- 16 files changed, 82 insertions(+), 70 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 5f640814cc81..1066eec854a9 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -102,7 +102,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * int err; err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size, - PAGE_SIZE * 2, &buf->buf); + PAGE_SIZE * 2, &buf->buf, GFP_KERNEL); if (err) goto out; @@ -113,7 +113,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf); + err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL); if (err) goto err_mtt; @@ -209,7 +209,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector uar = &to_mucontext(context)->uar; } else { - err = mlx4_db_alloc(dev->dev, &cq->db, 1); + err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL); if (err) goto err_cq; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index f589522fddfd..bb8c9dd442ae 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -156,6 +156,7 @@ enum mlx4_ib_qp_flags { MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP, + MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO, MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30, MLX4_IB_SRIOV_SQP = 1 << 31, }; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 41308af4163c..8710baf60bb9 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -610,7 +610,8 @@ static int qp_has_rq(struct ib_qp_init_attr *attr) static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, - struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp) + struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp, + gfp_t gfp) { int qpn; int err; @@ -748,14 +749,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; if (qp_has_rq(init_attr)) { - err = mlx4_db_alloc(dev->dev, &qp->db, 0); + err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp); if (err) goto err; *qp->db.db = 0; } - if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) { + if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) { err = -ENOMEM; goto err_db; } @@ -765,13 +766,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf); + err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp); if (err) goto err_mtt; - qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL); - qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL); - + qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp); + qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp); if (!qp->sq.wrid || !qp->rq.wrid) { err = -ENOMEM; goto err_wrid; @@ -801,7 +801,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_proxy; } - err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp); if (err) goto err_qpn; @@ -1040,7 +1040,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp = NULL; int err; u16 xrcdn = 0; + gfp_t gfp; + gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ? + GFP_NOIO : GFP_KERNEL; /* * We only support LSO, vendor flag1, and multicast loopback blocking, * and only for kernel UD QPs. @@ -1049,7 +1052,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK | MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP | - MLX4_IB_QP_NETIF)) + MLX4_IB_QP_NETIF | + MLX4_IB_QP_CREATE_USE_GFP_NOIO)) return ERR_PTR(-EINVAL); if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { @@ -1059,7 +1063,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, if (init_attr->create_flags && (udata || - ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) && + ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) && init_attr->qp_type != IB_QPT_UD) || ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) && init_attr->qp_type > IB_QPT_GSI))) @@ -1079,7 +1083,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_RAW_PACKET: - qp = kzalloc(sizeof *qp, GFP_KERNEL); + qp = kzalloc(sizeof *qp, gfp); if (!qp) return ERR_PTR(-ENOMEM); qp->pri.vid = 0xFFFF; @@ -1088,7 +1092,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_UD: { err = create_qp_common(to_mdev(pd->device), pd, init_attr, - udata, 0, &qp); + udata, 0, &qp, gfp); if (err) return ERR_PTR(err); @@ -1106,7 +1110,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, get_sqp_num(to_mdev(pd->device), init_attr), - &qp); + &qp, gfp); if (err) return ERR_PTR(err); diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 60c5fb025fc7..62d9285300af 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -134,13 +134,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_mtt; } else { - err = mlx4_db_alloc(dev->dev, &srq->db, 0); + err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL); if (err) goto err_srq; *srq->db.db = 0; - if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) { + if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf, + GFP_KERNEL)) { err = -ENOMEM; goto err_db; } @@ -165,7 +166,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf); + err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL); if (err) goto err_mtt; diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index c3ad464d0627..b0297da50304 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -171,7 +171,7 @@ void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap) */ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, - struct mlx4_buf *buf) + struct mlx4_buf *buf, gfp_t gfp) { dma_addr_t t; @@ -180,7 +180,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, buf->npages = 1; buf->page_shift = get_order(size) + PAGE_SHIFT; buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, - size, &t, GFP_KERNEL); + size, &t, gfp); if (!buf->direct.buf) return -ENOMEM; @@ -200,14 +200,14 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, buf->npages = buf->nbufs; buf->page_shift = PAGE_SHIFT; buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), - GFP_KERNEL); + gfp); if (!buf->page_list) return -ENOMEM; for (i = 0; i < buf->nbufs; ++i) { buf->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, - &t, GFP_KERNEL); + &t, gfp); if (!buf->page_list[i].buf) goto err_free; @@ -218,7 +218,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, if (BITS_PER_LONG == 64) { struct page **pages; - pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL); + pages = kmalloc(sizeof *pages * buf->nbufs, gfp); if (!pages) goto err_free; for (i = 0; i < buf->nbufs; ++i) @@ -260,11 +260,12 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) } EXPORT_SYMBOL_GPL(mlx4_buf_free); -static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device) +static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device, + gfp_t gfp) { struct mlx4_db_pgdir *pgdir; - pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL); + pgdir = kzalloc(sizeof *pgdir, gfp); if (!pgdir) return NULL; @@ -272,7 +273,7 @@ static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device) pgdir->bits[0] = pgdir->order0; pgdir->bits[1] = pgdir->order1; pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE, - &pgdir->db_dma, GFP_KERNEL); + &pgdir->db_dma, gfp); if (!pgdir->db_page) { kfree(pgdir); return NULL; @@ -312,7 +313,7 @@ found: return 0; } -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order) +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_db_pgdir *pgdir; @@ -324,7 +325,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order) if (!mlx4_alloc_db_from_pgdir(pgdir, db, order)) goto out; - pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev)); + pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev), gfp); if (!pgdir) { ret = -ENOMEM; goto out; @@ -376,13 +377,13 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, { int err; - err = mlx4_db_alloc(dev, &wqres->db, 1); + err = mlx4_db_alloc(dev, &wqres->db, 1, GFP_KERNEL); if (err) return err; *wqres->db.db = 0; - err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf); + err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf, GFP_KERNEL); if (err) goto err_db; @@ -391,7 +392,7 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf); + err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf, GFP_KERNEL); if (err) goto err_mtt; diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 0487121e4a0f..c90cde5b4aee 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -173,11 +173,11 @@ int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn) if (*cqn == -1) return -ENOMEM; - err = mlx4_table_get(dev, &cq_table->table, *cqn); + err = mlx4_table_get(dev, &cq_table->table, *cqn, GFP_KERNEL); if (err) goto err_out; - err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn); + err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn, GFP_KERNEL); if (err) goto err_put; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index ba049ae88749..87857a6463eb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -972,7 +972,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, if (!context) return -ENOMEM; - err = mlx4_qp_alloc(mdev->dev, qpn, qp); + err = mlx4_qp_alloc(mdev->dev, qpn, qp, GFP_KERNEL); if (err) { en_err(priv, "Failed to allocate qp #%x\n", qpn); goto out; @@ -1012,7 +1012,7 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) en_err(priv, "Failed reserving drop qpn\n"); return err; } - err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp); + err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp, GFP_KERNEL); if (err) { en_err(priv, "Failed allocating drop qp\n"); mlx4_qp_release_range(priv->mdev->dev, qpn, 1); @@ -1071,7 +1071,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) } /* Configure RSS indirection qp */ - err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp); + err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp, GFP_KERNEL); if (err) { en_err(priv, "Failed to allocate RSS indirection QP\n"); goto rss_err; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index dd1f6d346459..bc0cc1eb214d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -113,7 +113,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map); ring->qpn = qpn; - err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp); + err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL); if (err) { en_err(priv, "Failed allocating qp %d\n", ring->qpn); goto err_map; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 5fbf4924c272..eb1747e1937d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -245,7 +245,8 @@ int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev) MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); } -int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj) +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj, + int gfp) { u32 i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size); @@ -259,7 +260,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj) } table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT, - (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) | + (table->lowmem ? gfp : GFP_HIGHUSER) | __GFP_NOWARN, table->coherent); if (!table->icm[i]) { ret = -ENOMEM; @@ -356,7 +357,7 @@ int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 i; for (i = start; i <= end; i += inc) { - err = mlx4_table_get(dev, table, i); + err = mlx4_table_get(dev, table, i, GFP_KERNEL); if (err) goto fail; } diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h index dee67fa39107..067e6e0af36c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.h +++ b/drivers/net/ethernet/mellanox/mlx4/icm.h @@ -71,7 +71,8 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, gfp_t gfp_mask, int coherent); void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent); -int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj); +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj, + int gfp); void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj); int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 start, u32 end); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index f9c465101963..627a54ef2955 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -888,7 +888,7 @@ void mlx4_cleanup_cq_table(struct mlx4_dev *dev); void mlx4_cleanup_qp_table(struct mlx4_dev *dev); void mlx4_cleanup_srq_table(struct mlx4_dev *dev); void mlx4_cleanup_mcg_table(struct mlx4_dev *dev); -int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn); +int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp); void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn); int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn); void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn); @@ -896,7 +896,7 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn); void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn); int __mlx4_mpt_reserve(struct mlx4_dev *dev); void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index); -int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index); +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp); void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index); u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order); void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order); diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 24835853b753..4c71dafad217 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -364,14 +364,14 @@ static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index) __mlx4_mpt_release(dev, index); } -int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp) { struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; - return mlx4_table_get(dev, &mr_table->dmpt_table, index); + return mlx4_table_get(dev, &mr_table->dmpt_table, index, gfp); } -static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) +static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp) { u64 param = 0; @@ -382,7 +382,7 @@ static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } - return __mlx4_mpt_alloc_icm(dev, index); + return __mlx4_mpt_alloc_icm(dev, index, gfp); } void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index) @@ -469,7 +469,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) struct mlx4_mpt_entry *mpt_entry; int err; - err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key)); + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key), GFP_KERNEL); if (err) return err; @@ -627,13 +627,14 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, EXPORT_SYMBOL_GPL(mlx4_write_mtt); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, - struct mlx4_buf *buf) + struct mlx4_buf *buf, gfp_t gfp) { u64 *page_list; int err; int i; - page_list = kmalloc(buf->npages * sizeof *page_list, GFP_KERNEL); + page_list = kmalloc(buf->npages * sizeof *page_list, + gfp); if (!page_list) return -ENOMEM; @@ -680,7 +681,7 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw) struct mlx4_mpt_entry *mpt_entry; int err; - err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key)); + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key), GFP_KERNEL); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 61d64ebffd56..917f0d0ba7c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -272,29 +272,29 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) } EXPORT_SYMBOL_GPL(mlx4_qp_release_range); -int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) +int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; int err; - err = mlx4_table_get(dev, &qp_table->qp_table, qpn); + err = mlx4_table_get(dev, &qp_table->qp_table, qpn, gfp); if (err) goto err_out; - err = mlx4_table_get(dev, &qp_table->auxc_table, qpn); + err = mlx4_table_get(dev, &qp_table->auxc_table, qpn, gfp); if (err) goto err_put_qp; - err = mlx4_table_get(dev, &qp_table->altc_table, qpn); + err = mlx4_table_get(dev, &qp_table->altc_table, qpn, gfp); if (err) goto err_put_auxc; - err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn); + err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn, gfp); if (err) goto err_put_altc; - err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn); + err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn, gfp); if (err) goto err_put_rdmarc; @@ -316,7 +316,7 @@ err_out: return err; } -static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) +static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, int gfp) { u64 param = 0; @@ -326,7 +326,7 @@ static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } - return __mlx4_qp_alloc_icm(dev, qpn); + return __mlx4_qp_alloc_icm(dev, qpn, gfp); } void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) @@ -355,7 +355,7 @@ static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) __mlx4_qp_free_icm(dev, qpn); } -int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp) +int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; @@ -366,7 +366,7 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp) qp->qpn = qpn; - err = mlx4_qp_alloc_icm(dev, qpn); + err = mlx4_qp_alloc_icm(dev, qpn, gfp); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 1c3fdd4a1f7d..45da913b5679 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1532,7 +1532,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, return err; if (!fw_reserved(dev, qpn)) { - err = __mlx4_qp_alloc_icm(dev, qpn); + err = __mlx4_qp_alloc_icm(dev, qpn, GFP_KERNEL); if (err) { res_abort_move(dev, slave, RES_QP, qpn); return err; @@ -1619,7 +1619,7 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - err = __mlx4_mpt_alloc_icm(dev, mpt->key); + err = __mlx4_mpt_alloc_icm(dev, mpt->key, GFP_KERNEL); if (err) { res_abort_move(dev, slave, RES_MPT, id); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index 98faf870b0b0..67146624eb58 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -103,11 +103,11 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn) if (*srqn == -1) return -ENOMEM; - err = mlx4_table_get(dev, &srq_table->table, *srqn); + err = mlx4_table_get(dev, &srq_table->table, *srqn, GFP_KERNEL); if (err) goto err_out; - err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn); + err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn, GFP_KERNEL); if (err) goto err_put; return 0; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ba87bd21295a..be60b002bb37 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -837,7 +837,7 @@ static inline int mlx4_is_slave(struct mlx4_dev *dev) } int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, - struct mlx4_buf *buf); + struct mlx4_buf *buf, gfp_t gfp); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) { @@ -874,9 +874,10 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw); int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, - struct mlx4_buf *buf); + struct mlx4_buf *buf, gfp_t gfp); -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order); +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, + gfp_t gfp); void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db); int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, @@ -892,7 +893,8 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base); void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); -int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp); +int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, + gfp_t gfp); void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp); int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn, -- cgit v1.2.3-71-gd317 From d0c94692e0a360828745a469bcf90b5c4f9273d0 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Thu, 27 Mar 2014 19:59:39 +0100 Subject: drm/edid: Parse and handle HDMI deep color modes. Check the HDMI cea block for deep color mode bits. If available, assign the highest supported bpc for a hdmi display, corresponding to the given deep color modes. Signed-off-by: Mario Kleiner Signed-off-by: Alex Deucher --- drivers/gpu/drm/drm_edid.c | 110 ++++++++++++++++++++++++++++++++++++++++++++- include/drm/drm_edid.h | 5 +++ 2 files changed, 113 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index d74239fec291..3989c185579f 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -3422,17 +3422,117 @@ bool drm_rgb_quant_range_selectable(struct edid *edid) } EXPORT_SYMBOL(drm_rgb_quant_range_selectable); +/** + * drm_assign_hdmi_deep_color_info - detect whether monitor supports + * hdmi deep color modes and update drm_display_info if so. + * + * @edid: monitor EDID information + * @info: Updated with maximum supported deep color bpc and color format + * if deep color supported. + * + * Parse the CEA extension according to CEA-861-B. + * Return true if HDMI deep color supported, false if not or unknown. + */ +static bool drm_assign_hdmi_deep_color_info(struct edid *edid, + struct drm_display_info *info, + struct drm_connector *connector) +{ + u8 *edid_ext, *hdmi; + int i; + int start_offset, end_offset; + unsigned int dc_bpc = 0; + + edid_ext = drm_find_cea_extension(edid); + if (!edid_ext) + return false; + + if (cea_db_offsets(edid_ext, &start_offset, &end_offset)) + return false; + + /* + * Because HDMI identifier is in Vendor Specific Block, + * search it from all data blocks of CEA extension. + */ + for_each_cea_db(edid_ext, i, start_offset, end_offset) { + if (cea_db_is_hdmi_vsdb(&edid_ext[i])) { + /* HDMI supports at least 8 bpc */ + info->bpc = 8; + + hdmi = &edid_ext[i]; + if (cea_db_payload_len(hdmi) < 6) + return false; + + if (hdmi[6] & DRM_EDID_HDMI_DC_30) { + dc_bpc = 10; + DRM_DEBUG("%s: HDMI sink does deep color 30.\n", + drm_get_connector_name(connector)); + } + + if (hdmi[6] & DRM_EDID_HDMI_DC_36) { + dc_bpc = 12; + DRM_DEBUG("%s: HDMI sink does deep color 36.\n", + drm_get_connector_name(connector)); + } + + if (hdmi[6] & DRM_EDID_HDMI_DC_48) { + dc_bpc = 16; + DRM_DEBUG("%s: HDMI sink does deep color 48.\n", + drm_get_connector_name(connector)); + } + + if (dc_bpc > 0) { + DRM_DEBUG("%s: Assigning HDMI sink color depth as %d bpc.\n", + drm_get_connector_name(connector), dc_bpc); + info->bpc = dc_bpc; + + /* + * Deep color support mandates RGB444 support for all video + * modes and forbids YCRCB422 support for all video modes per + * HDMI 1.3 spec. + */ + info->color_formats = DRM_COLOR_FORMAT_RGB444; + + /* YCRCB444 is optional according to spec. */ + if (hdmi[6] & DRM_EDID_HDMI_DC_Y444) { + info->color_formats |= DRM_COLOR_FORMAT_YCRCB444; + DRM_DEBUG("%s: HDMI sink does YCRCB444 in deep color.\n", + drm_get_connector_name(connector)); + } + + /* + * Spec says that if any deep color mode is supported at all, + * then deep color 36 bit must be supported. + */ + if (!(hdmi[6] & DRM_EDID_HDMI_DC_36)) { + DRM_DEBUG("%s: HDMI sink should do DC_36, but does not!\n", + drm_get_connector_name(connector)); + } + + return true; + } + else { + DRM_DEBUG("%s: No deep color support on this HDMI sink.\n", + drm_get_connector_name(connector)); + } + } + } + + return false; +} + /** * drm_add_display_info - pull display info out if present * @edid: EDID data * @info: display info (attached to connector) + * @connector: connector whose edid is used to build display info * * Grab any available display info and stuff it into the drm_display_info * structure that's part of the connector. Useful for tracking bpp and * color spaces. */ static void drm_add_display_info(struct edid *edid, - struct drm_display_info *info) + struct drm_display_info *info, + struct drm_connector *connector) { u8 *edid_ext; @@ -3462,6 +3562,9 @@ static void drm_add_display_info(struct edid *edid, info->color_formats |= DRM_COLOR_FORMAT_YCRCB422; } + /* HDMI deep color modes supported? Assign to info, if so */ + drm_assign_hdmi_deep_color_info(edid, info, connector); + /* Only defined for 1.4 with digital displays */ if (edid->revision < 4) return; @@ -3491,6 +3594,9 @@ static void drm_add_display_info(struct edid *edid, break; } + DRM_DEBUG("%s: Assigning EDID-1.4 digital sink color depth as %d bpc.\n", + drm_get_connector_name(connector), info->bpc); + info->color_formats |= DRM_COLOR_FORMAT_RGB444; if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB444) info->color_formats |= DRM_COLOR_FORMAT_YCRCB444; @@ -3549,7 +3655,7 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid) if (quirks & (EDID_QUIRK_PREFER_LARGE_60 | EDID_QUIRK_PREFER_LARGE_75)) edid_fixup_preferred(connector, quirks); - drm_add_display_info(edid, &connector->display_info); + drm_add_display_info(edid, &connector->display_info, connector); if (quirks & EDID_QUIRK_FORCE_8BPC) connector->display_info.bpc = 8; diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index a1441c5ac63d..b96031d947a0 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -202,6 +202,11 @@ struct detailed_timing { #define DRM_EDID_FEATURE_PM_SUSPEND (1 << 6) #define DRM_EDID_FEATURE_PM_STANDBY (1 << 7) +#define DRM_EDID_HDMI_DC_48 (1 << 6) +#define DRM_EDID_HDMI_DC_36 (1 << 5) +#define DRM_EDID_HDMI_DC_30 (1 << 4) +#define DRM_EDID_HDMI_DC_Y444 (1 << 3) + struct edid { u8 header[8]; /* Vendor & product info */ -- cgit v1.2.3-71-gd317 From 688cea83f4396fa98b77a126ed278b89daccccdc Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 30 May 2014 16:00:56 +0800 Subject: macvlan: add netpoll support Add netpoll support to macvlan devices. Based on the netpoll support in the 802.1q vlan code. Tested and macvlan could work well with netconsole. Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 66 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/if_macvlan.h | 3 +++ 2 files changed, 68 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d2dbcfc68ee4..eee9106d1da1 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -33,6 +33,7 @@ #include #include #include +#include #define MACVLAN_HASH_SIZE (1 << BITS_PER_BYTE) @@ -357,12 +358,26 @@ xmit_world: return dev_queue_xmit(skb); } +static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, struct sk_buff *skb) +{ +#ifdef CONFIG_NET_POLL_CONTROLLER + if (vlan->netpoll) + netpoll_send_skb(vlan->netpoll, skb); +#else + BUG(); +#endif + return NETDEV_TX_OK; +} + static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, struct net_device *dev) { unsigned int len = skb->len; int ret; - const struct macvlan_dev *vlan = netdev_priv(dev); + struct macvlan_dev *vlan = netdev_priv(dev); + + if (unlikely(netpoll_tx_running(dev))) + return macvlan_netpoll_send_skb(vlan, skb); if (vlan->fwd_priv) { skb->dev = vlan->lowerdev; @@ -788,6 +803,50 @@ static netdev_features_t macvlan_fix_features(struct net_device *dev, return features; } +#ifdef CONFIG_NET_POLL_CONTROLLER +static void macvlan_dev_poll_controller(struct net_device *dev) +{ + return; +} + +static int macvlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + struct net_device *real_dev = vlan->lowerdev; + struct netpoll *netpoll; + int err = 0; + + netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL); + err = -ENOMEM; + if (!netpoll) + goto out; + + err = __netpoll_setup(netpoll, real_dev); + if (err) { + kfree(netpoll); + goto out; + } + + vlan->netpoll = netpoll; + +out: + return err; +} + +static void macvlan_dev_netpoll_cleanup(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + struct netpoll *netpoll = vlan->netpoll; + + if (!netpoll) + return; + + vlan->netpoll = NULL; + + __netpoll_free_async(netpoll); +} +#endif /* CONFIG_NET_POLL_CONTROLLER */ + static const struct ethtool_ops macvlan_ethtool_ops = { .get_link = ethtool_op_get_link, .get_settings = macvlan_ethtool_get_settings, @@ -813,6 +872,11 @@ static const struct net_device_ops macvlan_netdev_ops = { .ndo_fdb_del = macvlan_fdb_del, .ndo_fdb_dump = ndo_dflt_fdb_dump, .ndo_get_lock_subclass = macvlan_get_nest_level, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = macvlan_dev_poll_controller, + .ndo_netpoll_setup = macvlan_dev_netpoll_setup, + .ndo_netpoll_cleanup = macvlan_dev_netpoll_cleanup, +#endif }; void macvlan_common_setup(struct net_device *dev) diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index a9a53b12397b..6b2c7cf352a5 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -57,6 +57,9 @@ struct macvlan_dev { netdev_features_t tap_features; int minor; int nest_level; +#ifdef CONFIG_NET_POLL_CONTROLLER + struct netpoll *netpoll; +#endif }; static inline void macvlan_count_rx(const struct macvlan_dev *vlan, -- cgit v1.2.3-71-gd317 From 41c389d72cf0756957450c25c1dbc7d026324df8 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 27 May 2014 22:39:37 -0700 Subject: bridge: Add bridge ifindex to bridge fdb notify msgs (This patch was previously posted as RFC at http://patchwork.ozlabs.org/patch/352677/) This patch adds NDA_MASTER attribute to neighbour attributes enum for bridge/master ifindex. And adds NDA_MASTER to bridge fdb notify msgs. Today bridge fdb notifications dont contain bridge information. Userspace can derive it from the port information in the fdb notification. However this is tricky in some scenarious. Example, bridge port delete notification comes before bridge fdb delete notifications. And we have seen problems in userspace when using libnl where, the bridge fdb delete notification handling code does not understand which bridge this fdb entry is part of because the bridge and port association has already been deleted. And these notifications (port membership and fdb) are generated on separate rtnl groups. Fixing the order of notifications could possibly solve the problem for some cases (I can submit a separate patch for that). This patch chooses to add NDA_MASTER to bridge fdb notify msgs because it not only solves the problem described above, but also helps userspace avoid another lookup into link msgs to derive the master index. Signed-off-by: Roopa Prabhu Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 1 + net/bridge/br_fdb.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index d3ef583104e0..4a1d7e96dfe3 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -24,6 +24,7 @@ enum { NDA_PORT, NDA_VNI, NDA_IFINDEX, + NDA_MASTER, __NDA_MAX }; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 648d0e849595..2c45c069ea1a 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -616,6 +616,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr)) goto nla_put_failure; + if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex)) + goto nla_put_failure; ci.ndm_used = jiffies_to_clock_t(now - fdb->used); ci.ndm_confirmed = 0; ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated); @@ -637,6 +639,7 @@ static inline size_t fdb_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(u32)) /* NDA_MASTER */ + nla_total_size(sizeof(u16)) /* NDA_VLAN */ + nla_total_size(sizeof(struct nda_cacheinfo)); } -- cgit v1.2.3-71-gd317 From fe62d001372388abb15a324148c913f9b43722a8 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 15 May 2014 01:25:27 +0100 Subject: ethtool: Replace ethtool_ops::{get,set}_rxfh_indir() with {get,set}_rxfh() ETHTOOL_{G,S}RXFHINDIR and ETHTOOL_{G,S}RSSH should work for drivers regardless of whether they expose the hash key, unless you try to set a hash key for a driver that doesn't expose it. Signed-off-by: Ben Hutchings Acked-by: Jeff Kirsher --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 15 ++++++++------- drivers/net/ethernet/broadcom/tg3.c | 8 ++++---- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 8 ++++---- drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c | 15 +++++++++------ drivers/net/ethernet/intel/igb/igb_ethtool.c | 9 +++++---- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 10 +++++----- drivers/net/ethernet/sfc/ethtool.c | 10 +++++----- drivers/net/vmxnet3/vmxnet3_ethtool.c | 8 ++++---- include/linux/ethtool.h | 6 ------ net/core/ethtool.c | 8 ++++---- 10 files changed, 48 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 03224090ecf9..af138f8aa361 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -3316,7 +3316,7 @@ static u32 bnx2x_get_rxfh_indir_size(struct net_device *dev) return T_ETH_INDIRECTION_TABLE_SIZE; } -static int bnx2x_get_rxfh_indir(struct net_device *dev, u32 *indir) +static int bnx2x_get_rxfh(struct net_device *dev, u32 *indir, u8 *key) { struct bnx2x *bp = netdev_priv(dev); u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0}; @@ -3340,14 +3340,15 @@ static int bnx2x_get_rxfh_indir(struct net_device *dev, u32 *indir) return 0; } -static int bnx2x_set_rxfh_indir(struct net_device *dev, const u32 *indir) +static int bnx2x_set_rxfh(struct net_device *dev, const u32 *indir, + const u8 *key) { struct bnx2x *bp = netdev_priv(dev); size_t i; for (i = 0; i < T_ETH_INDIRECTION_TABLE_SIZE; i++) { /* - * The same as in bnx2x_get_rxfh_indir: we can't use a memcpy() + * The same as in bnx2x_get_rxfh: we can't use a memcpy() * as an internal storage of an indirection table is a u8 array * while indir->ring_index points to an array of u32. * @@ -3471,8 +3472,8 @@ static const struct ethtool_ops bnx2x_ethtool_ops = { .get_rxnfc = bnx2x_get_rxnfc, .set_rxnfc = bnx2x_set_rxnfc, .get_rxfh_indir_size = bnx2x_get_rxfh_indir_size, - .get_rxfh_indir = bnx2x_get_rxfh_indir, - .set_rxfh_indir = bnx2x_set_rxfh_indir, + .get_rxfh = bnx2x_get_rxfh, + .set_rxfh = bnx2x_set_rxfh, .get_channels = bnx2x_get_channels, .set_channels = bnx2x_set_channels, .get_module_info = bnx2x_get_module_info, @@ -3498,8 +3499,8 @@ static const struct ethtool_ops bnx2x_vf_ethtool_ops = { .get_rxnfc = bnx2x_get_rxnfc, .set_rxnfc = bnx2x_set_rxnfc, .get_rxfh_indir_size = bnx2x_get_rxfh_indir_size, - .get_rxfh_indir = bnx2x_get_rxfh_indir, - .set_rxfh_indir = bnx2x_set_rxfh_indir, + .get_rxfh = bnx2x_get_rxfh, + .set_rxfh = bnx2x_set_rxfh, .get_channels = bnx2x_get_channels, .set_channels = bnx2x_set_channels, }; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index ccd90156aebc..8c2314ed260c 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12501,7 +12501,7 @@ static u32 tg3_get_rxfh_indir_size(struct net_device *dev) return size; } -static int tg3_get_rxfh_indir(struct net_device *dev, u32 *indir) +static int tg3_get_rxfh(struct net_device *dev, u32 *indir, u8 *key) { struct tg3 *tp = netdev_priv(dev); int i; @@ -12512,7 +12512,7 @@ static int tg3_get_rxfh_indir(struct net_device *dev, u32 *indir) return 0; } -static int tg3_set_rxfh_indir(struct net_device *dev, const u32 *indir) +static int tg3_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key) { struct tg3 *tp = netdev_priv(dev); size_t i; @@ -14044,8 +14044,8 @@ static const struct ethtool_ops tg3_ethtool_ops = { .get_sset_count = tg3_get_sset_count, .get_rxnfc = tg3_get_rxnfc, .get_rxfh_indir_size = tg3_get_rxfh_indir_size, - .get_rxfh_indir = tg3_get_rxfh_indir, - .set_rxfh_indir = tg3_set_rxfh_indir, + .get_rxfh = tg3_get_rxfh, + .set_rxfh = tg3_set_rxfh, .get_channels = tg3_get_channels, .set_channels = tg3_set_channels, .get_ts_info = tg3_get_ts_info, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 266a5bc6aedf..8cf6be93f491 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2739,7 +2739,7 @@ static u32 get_rss_table_size(struct net_device *dev) return pi->rss_size; } -static int get_rss_table(struct net_device *dev, u32 *p) +static int get_rss_table(struct net_device *dev, u32 *p, u8 *key) { const struct port_info *pi = netdev_priv(dev); unsigned int n = pi->rss_size; @@ -2749,7 +2749,7 @@ static int get_rss_table(struct net_device *dev, u32 *p) return 0; } -static int set_rss_table(struct net_device *dev, const u32 *p) +static int set_rss_table(struct net_device *dev, const u32 *p, const u8 *key) { unsigned int i; struct port_info *pi = netdev_priv(dev); @@ -2851,8 +2851,8 @@ static const struct ethtool_ops cxgb_ethtool_ops = { .set_wol = set_wol, .get_rxnfc = get_rxnfc, .get_rxfh_indir_size = get_rss_table_size, - .get_rxfh_indir = get_rss_table, - .set_rxfh_indir = set_rss_table, + .get_rxfh = get_rss_table, + .set_rxfh = set_rss_table, .flash_device = set_flash, }; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index 77e786d2d0e0..dbc8986c2dae 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -626,13 +626,14 @@ static u32 i40evf_get_rxfh_indir_size(struct net_device *netdev) } /** - * i40evf_get_rxfh_indir - get the rx flow hash indirection table + * i40evf_get_rxfh - get the rx flow hash indirection table * @netdev: network interface device structure * @indir: indirection table + * @key: hash key (will be %NULL until get_rxfh_key_size is implemented) * * Reads the indirection table directly from the hardware. Always returns 0. **/ -static int i40evf_get_rxfh_indir(struct net_device *netdev, u32 *indir) +static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key) { struct i40evf_adapter *adapter = netdev_priv(netdev); struct i40e_hw *hw = &adapter->hw; @@ -650,14 +651,16 @@ static int i40evf_get_rxfh_indir(struct net_device *netdev, u32 *indir) } /** - * i40evf_set_rxfh_indir - set the rx flow hash indirection table + * i40evf_set_rxfh - set the rx flow hash indirection table * @netdev: network interface device structure * @indir: indirection table + * @key: hash key (will be %NULL until get_rxfh_key_size is implemented) * * Returns -EINVAL if the table specifies an inavlid queue id, otherwise * returns 0 after programming the table. **/ -static int i40evf_set_rxfh_indir(struct net_device *netdev, const u32 *indir) +static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key) { struct i40evf_adapter *adapter = netdev_priv(netdev); struct i40e_hw *hw = &adapter->hw; @@ -691,8 +694,8 @@ static struct ethtool_ops i40evf_ethtool_ops = { .get_rxnfc = i40evf_get_rxnfc, .set_rxnfc = i40evf_set_rxnfc, .get_rxfh_indir_size = i40evf_get_rxfh_indir_size, - .get_rxfh_indir = i40evf_get_rxfh_indir, - .set_rxfh_indir = i40evf_set_rxfh_indir, + .get_rxfh = i40evf_get_rxfh, + .set_rxfh = i40evf_set_rxfh, .get_channels = i40evf_get_channels, }; diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index a84297c85fb1..d8bbcf1873ca 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2830,7 +2830,7 @@ static u32 igb_get_rxfh_indir_size(struct net_device *netdev) return IGB_RETA_SIZE; } -static int igb_get_rxfh_indir(struct net_device *netdev, u32 *indir) +static int igb_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key) { struct igb_adapter *adapter = netdev_priv(netdev); int i; @@ -2876,7 +2876,8 @@ void igb_write_rss_indir_tbl(struct igb_adapter *adapter) } } -static int igb_set_rxfh_indir(struct net_device *netdev, const u32 *indir) +static int igb_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -3025,8 +3026,8 @@ static const struct ethtool_ops igb_ethtool_ops = { .get_module_info = igb_get_module_info, .get_module_eeprom = igb_get_module_eeprom, .get_rxfh_indir_size = igb_get_rxfh_indir_size, - .get_rxfh_indir = igb_get_rxfh_indir, - .set_rxfh_indir = igb_set_rxfh_indir, + .get_rxfh = igb_get_rxfh, + .set_rxfh = igb_set_rxfh, .get_channels = igb_get_channels, .set_channels = igb_set_channels, .begin = igb_ethtool_begin, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index a72d99fd7a2d..263a1c7a3370 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -564,7 +564,7 @@ static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev) return priv->rx_ring_num; } -static int mlx4_en_get_rxfh_indir(struct net_device *dev, u32 *ring_index) +static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_rss_map *rss_map = &priv->rss_map; @@ -582,8 +582,8 @@ static int mlx4_en_get_rxfh_indir(struct net_device *dev, u32 *ring_index) return err; } -static int mlx4_en_set_rxfh_indir(struct net_device *dev, - const u32 *ring_index) +static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, + const u8 *key) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; @@ -1224,8 +1224,8 @@ const struct ethtool_ops mlx4_en_ethtool_ops = { .get_rxnfc = mlx4_en_get_rxnfc, .set_rxnfc = mlx4_en_set_rxnfc, .get_rxfh_indir_size = mlx4_en_get_rxfh_indir_size, - .get_rxfh_indir = mlx4_en_get_rxfh_indir, - .set_rxfh_indir = mlx4_en_set_rxfh_indir, + .get_rxfh = mlx4_en_get_rxfh, + .set_rxfh = mlx4_en_set_rxfh, .get_channels = mlx4_en_get_channels, .set_channels = mlx4_en_set_channels, .get_ts_info = mlx4_en_get_ts_info, diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 0de8b07c24c2..74739c4b9997 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -1033,7 +1033,7 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev) 0 : ARRAY_SIZE(efx->rx_indir_table)); } -static int efx_ethtool_get_rxfh_indir(struct net_device *net_dev, u32 *indir) +static int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key) { struct efx_nic *efx = netdev_priv(net_dev); @@ -1041,8 +1041,8 @@ static int efx_ethtool_get_rxfh_indir(struct net_device *net_dev, u32 *indir) return 0; } -static int efx_ethtool_set_rxfh_indir(struct net_device *net_dev, - const u32 *indir) +static int efx_ethtool_set_rxfh(struct net_device *net_dev, + const u32 *indir, const u8 *key) { struct efx_nic *efx = netdev_priv(net_dev); @@ -1125,8 +1125,8 @@ const struct ethtool_ops efx_ethtool_ops = { .get_rxnfc = efx_ethtool_get_rxnfc, .set_rxnfc = efx_ethtool_set_rxnfc, .get_rxfh_indir_size = efx_ethtool_get_rxfh_indir_size, - .get_rxfh_indir = efx_ethtool_get_rxfh_indir, - .set_rxfh_indir = efx_ethtool_set_rxfh_indir, + .get_rxfh = efx_ethtool_get_rxfh, + .set_rxfh = efx_ethtool_set_rxfh, .get_ts_info = efx_ethtool_get_ts_info, .get_module_info = efx_ethtool_get_module_info, .get_module_eeprom = efx_ethtool_get_module_eeprom, diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 00e120296e92..9396cca93b09 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -579,7 +579,7 @@ vmxnet3_get_rss_indir_size(struct net_device *netdev) } static int -vmxnet3_get_rss_indir(struct net_device *netdev, u32 *p) +vmxnet3_get_rss(struct net_device *netdev, u32 *p, u8 *key) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); struct UPT1_RSSConf *rssConf = adapter->rss_conf; @@ -592,7 +592,7 @@ vmxnet3_get_rss_indir(struct net_device *netdev, u32 *p) } static int -vmxnet3_set_rss_indir(struct net_device *netdev, const u32 *p) +vmxnet3_set_rss(struct net_device *netdev, const u32 *p, const u8 *key) { unsigned int i; unsigned long flags; @@ -628,8 +628,8 @@ static const struct ethtool_ops vmxnet3_ethtool_ops = { .get_rxnfc = vmxnet3_get_rxnfc, #ifdef VMXNET3_RSS .get_rxfh_indir_size = vmxnet3_get_rss_indir_size, - .get_rxfh_indir = vmxnet3_get_rss_indir, - .set_rxfh_indir = vmxnet3_set_rss_indir, + .get_rxfh = vmxnet3_get_rss, + .set_rxfh = vmxnet3_set_rss, #endif }; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 874fde01d398..e658229fee39 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -158,15 +158,11 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * Returns zero if not supported for this specific device. * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table. * Returns zero if not supported for this specific device. - * @get_rxfh_indir: Get the contents of the RX flow hash indirection table. - * Will not be called if @get_rxfh_indir_size returns zero. * @get_rxfh: Get the contents of the RX flow hash indirection table and hash * key. * Will only be called if one or both of @get_rxfh_indir_size and * @get_rxfh_key_size are implemented and return non-zero. * Returns a negative error code or zero. - * @set_rxfh_indir: Set the contents of the RX flow hash indirection table. - * Will not be called if @get_rxfh_indir_size returns zero. * @set_rxfh: Set the contents of the RX flow hash indirection table and/or * hash key. In case only the indirection table or hash key is to be * changed, the other argument will be %NULL. @@ -248,8 +244,6 @@ struct ethtool_ops { int (*get_rxfh)(struct net_device *, u32 *indir, u8 *key); int (*set_rxfh)(struct net_device *, const u32 *indir, const u8 *key); - int (*get_rxfh_indir)(struct net_device *, u32 *); - int (*set_rxfh_indir)(struct net_device *, const u32 *); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index b8857348bdf3..8ae452afb545 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -582,7 +582,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, int ret; if (!dev->ethtool_ops->get_rxfh_indir_size || - !dev->ethtool_ops->get_rxfh_indir) + !dev->ethtool_ops->get_rxfh) return -EOPNOTSUPP; dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); if (dev_size == 0) @@ -608,7 +608,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, if (!indir) return -ENOMEM; - ret = dev->ethtool_ops->get_rxfh_indir(dev, indir); + ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL); if (ret) goto out; @@ -632,7 +632,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, int ret; u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]); - if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir || + if (!ops->get_rxfh_indir_size || !ops->set_rxfh || !ops->get_rxnfc) return -EOPNOTSUPP; @@ -669,7 +669,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, goto out; } - ret = ops->set_rxfh_indir(dev, indir); + ret = ops->set_rxfh(dev, indir, NULL); out: kfree(indir); -- cgit v1.2.3-71-gd317 From 23372af15e638bf3ce0764554db3b5e58bf7ceb8 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 9 May 2014 22:45:08 -0400 Subject: NVMe: Update data structures for NVMe 1.2 Include changes from the current set of ratified Technical Proposals for NVMe 1.2. Signed-off-by: Matthew Wilcox --- include/uapi/linux/nvme.h | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index e74da782d69c..a6fb2a360577 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -27,7 +27,12 @@ struct nvme_id_power_state { __u8 read_lat; __u8 write_tput; __u8 write_lat; - __u8 rsvd16[16]; + __le16 idle_power; + __u8 idle_scale; + __u8 rsvd19; + __le16 active_power; + __u8 active_work_scale; + __u8 rsvd23[9]; }; enum { @@ -46,7 +51,8 @@ struct nvme_id_ctrl { __u8 mic; __u8 mdts; __u16 cntlid; - __u8 rsvd80[176]; + __u32 ver; + __u8 rsvd84[172]; __le16 oacs; __u8 acl; __u8 aerl; @@ -56,7 +62,9 @@ struct nvme_id_ctrl { __u8 npss; __u8 avscc; __u8 apsta; - __u8 rsvd266[246]; + __le16 wctemp; + __le16 cctemp; + __u8 rsvd270[242]; __u8 sqes; __u8 cqes; __u8 rsvd514[2]; @@ -102,7 +110,12 @@ struct nvme_id_ns { __u8 dps; __u8 nmic; __u8 rescap; - __u8 rsvd32[88]; + __u8 fpi; + __u8 rsvd33; + __le16 nawun; + __le16 nawupf; + __le16 nacwu; + __u8 rsvd40[80]; __u8 eui64[8]; struct nvme_lbaf lbaf[16]; __u8 rsvd192[192]; @@ -134,7 +147,10 @@ struct nvme_smart_log { __u8 unsafe_shutdowns[16]; __u8 media_errors[16]; __u8 num_err_log_entries[16]; - __u8 rsvd192[320]; + __le32 warning_temp_time; + __le32 critical_comp_time; + __le16 temp_sensor[8]; + __u8 rsvd216[296]; }; enum { -- cgit v1.2.3-71-gd317 From 1dd40c3ecd9b8a4ab91dbf2e6ce10b82a3b5ae63 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 14 Mar 2014 18:41:24 -0400 Subject: dm: introduce dm_accept_partial_bio The function dm_accept_partial_bio allows the target to specify how many sectors of the current bio it will process. If the target only wants to accept part of the bio, it calls dm_accept_partial_bio and the DM core sends the rest of the data in next bio. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 59 +++++++++++++++++++++++++++++++++++++------ include/linux/device-mapper.h | 2 ++ 2 files changed, 53 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 368a20dd85c2..97940fc8c302 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1110,6 +1110,46 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) } EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); +/* + * A target may call dm_accept_partial_bio only from the map routine. It is + * allowed for all bio types except REQ_FLUSH. + * + * dm_accept_partial_bio informs the dm that the target only wants to process + * additional n_sectors sectors of the bio and the rest of the data should be + * sent in a next bio. + * + * A diagram that explains the arithmetics: + * +--------------------+---------------+-------+ + * | 1 | 2 | 3 | + * +--------------------+---------------+-------+ + * + * <-------------- *tio->len_ptr ---------------> + * <------- bi_size -------> + * <-- n_sectors --> + * + * Region 1 was already iterated over with bio_advance or similar function. + * (it may be empty if the target doesn't use bio_advance) + * Region 2 is the remaining bio size that the target wants to process. + * (it may be empty if region 1 is non-empty, although there is no reason + * to make it empty) + * The target requires that region 3 is to be sent in the next bio. + * + * If the target wants to receive multiple copies of the bio (via num_*bios, etc), + * the partially processed part (the sum of regions 1+2) must be the same for all + * copies of the bio. + */ +void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) +{ + struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); + unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT; + BUG_ON(bio->bi_rw & REQ_FLUSH); + BUG_ON(bi_size > *tio->len_ptr); + BUG_ON(n_sectors > bi_size); + *tio->len_ptr -= bi_size - n_sectors; + bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT; +} +EXPORT_SYMBOL_GPL(dm_accept_partial_bio); + static void __map_bio(struct dm_target_io *tio) { int r; @@ -1200,11 +1240,13 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, static void __clone_and_map_simple_bio(struct clone_info *ci, struct dm_target *ti, - unsigned target_bio_nr, unsigned len) + unsigned target_bio_nr, unsigned *len) { struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr); struct bio *clone = &tio->clone; + tio->len_ptr = len; + /* * Discard requests require the bio's inline iovecs be initialized. * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush @@ -1212,13 +1254,13 @@ static void __clone_and_map_simple_bio(struct clone_info *ci, */ __bio_clone_fast(clone, ci->bio); if (len) - bio_setup_sector(clone, ci->sector, len); + bio_setup_sector(clone, ci->sector, *len); __map_bio(tio); } static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, - unsigned num_bios, unsigned len) + unsigned num_bios, unsigned *len) { unsigned target_bio_nr; @@ -1233,13 +1275,13 @@ static int __send_empty_flush(struct clone_info *ci) BUG_ON(bio_has_data(ci->bio)); while ((ti = dm_table_get_target(ci->map, target_nr++))) - __send_duplicate_bios(ci, ti, ti->num_flush_bios, 0); + __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); return 0; } static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, - sector_t sector, unsigned len) + sector_t sector, unsigned *len) { struct bio *bio = ci->bio; struct dm_target_io *tio; @@ -1254,7 +1296,8 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) { tio = alloc_tio(ci, ti, 0, target_bio_nr); - clone_bio(tio, bio, sector, len); + tio->len_ptr = len; + clone_bio(tio, bio, sector, *len); __map_bio(tio); } } @@ -1306,7 +1349,7 @@ static int __send_changing_extent_only(struct clone_info *ci, else len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti)); - __send_duplicate_bios(ci, ti, num_bios, len); + __send_duplicate_bios(ci, ti, num_bios, &len); ci->sector += len; } while (ci->sector_count -= len); @@ -1345,7 +1388,7 @@ static int __split_and_process_non_flush(struct clone_info *ci) len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count); - __clone_and_map_data_bio(ci, ti, ci->sector, len); + __clone_and_map_data_bio(ci, ti, ci->sector, &len); ci->sector += len; ci->sector_count -= len; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 63da56ed9796..0adca299f238 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -291,6 +291,7 @@ struct dm_target_io { struct dm_io *io; struct dm_target *ti; unsigned target_bio_nr; + unsigned *len_ptr; struct bio clone; }; @@ -401,6 +402,7 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid); struct gendisk *dm_disk(struct mapped_device *md); int dm_suspended(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); +void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors); union map_info *dm_get_rq_mapinfo(struct request *rq); struct queue_limits *dm_get_queue_limits(struct mapped_device *md); -- cgit v1.2.3-71-gd317 From bd67608a6127c994e897c49cc4f72d9095925301 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 3 Jun 2014 23:04:30 -0400 Subject: NVMe: Rename io_timeout to nvme_io_timeout It's positively immoral to have a global variable called 'io_timeout'. Keep the module parameter called io_timeout, though. Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 4 ++-- include/linux/nvme.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index bb6ce311ad44..2af079e571fc 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -54,8 +54,8 @@ static unsigned char admin_timeout = 60; module_param(admin_timeout, byte, 0644); MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands"); -unsigned char io_timeout = 30; -module_param(io_timeout, byte, 0644); +unsigned char nvme_io_timeout = 30; +module_param_named(io_timeout, nvme_io_timeout, byte, 0644); MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); static unsigned char retry_time = 30; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 1813cfdb7e80..8541dd920bb7 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -62,8 +62,8 @@ enum { #define NVME_VS(major, minor) (major << 16 | minor) -extern unsigned char io_timeout; -#define NVME_IO_TIMEOUT (io_timeout * HZ) +extern unsigned char nvme_io_timeout; +#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) /* * Represents an NVM Express device. Each nvme_dev is a PCI function. -- cgit v1.2.3-71-gd317 From d5ab2b430b3cec65ba5b30b2e1de8ea46715cb93 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 3 Jun 2014 14:56:23 +0300 Subject: drm: drop drm_get_connector_name() and drm_get_encoder_name() No longer used or needed as the structs have a name field. Acked-by: David Herrmann Signed-off-by: Jani Nikula Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 20 -------------------- include/drm/drm_crtc.h | 2 -- 2 files changed, 22 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 986531395872..f6664a75ad57 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -256,26 +256,6 @@ void drm_connector_ida_destroy(void) ida_destroy(&drm_connector_enum_list[i].ida); } -/** - * drm_get_encoder_name - return a string for encoder - * @encoder: the encoder to get name for - */ -const char *drm_get_encoder_name(const struct drm_encoder *encoder) -{ - return encoder->name; -} -EXPORT_SYMBOL(drm_get_encoder_name); - -/** - * drm_get_connector_name - return a string for connector - * @connector: the connector to get name for - */ -const char *drm_get_connector_name(const struct drm_connector *connector) -{ - return connector->name; -} -EXPORT_SYMBOL(drm_get_connector_name); - /** * drm_get_connector_status_name - return a string for connector status * @status: connector status to compute name of diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 5c1c31cc11cd..e8fe9d8e135c 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -909,7 +909,6 @@ extern int drm_crtc_check_viewport(const struct drm_crtc *crtc, extern void drm_encoder_cleanup(struct drm_encoder *encoder); -extern const char *drm_get_connector_name(const struct drm_connector *connector); extern const char *drm_get_connector_status_name(enum drm_connector_status status); extern const char *drm_get_subpixel_order_name(enum subpixel_order order); extern const char *drm_get_dpms_name(int val); @@ -972,7 +971,6 @@ extern int drm_mode_create_tv_properties(struct drm_device *dev, int num_formats char *formats[]); extern int drm_mode_create_scaling_mode_property(struct drm_device *dev); extern int drm_mode_create_dirty_info_property(struct drm_device *dev); -extern const char *drm_get_encoder_name(const struct drm_encoder *encoder); extern int drm_mode_connector_attach_encoder(struct drm_connector *connector, struct drm_encoder *encoder); -- cgit v1.2.3-71-gd317 From a2b34e226ac9fbd20179091fad0ee1a24ad48669 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 5 Oct 2013 16:36:52 -0400 Subject: drm: helpers to find mode objects Add a few more useful helpers to find mode objects. Signed-off-by: Rob Clark Reviewed-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 90 +++++++++++++++------------------------------- include/drm/drm_crtc.h | 33 +++++++++++++++++ 2 files changed, 61 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index f6664a75ad57..c4b44be7d464 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -1692,7 +1692,6 @@ int drm_mode_getcrtc(struct drm_device *dev, { struct drm_mode_crtc *crtc_resp = data; struct drm_crtc *crtc; - struct drm_mode_object *obj; int ret = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) @@ -1700,13 +1699,11 @@ int drm_mode_getcrtc(struct drm_device *dev, drm_modeset_lock_all(dev); - obj = drm_mode_object_find(dev, crtc_resp->crtc_id, - DRM_MODE_OBJECT_CRTC); - if (!obj) { + crtc = drm_crtc_find(dev, crtc_resp->crtc_id); + if (!crtc) { ret = -ENOENT; goto out; } - crtc = obj_to_crtc(obj); crtc_resp->x = crtc->x; crtc_resp->y = crtc->y; @@ -1760,7 +1757,6 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_get_connector *out_resp = data; - struct drm_mode_object *obj; struct drm_connector *connector; struct drm_display_mode *mode; int mode_count = 0; @@ -1784,13 +1780,11 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, mutex_lock(&dev->mode_config.mutex); - obj = drm_mode_object_find(dev, out_resp->connector_id, - DRM_MODE_OBJECT_CONNECTOR); - if (!obj) { + connector = drm_connector_find(dev, out_resp->connector_id); + if (!connector) { ret = -ENOENT; goto out; } - connector = obj_to_connector(obj); props_count = connector->properties.count; @@ -1905,7 +1899,6 @@ int drm_mode_getencoder(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_get_encoder *enc_resp = data; - struct drm_mode_object *obj; struct drm_encoder *encoder; int ret = 0; @@ -1913,13 +1906,11 @@ int drm_mode_getencoder(struct drm_device *dev, void *data, return -EINVAL; drm_modeset_lock_all(dev); - obj = drm_mode_object_find(dev, enc_resp->encoder_id, - DRM_MODE_OBJECT_ENCODER); - if (!obj) { + encoder = drm_encoder_find(dev, enc_resp->encoder_id); + if (!encoder) { ret = -ENOENT; goto out; } - encoder = obj_to_encoder(obj); if (encoder->crtc) enc_resp->crtc_id = encoder->crtc->base.id; @@ -2017,7 +2008,6 @@ int drm_mode_getplane(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_get_plane *plane_resp = data; - struct drm_mode_object *obj; struct drm_plane *plane; uint32_t __user *format_ptr; int ret = 0; @@ -2026,13 +2016,11 @@ int drm_mode_getplane(struct drm_device *dev, void *data, return -EINVAL; drm_modeset_lock_all(dev); - obj = drm_mode_object_find(dev, plane_resp->plane_id, - DRM_MODE_OBJECT_PLANE); - if (!obj) { + plane = drm_plane_find(dev, plane_resp->plane_id); + if (!plane) { ret = -ENOENT; goto out; } - plane = obj_to_plane(obj); if (plane->crtc) plane_resp->crtc_id = plane->crtc->base.id; @@ -2085,7 +2073,6 @@ int drm_mode_setplane(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_set_plane *plane_req = data; - struct drm_mode_object *obj; struct drm_plane *plane; struct drm_crtc *crtc; struct drm_framebuffer *fb = NULL, *old_fb = NULL; @@ -2100,14 +2087,12 @@ int drm_mode_setplane(struct drm_device *dev, void *data, * First, find the plane, crtc, and fb objects. If not available, * we don't bother to call the driver. */ - obj = drm_mode_object_find(dev, plane_req->plane_id, - DRM_MODE_OBJECT_PLANE); - if (!obj) { + plane = drm_plane_find(dev, plane_req->plane_id); + if (!plane) { DRM_DEBUG_KMS("Unknown plane ID %d\n", plane_req->plane_id); return -ENOENT; } - plane = obj_to_plane(obj); /* No fb means shut it down */ if (!plane_req->fb_id) { @@ -2124,15 +2109,13 @@ int drm_mode_setplane(struct drm_device *dev, void *data, goto out; } - obj = drm_mode_object_find(dev, plane_req->crtc_id, - DRM_MODE_OBJECT_CRTC); - if (!obj) { + crtc = drm_crtc_find(dev, plane_req->crtc_id); + if (!crtc) { DRM_DEBUG_KMS("Unknown crtc ID %d\n", plane_req->crtc_id); ret = -ENOENT; goto out; } - crtc = obj_to_crtc(obj); fb = drm_framebuffer_lookup(dev, plane_req->fb_id); if (!fb) { @@ -2319,7 +2302,6 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, { struct drm_mode_config *config = &dev->mode_config; struct drm_mode_crtc *crtc_req = data; - struct drm_mode_object *obj; struct drm_crtc *crtc; struct drm_connector **connector_set = NULL, *connector; struct drm_framebuffer *fb = NULL; @@ -2337,14 +2319,12 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, return -ERANGE; drm_modeset_lock_all(dev); - obj = drm_mode_object_find(dev, crtc_req->crtc_id, - DRM_MODE_OBJECT_CRTC); - if (!obj) { + crtc = drm_crtc_find(dev, crtc_req->crtc_id); + if (!crtc) { DRM_DEBUG_KMS("Unknown CRTC ID %d\n", crtc_req->crtc_id); ret = -ENOENT; goto out; } - crtc = obj_to_crtc(obj); DRM_DEBUG_KMS("[CRTC:%d]\n", crtc->base.id); if (crtc_req->mode_valid) { @@ -2427,15 +2407,13 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, goto out; } - obj = drm_mode_object_find(dev, out_id, - DRM_MODE_OBJECT_CONNECTOR); - if (!obj) { + connector = drm_connector_find(dev, out_id); + if (!connector) { DRM_DEBUG_KMS("Connector id %d unknown\n", out_id); ret = -ENOENT; goto out; } - connector = obj_to_connector(obj); DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); @@ -2467,7 +2445,6 @@ static int drm_mode_cursor_common(struct drm_device *dev, struct drm_mode_cursor2 *req, struct drm_file *file_priv) { - struct drm_mode_object *obj; struct drm_crtc *crtc; int ret = 0; @@ -2477,12 +2454,11 @@ static int drm_mode_cursor_common(struct drm_device *dev, if (!req->flags || (~DRM_MODE_CURSOR_FLAGS & req->flags)) return -EINVAL; - obj = drm_mode_object_find(dev, req->crtc_id, DRM_MODE_OBJECT_CRTC); - if (!obj) { + crtc = drm_crtc_find(dev, req->crtc_id); + if (!crtc) { DRM_DEBUG_KMS("Unknown CRTC ID %d\n", req->crtc_id); return -ENOENT; } - crtc = obj_to_crtc(obj); mutex_lock(&crtc->mutex); if (req->flags & DRM_MODE_CURSOR_BO) { @@ -3439,7 +3415,6 @@ EXPORT_SYMBOL(drm_object_property_get_value); int drm_mode_getproperty_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_mode_object *obj; struct drm_mode_get_property *out_resp = data; struct drm_property *property; int enum_count = 0; @@ -3458,12 +3433,11 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev, return -EINVAL; drm_modeset_lock_all(dev); - obj = drm_mode_object_find(dev, out_resp->prop_id, DRM_MODE_OBJECT_PROPERTY); - if (!obj) { + property = drm_property_find(dev, out_resp->prop_id); + if (!property) { ret = -ENOENT; goto done; } - property = obj_to_property(obj); if (property->flags & (DRM_MODE_PROP_ENUM | DRM_MODE_PROP_BITMASK)) { list_for_each_entry(prop_enum, &property->enum_blob_list, head) @@ -3591,7 +3565,6 @@ static void drm_property_destroy_blob(struct drm_device *dev, int drm_mode_getblob_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_mode_object *obj; struct drm_mode_get_blob *out_resp = data; struct drm_property_blob *blob; int ret = 0; @@ -3601,12 +3574,11 @@ int drm_mode_getblob_ioctl(struct drm_device *dev, return -EINVAL; drm_modeset_lock_all(dev); - obj = drm_mode_object_find(dev, out_resp->blob_id, DRM_MODE_OBJECT_BLOB); - if (!obj) { + blob = drm_property_blob_find(dev, out_resp->blob_id); + if (!blob) { ret = -ENOENT; goto done; } - blob = obj_to_blob(obj); if (out_resp->length == blob->length) { blob_ptr = (void __user *)(unsigned long)out_resp->data; @@ -3988,7 +3960,6 @@ int drm_mode_gamma_set_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_crtc_lut *crtc_lut = data; - struct drm_mode_object *obj; struct drm_crtc *crtc; void *r_base, *g_base, *b_base; int size; @@ -3998,12 +3969,11 @@ int drm_mode_gamma_set_ioctl(struct drm_device *dev, return -EINVAL; drm_modeset_lock_all(dev); - obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC); - if (!obj) { + crtc = drm_crtc_find(dev, crtc_lut->crtc_id); + if (!crtc) { ret = -ENOENT; goto out; } - crtc = obj_to_crtc(obj); if (crtc->funcs->gamma_set == NULL) { ret = -ENOSYS; @@ -4062,7 +4032,6 @@ int drm_mode_gamma_get_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_crtc_lut *crtc_lut = data; - struct drm_mode_object *obj; struct drm_crtc *crtc; void *r_base, *g_base, *b_base; int size; @@ -4072,12 +4041,11 @@ int drm_mode_gamma_get_ioctl(struct drm_device *dev, return -EINVAL; drm_modeset_lock_all(dev); - obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC); - if (!obj) { + crtc = drm_crtc_find(dev, crtc_lut->crtc_id); + if (!crtc) { ret = -ENOENT; goto out; } - crtc = obj_to_crtc(obj); /* memcpy into gamma store */ if (crtc_lut->gamma_size != crtc->gamma_size) { @@ -4130,7 +4098,6 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_crtc_page_flip *page_flip = data; - struct drm_mode_object *obj; struct drm_crtc *crtc; struct drm_framebuffer *fb = NULL, *old_fb = NULL; struct drm_pending_vblank_event *e = NULL; @@ -4144,10 +4111,9 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, if ((page_flip->flags & DRM_MODE_PAGE_FLIP_ASYNC) && !dev->mode_config.async_page_flip) return -EINVAL; - obj = drm_mode_object_find(dev, page_flip->crtc_id, DRM_MODE_OBJECT_CRTC); - if (!obj) + crtc = drm_crtc_find(dev, page_flip->crtc_id); + if (!crtc) return -ENOENT; - crtc = obj_to_crtc(obj); mutex_lock(&crtc->mutex); if (crtc->primary->fb == NULL) { diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index e8fe9d8e135c..90bd1a25e2c6 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -1061,6 +1061,15 @@ extern int drm_format_vert_chroma_subsampling(uint32_t format); extern const char *drm_get_format_name(uint32_t format); /* Helpers */ + +static inline struct drm_plane *drm_plane_find(struct drm_device *dev, + uint32_t id) +{ + struct drm_mode_object *mo; + mo = drm_mode_object_find(dev, id, DRM_MODE_OBJECT_PLANE); + return mo ? obj_to_plane(mo) : NULL; +} + static inline struct drm_crtc *drm_crtc_find(struct drm_device *dev, uint32_t id) { @@ -1077,6 +1086,30 @@ static inline struct drm_encoder *drm_encoder_find(struct drm_device *dev, return mo ? obj_to_encoder(mo) : NULL; } +static inline struct drm_connector *drm_connector_find(struct drm_device *dev, + uint32_t id) +{ + struct drm_mode_object *mo; + mo = drm_mode_object_find(dev, id, DRM_MODE_OBJECT_CONNECTOR); + return mo ? obj_to_connector(mo) : NULL; +} + +static inline struct drm_property *drm_property_find(struct drm_device *dev, + uint32_t id) +{ + struct drm_mode_object *mo; + mo = drm_mode_object_find(dev, id, DRM_MODE_OBJECT_PROPERTY); + return mo ? obj_to_property(mo) : NULL; +} + +static inline struct drm_property_blob * +drm_property_blob_find(struct drm_device *dev, uint32_t id) +{ + struct drm_mode_object *mo; + mo = drm_mode_object_find(dev, id, DRM_MODE_OBJECT_BLOB); + return mo ? obj_to_blob(mo) : NULL; +} + /* Plane list iterator for legacy (overlay only) planes. */ #define drm_for_each_legacy_plane(plane, planelist) \ list_for_each_entry(plane, planelist, head) \ -- cgit v1.2.3-71-gd317 From 5ea22f24d77b511d68c4ecaf4e6fd5d6ab462b8f Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 30 May 2014 11:34:01 -0400 Subject: drm: add extended property types If we continue to use bitmask for type, we will quickly run out of room to add new types. Split this up so existing part of bitmask range continues to function as before, but reserve a chunk of the remaining space for an integer type-id. Wrap this all up in some type-check helpers to keep the backwards-compat uglyness contained. Signed-off-by: Rob Clark Reviewed-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 26 +++++++++++++++++--------- include/drm/drm_crtc.h | 17 +++++++++++++++++ include/uapi/drm/drm_mode.h | 13 +++++++++++++ 3 files changed, 47 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index c4b44be7d464..73f543af4924 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -3094,6 +3094,9 @@ struct drm_property *drm_property_create(struct drm_device *dev, int flags, } list_add_tail(&property->head, &dev->mode_config.property_list); + + WARN_ON(!drm_property_type_valid(property)); + return property; fail: kfree(property->values); @@ -3251,14 +3254,16 @@ int drm_property_add_enum(struct drm_property *property, int index, { struct drm_property_enum *prop_enum; - if (!(property->flags & (DRM_MODE_PROP_ENUM | DRM_MODE_PROP_BITMASK))) + if (!(drm_property_type_is(property, DRM_MODE_PROP_ENUM) || + drm_property_type_is(property, DRM_MODE_PROP_BITMASK))) return -EINVAL; /* * Bitmask enum properties have the additional constraint of values * from 0 to 63 */ - if ((property->flags & DRM_MODE_PROP_BITMASK) && (value > 63)) + if (drm_property_type_is(property, DRM_MODE_PROP_BITMASK) && + (value > 63)) return -EINVAL; if (!list_empty(&property->enum_blob_list)) { @@ -3439,10 +3444,11 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev, goto done; } - if (property->flags & (DRM_MODE_PROP_ENUM | DRM_MODE_PROP_BITMASK)) { + if (drm_property_type_is(property, DRM_MODE_PROP_ENUM) || + drm_property_type_is(property, DRM_MODE_PROP_BITMASK)) { list_for_each_entry(prop_enum, &property->enum_blob_list, head) enum_count++; - } else if (property->flags & DRM_MODE_PROP_BLOB) { + } else if (drm_property_type_is(property, DRM_MODE_PROP_BLOB)) { list_for_each_entry(prop_blob, &property->enum_blob_list, head) blob_count++; } @@ -3464,7 +3470,8 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev, } out_resp->count_values = value_count; - if (property->flags & (DRM_MODE_PROP_ENUM | DRM_MODE_PROP_BITMASK)) { + if (drm_property_type_is(property, DRM_MODE_PROP_ENUM) || + drm_property_type_is(property, DRM_MODE_PROP_BITMASK)) { if ((out_resp->count_enum_blobs >= enum_count) && enum_count) { copied = 0; enum_ptr = (struct drm_mode_property_enum __user *)(unsigned long)out_resp->enum_blob_ptr; @@ -3486,7 +3493,7 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev, out_resp->count_enum_blobs = enum_count; } - if (property->flags & DRM_MODE_PROP_BLOB) { + if (drm_property_type_is(property, DRM_MODE_PROP_BLOB)) { if ((out_resp->count_enum_blobs >= blob_count) && blob_count) { copied = 0; blob_id_ptr = (uint32_t __user *)(unsigned long)out_resp->enum_blob_ptr; @@ -3640,17 +3647,18 @@ static bool drm_property_change_is_valid(struct drm_property *property, { if (property->flags & DRM_MODE_PROP_IMMUTABLE) return false; - if (property->flags & DRM_MODE_PROP_RANGE) { + + if (drm_property_type_is(property, DRM_MODE_PROP_RANGE)) { if (value < property->values[0] || value > property->values[1]) return false; return true; - } else if (property->flags & DRM_MODE_PROP_BITMASK) { + } else if (drm_property_type_is(property, DRM_MODE_PROP_BITMASK)) { int i; uint64_t valid_mask = 0; for (i = 0; i < property->num_values; i++) valid_mask |= (1ULL << property->values[i]); return !(value & ~valid_mask); - } else if (property->flags & DRM_MODE_PROP_BLOB) { + } else if (drm_property_type_is(property, DRM_MODE_PROP_BLOB)) { /* Only the driver knows */ return true; } else { diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 90bd1a25e2c6..92f6ec2de86a 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -930,6 +930,23 @@ extern void drm_mode_config_cleanup(struct drm_device *dev); extern int drm_mode_connector_update_edid_property(struct drm_connector *connector, struct edid *edid); + +static inline bool drm_property_type_is(struct drm_property *property, + uint32_t type) +{ + /* instanceof for props.. handles extended type vs original types: */ + if (property->flags & DRM_MODE_PROP_EXTENDED_TYPE) + return (property->flags & DRM_MODE_PROP_EXTENDED_TYPE) == type; + return property->flags & type; +} + +static inline bool drm_property_type_valid(struct drm_property *property) +{ + if (property->flags & DRM_MODE_PROP_EXTENDED_TYPE) + return !(property->flags & DRM_MODE_PROP_LEGACY_TYPE); + return !!(property->flags & DRM_MODE_PROP_LEGACY_TYPE); +} + extern int drm_object_property_set_value(struct drm_mode_object *obj, struct drm_property *property, uint64_t val); diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 719add464f95..caeaa6f7ebde 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -252,6 +252,19 @@ struct drm_mode_get_connector { #define DRM_MODE_PROP_BLOB (1<<4) #define DRM_MODE_PROP_BITMASK (1<<5) /* bitmask of enumerated types */ +/* non-extended types: legacy bitmask, one bit per type: */ +#define DRM_MODE_PROP_LEGACY_TYPE ( \ + DRM_MODE_PROP_RANGE | \ + DRM_MODE_PROP_ENUM | \ + DRM_MODE_PROP_BLOB | \ + DRM_MODE_PROP_BITMASK) + +/* extended-types: rather than continue to consume a bit per type, + * grab a chunk of the bits to use as integer type id. + */ +#define DRM_MODE_PROP_EXTENDED_TYPE 0x0000ffc0 +#define DRM_MODE_PROP_TYPE(n) ((n) << 6) + struct drm_mode_property_enum { __u64 value; char name[DRM_PROP_NAME_LEN]; -- cgit v1.2.3-71-gd317 From 98f75de40e9d83c3a90d294b8fd25fa2874212a9 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 30 May 2014 11:37:03 -0400 Subject: drm: add object property type An object property is an id (idr) for a drm mode object. This will allow a property to be used set/get a framebuffer, CRTC, etc. Signed-off-by: Rob Clark Reviewed-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 61 +++++++++++++++++++++++++++++++++++++++------ include/drm/drm_crtc.h | 5 ++++ include/uapi/drm/drm_mode.h | 1 + 3 files changed, 60 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 73f543af4924..f990d9f180f0 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -370,6 +370,21 @@ void drm_mode_object_put(struct drm_device *dev, mutex_unlock(&dev->mode_config.idr_mutex); } +static struct drm_mode_object *_object_find(struct drm_device *dev, + uint32_t id, uint32_t type) +{ + struct drm_mode_object *obj = NULL; + + mutex_lock(&dev->mode_config.idr_mutex); + obj = idr_find(&dev->mode_config.crtc_idr, id); + if (!obj || (type != DRM_MODE_OBJECT_ANY && obj->type != type) || + (obj->id != id)) + obj = NULL; + mutex_unlock(&dev->mode_config.idr_mutex); + + return obj; +} + /** * drm_mode_object_find - look up a drm object with static lifetime * @dev: drm device @@ -377,7 +392,9 @@ void drm_mode_object_put(struct drm_device *dev, * @type: type of the mode object * * Note that framebuffers cannot be looked up with this functions - since those - * are reference counted, they need special treatment. + * are reference counted, they need special treatment. Even with + * DRM_MODE_OBJECT_ANY (although that will simply return NULL + * rather than WARN_ON()). */ struct drm_mode_object *drm_mode_object_find(struct drm_device *dev, uint32_t id, uint32_t type) @@ -387,13 +404,10 @@ struct drm_mode_object *drm_mode_object_find(struct drm_device *dev, /* Framebuffers are reference counted and need their own lookup * function.*/ WARN_ON(type == DRM_MODE_OBJECT_FB); - - mutex_lock(&dev->mode_config.idr_mutex); - obj = idr_find(&dev->mode_config.crtc_idr, id); - if (!obj || (obj->type != type) || (obj->id != id)) + obj = _object_find(dev, id, type); + /* don't leak out unref'd fb's */ + if (obj && (obj->type == DRM_MODE_OBJECT_FB)) obj = NULL; - mutex_unlock(&dev->mode_config.idr_mutex); - return obj; } EXPORT_SYMBOL(drm_mode_object_find); @@ -3074,6 +3088,8 @@ struct drm_property *drm_property_create(struct drm_device *dev, int flags, if (!property) return NULL; + property->dev = dev; + if (num_values) { property->values = kzalloc(sizeof(uint64_t)*num_values, GFP_KERNEL); if (!property->values) @@ -3234,6 +3250,23 @@ struct drm_property *drm_property_create_range(struct drm_device *dev, int flags } EXPORT_SYMBOL(drm_property_create_range); +struct drm_property *drm_property_create_object(struct drm_device *dev, + int flags, const char *name, uint32_t type) +{ + struct drm_property *property; + + flags |= DRM_MODE_PROP_OBJECT; + + property = drm_property_create(dev, flags, name, 1); + if (!property) + return NULL; + + property->values[0] = type; + + return property; +} +EXPORT_SYMBOL(drm_property_create_object); + /** * drm_property_add_enum - add a possible value to an enumeration property * @property: enumeration property to change @@ -3661,6 +3694,20 @@ static bool drm_property_change_is_valid(struct drm_property *property, } else if (drm_property_type_is(property, DRM_MODE_PROP_BLOB)) { /* Only the driver knows */ return true; + } else if (drm_property_type_is(property, DRM_MODE_PROP_OBJECT)) { + struct drm_mode_object *obj; + /* a zero value for an object property translates to null: */ + if (value == 0) + return true; + /* + * NOTE: use _object_find() directly to bypass restriction on + * looking up refcnt'd objects (ie. fb's). For a refcnt'd + * object this could race against object finalization, so it + * simply tells us that the object *was* valid. Which is good + * enough. + */ + obj = _object_find(property->dev, value, property->values[0]); + return obj != NULL; } else { int i; for (i = 0; i < property->num_values; i++) diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 92f6ec2de86a..cb2e599f769f 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -50,6 +50,7 @@ struct drm_clip_rect; #define DRM_MODE_OBJECT_BLOB 0xbbbbbbbb #define DRM_MODE_OBJECT_PLANE 0xeeeeeeee #define DRM_MODE_OBJECT_BRIDGE 0xbdbdbdbd +#define DRM_MODE_OBJECT_ANY 0 struct drm_mode_object { uint32_t id; @@ -190,6 +191,7 @@ struct drm_property { char name[DRM_PROP_NAME_LEN]; uint32_t num_values; uint64_t *values; + struct drm_device *dev; struct list_head enum_blob_list; }; @@ -980,6 +982,8 @@ struct drm_property *drm_property_create_bitmask(struct drm_device *dev, struct drm_property *drm_property_create_range(struct drm_device *dev, int flags, const char *name, uint64_t min, uint64_t max); +struct drm_property *drm_property_create_object(struct drm_device *dev, + int flags, const char *name, uint32_t type); extern void drm_property_destroy(struct drm_device *dev, struct drm_property *property); extern int drm_property_add_enum(struct drm_property *property, int index, uint64_t value, const char *name); @@ -995,6 +999,7 @@ extern int drm_mode_crtc_set_gamma_size(struct drm_crtc *crtc, int gamma_size); extern struct drm_mode_object *drm_mode_object_find(struct drm_device *dev, uint32_t id, uint32_t type); + /* IOCTLs */ extern int drm_mode_getresources(struct drm_device *dev, void *data, struct drm_file *file_priv); diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index caeaa6f7ebde..57f46348befe 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -264,6 +264,7 @@ struct drm_mode_get_connector { */ #define DRM_MODE_PROP_EXTENDED_TYPE 0x0000ffc0 #define DRM_MODE_PROP_TYPE(n) ((n) << 6) +#define DRM_MODE_PROP_OBJECT DRM_MODE_PROP_TYPE(1) struct drm_mode_property_enum { __u64 value; -- cgit v1.2.3-71-gd317 From ebc44cf386734374983922c6fc1ae8ac47f88bef Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 12 Sep 2012 22:22:31 -0500 Subject: drm: add signed-range property type Like range, but values are signed. Signed-off-by: Rob Clark Reviewed-by: David Herrmann --- drivers/gpu/drm/drm_crtc.c | 45 +++++++++++++++++++++++++++++++++------------ include/drm/drm_crtc.h | 12 ++++++++++++ include/uapi/drm/drm_mode.h | 1 + 3 files changed, 46 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index f990d9f180f0..6127073407e0 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -3213,6 +3213,22 @@ struct drm_property *drm_property_create_bitmask(struct drm_device *dev, } EXPORT_SYMBOL(drm_property_create_bitmask); +static struct drm_property *property_create_range(struct drm_device *dev, + int flags, const char *name, + uint64_t min, uint64_t max) +{ + struct drm_property *property; + + property = drm_property_create(dev, flags, name, 2); + if (!property) + return NULL; + + property->values[0] = min; + property->values[1] = max; + + return property; +} + /** * drm_property_create - create a new ranged property type * @dev: drm device @@ -3235,21 +3251,20 @@ struct drm_property *drm_property_create_range(struct drm_device *dev, int flags const char *name, uint64_t min, uint64_t max) { - struct drm_property *property; - - flags |= DRM_MODE_PROP_RANGE; - - property = drm_property_create(dev, flags, name, 2); - if (!property) - return NULL; - - property->values[0] = min; - property->values[1] = max; - - return property; + return property_create_range(dev, DRM_MODE_PROP_RANGE | flags, + name, min, max); } EXPORT_SYMBOL(drm_property_create_range); +struct drm_property *drm_property_create_signed_range(struct drm_device *dev, + int flags, const char *name, + int64_t min, int64_t max) +{ + return property_create_range(dev, DRM_MODE_PROP_SIGNED_RANGE | flags, + name, I642U64(min), I642U64(max)); +} +EXPORT_SYMBOL(drm_property_create_signed_range); + struct drm_property *drm_property_create_object(struct drm_device *dev, int flags, const char *name, uint32_t type) { @@ -3685,6 +3700,12 @@ static bool drm_property_change_is_valid(struct drm_property *property, if (value < property->values[0] || value > property->values[1]) return false; return true; + } else if (drm_property_type_is(property, DRM_MODE_PROP_SIGNED_RANGE)) { + int64_t svalue = U642I64(value); + if (svalue < U642I64(property->values[0]) || + svalue > U642I64(property->values[1])) + return false; + return true; } else if (drm_property_type_is(property, DRM_MODE_PROP_BITMASK)) { int i; uint64_t valid_mask = 0; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index cb2e599f769f..849cfdccff09 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -65,6 +65,15 @@ struct drm_object_properties { uint64_t values[DRM_OBJECT_MAX_PROPERTY]; }; +static inline int64_t U642I64(uint64_t val) +{ + return (int64_t)*((int64_t *)&val); +} +static inline uint64_t I642U64(int64_t val) +{ + return (uint64_t)*((uint64_t *)&val); +} + enum drm_connector_force { DRM_FORCE_UNSPECIFIED, DRM_FORCE_OFF, @@ -982,6 +991,9 @@ struct drm_property *drm_property_create_bitmask(struct drm_device *dev, struct drm_property *drm_property_create_range(struct drm_device *dev, int flags, const char *name, uint64_t min, uint64_t max); +struct drm_property *drm_property_create_signed_range(struct drm_device *dev, + int flags, const char *name, + int64_t min, int64_t max); struct drm_property *drm_property_create_object(struct drm_device *dev, int flags, const char *name, uint32_t type); extern void drm_property_destroy(struct drm_device *dev, struct drm_property *property); diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 57f46348befe..def54f9e07ca 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -265,6 +265,7 @@ struct drm_mode_get_connector { #define DRM_MODE_PROP_EXTENDED_TYPE 0x0000ffc0 #define DRM_MODE_PROP_TYPE(n) ((n) << 6) #define DRM_MODE_PROP_OBJECT DRM_MODE_PROP_TYPE(1) +#define DRM_MODE_PROP_SIGNED_RANGE DRM_MODE_PROP_TYPE(2) struct drm_mode_property_enum { __u64 value; -- cgit v1.2.3-71-gd317 From 6e9f798d91c526982cca0026cd451e8fdbf18aaf Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 29 May 2014 23:54:47 +0200 Subject: drm: Split connection_mutex out of mode_config.mutex (v3) After the split-out of crtc locks from the big mode_config.mutex there's still two major areas it protects: - Various connector probe states, like connector->status, EDID properties, probed mode lists and similar information. - The links from connector->encoder and encoder->crtc and other modeset-relevant connector state (e.g. properties which control the panel fitter). The later is used by modeset operations. But they don't really care about the former since it's allowed to e.g. enable a disconnected VGA output or with a mode not in the probed list. Thus far this hasn't been a problem, but for the atomic modeset conversion Rob Clark needs to convert all modeset relevant locks into w/w locks. This is required because the order of acquisition is determined by how userspace supplies the atomic modeset data. This has run into troubles in the detect path since the i915 load detect code needs _both_ protections offered by the mode_config.mutex: It updates probe state and it needs to change the modeset configuration to enable the temporary load detect pipe. The big deal here is that for the probe/detect users of this lock a plain mutex fits best, but for atomic modesets we really want a w/w mutex. To fix this lets split out a new connection_mutex lock for the modeset relevant parts. For simplicity I've decided to only add one additional lock for all connector/encoder links and modeset configuration states. We have piles of different modeset objects in addition to those (like bridges or panels), so adding per-object locks would be much more effort. Also, we're guaranteed (at least for now) to do a full modeset if we need to acquire this lock. Which means that fine-grained locking is fairly irrelevant compared to the amount of time the full modeset will take. I've done a full audit, and there's just a few things that justify special focus: - Locking in drm_sysfs.c is almost completely absent. We should sprinkle mode_config.connection_mutex over this file a bit, but since it already lacks mode_config.mutex this patch wont make the situation any worse. This is material for a follow-up patch. - omap has a omap_framebuffer_flush function which walks the connector->encoder->crtc links and is called from many contexts. Some look like they don't acquire mode_config.mutex, so this is already racy. Again fixing this is material for a separate patch. - The radeon hot_plug function to retrain DP links looks at connector->dpms. Currently this happens without any locking, so is already racy. I think radeon_hotplug_work_func should gain mutex_lock/unlock calls for the mode_config.connection_mutex. - Same applies to i915's intel_dp_hot_plug. But again, this is already racy. - i915 load_detect code needs to acquire this lock. Which means the w/w dance due to Rob's work will be nicely contained to _just_ this function. I've added fixme comments everywhere where it looks suspicious but in the sysfs code. After a quick irc discussion with Dave Airlie it sounds like the lack of locking in there is due to sysfs cleanup fun at module unload. v1: original (only compile tested) v2: missing mutex_init(), etc (from Rob Clark) v3: i915 needs more care in the conversion: - Protect the edp pp logic with the connection_mutex. - Use connection_mutex in the backlight code due to get_pipe_from_connector. - Use drm_modeset_lock_all in suspend/resume paths. - Update lock checks in the overlay code. Cc: Alex Deucher Cc: Rob Clark Signed-off-by: Daniel Vetter Reviewed-by: Rob Clark --- drivers/gpu/drm/drm_crtc.c | 8 ++++++++ drivers/gpu/drm/drm_crtc_helper.c | 2 ++ drivers/gpu/drm/drm_edid.c | 2 ++ drivers/gpu/drm/drm_plane_helper.c | 7 +++++++ drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/i915_drv.c | 6 ++---- drivers/gpu/drm/i915/intel_display.c | 16 ++++++++++++---- drivers/gpu/drm/i915/intel_dp.c | 15 ++++++++------- drivers/gpu/drm/i915/intel_opregion.c | 4 ++-- drivers/gpu/drm/i915/intel_overlay.c | 4 ++-- drivers/gpu/drm/i915/intel_panel.c | 8 ++++---- drivers/gpu/drm/omapdrm/omap_fb.c | 1 + drivers/gpu/drm/radeon/radeon_connectors.c | 1 + include/drm/drm_crtc.h | 1 + 14 files changed, 54 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 5b3e2920c376..f3b98d4b6f46 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -54,6 +54,8 @@ void drm_modeset_lock_all(struct drm_device *dev) mutex_lock(&dev->mode_config.mutex); + mutex_lock(&dev->mode_config.connection_mutex); + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) mutex_lock_nest_lock(&crtc->mutex, &dev->mode_config.mutex); } @@ -72,6 +74,8 @@ void drm_modeset_unlock_all(struct drm_device *dev) list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) mutex_unlock(&crtc->mutex); + mutex_unlock(&dev->mode_config.connection_mutex); + mutex_unlock(&dev->mode_config.mutex); } EXPORT_SYMBOL(drm_modeset_unlock_all); @@ -93,6 +97,7 @@ void drm_warn_on_modeset_not_all_locked(struct drm_device *dev) list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) WARN_ON(!mutex_is_locked(&crtc->mutex)); + WARN_ON(!mutex_is_locked(&dev->mode_config.connection_mutex)); WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); } EXPORT_SYMBOL(drm_warn_on_modeset_not_all_locked); @@ -1793,6 +1798,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, DRM_DEBUG_KMS("[CONNECTOR:%d:?]\n", out_resp->connector_id); mutex_lock(&dev->mode_config.mutex); + mutex_lock(&dev->mode_config.connection_mutex); connector = drm_connector_find(dev, out_resp->connector_id); if (!connector) { @@ -1891,6 +1897,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, out_resp->count_encoders = encoders_count; out: + mutex_unlock(&dev->mode_config.connection_mutex); mutex_unlock(&dev->mode_config.mutex); return ret; @@ -4640,6 +4647,7 @@ EXPORT_SYMBOL(drm_format_vert_chroma_subsampling); void drm_mode_config_init(struct drm_device *dev) { mutex_init(&dev->mode_config.mutex); + mutex_init(&dev->mode_config.connection_mutex); mutex_init(&dev->mode_config.idr_mutex); mutex_init(&dev->mode_config.fb_lock); INIT_LIST_HEAD(&dev->mode_config.fb_list); diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 231c9433341f..b55d27c872f6 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -89,6 +89,8 @@ bool drm_helper_encoder_in_use(struct drm_encoder *encoder) struct drm_device *dev = encoder->dev; WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); + WARN_ON(!mutex_is_locked(&dev->mode_config.connection_mutex)); + list_for_each_entry(connector, &dev->mode_config.connector_list, head) if (connector->encoder == encoder) return true; diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 085e5550aac5..7be21781d3bd 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -3304,6 +3304,8 @@ struct drm_connector *drm_select_eld(struct drm_encoder *encoder, struct drm_connector *connector; struct drm_device *dev = encoder->dev; + WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); + list_for_each_entry(connector, &dev->mode_config.connector_list, head) if (connector->encoder == encoder && connector->eld[0]) return connector; diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c index d966afa7ecae..458d9bf09209 100644 --- a/drivers/gpu/drm/drm_plane_helper.c +++ b/drivers/gpu/drm/drm_plane_helper.c @@ -54,6 +54,13 @@ static int get_connectors_for_crtc(struct drm_crtc *crtc, struct drm_connector *connector; int count = 0; + /* + * Note: Once we change the plane hooks to more fine-grained locking we + * need to grab the connection_mutex here to be able to make these + * checks. + */ + WARN_ON(!mutex_is_locked(&dev->mode_config.connection_mutex)); + list_for_each_entry(connector, &dev->mode_config.connector_list, head) if (connector->encoder && connector->encoder->crtc == crtc) { if (connector_list != NULL && count < num_connectors) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 36f4c15f538b..169fc2d8c554 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2609,7 +2609,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_device *dev, enum pipe pipe, *source = INTEL_PIPE_CRC_SOURCE_PIPE; - mutex_lock(&dev->mode_config.mutex); + drm_modeset_lock_all(dev); list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { if (!encoder->base.crtc) @@ -2645,7 +2645,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_device *dev, enum pipe pipe, break; } } - mutex_unlock(&dev->mode_config.mutex); + drm_modeset_unlock_all(dev); return ret; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 4619c9e51907..5b5b82c3f570 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -533,13 +533,11 @@ static int i915_drm_freeze(struct drm_device *dev) * Disable CRTCs directly since we want to preserve sw state * for _thaw. */ - mutex_lock(&dev->mode_config.mutex); + drm_modeset_lock_all(dev); for_each_crtc(dev, crtc) { - mutex_lock(&crtc->mutex); dev_priv->display.crtc_disable(crtc); - mutex_unlock(&crtc->mutex); } - mutex_unlock(&dev->mode_config.mutex); + drm_modeset_unlock_all(dev); intel_modeset_suspend_hw(dev); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e80b2917e3cb..c2976ade823f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8323,6 +8323,8 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector, connector->base.id, connector->name, encoder->base.id, encoder->name); + mutex_lock(&dev->mode_config.connection_mutex); + /* * Algorithm gets a little messy: * @@ -8365,7 +8367,7 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector, */ if (!crtc) { DRM_DEBUG_KMS("no pipe available for load-detect\n"); - return false; + goto fail_unlock_connector; } mutex_lock(&crtc->mutex); @@ -8419,6 +8421,9 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector, else intel_crtc->new_config = NULL; mutex_unlock(&crtc->mutex); +fail_unlock_connector: + mutex_unlock(&dev->mode_config.connection_mutex); + return false; } @@ -8448,6 +8453,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector, } mutex_unlock(&crtc->mutex); + mutex_unlock(&connector->dev->mode_config.connection_mutex); return; } @@ -8456,6 +8462,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector, connector->funcs->dpms(connector, old->dpms_mode); mutex_unlock(&crtc->mutex); + mutex_unlock(&connector->dev->mode_config.connection_mutex); } static int i9xx_pll_refclk(struct drm_device *dev, @@ -10986,8 +10993,9 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) enum pipe intel_get_pipe_from_connector(struct intel_connector *connector) { struct drm_encoder *encoder = connector->base.encoder; + struct drm_device *dev = connector->base.dev; - WARN_ON(!mutex_is_locked(&connector->base.dev->mode_config.mutex)); + WARN_ON(!mutex_is_locked(&dev->mode_config.connection_mutex)); if (!encoder) return INVALID_PIPE; @@ -11756,9 +11764,9 @@ void intel_modeset_init(struct drm_device *dev) /* Just in case the BIOS is doing something questionable. */ intel_disable_fbc(dev); - mutex_lock(&dev->mode_config.mutex); + drm_modeset_lock_all(dev); intel_modeset_setup_hw_state(dev, false); - mutex_unlock(&dev->mode_config.mutex); + drm_modeset_unlock_all(dev); for_each_intel_crtc(dev, crtc) { if (!crtc->active) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index d94f0271e422..c4d883921282 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1154,7 +1154,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) u32 pp; u32 pp_stat_reg, pp_ctrl_reg; - WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); + WARN_ON(!mutex_is_locked(&dev->mode_config.connection_mutex)); if (!intel_dp->want_panel_vdd && edp_have_panel_vdd(intel_dp)) { struct intel_digital_port *intel_dig_port = @@ -1191,9 +1191,9 @@ static void edp_panel_vdd_work(struct work_struct *__work) struct intel_dp, panel_vdd_work); struct drm_device *dev = intel_dp_to_dev(intel_dp); - mutex_lock(&dev->mode_config.mutex); + mutex_lock(&dev->mode_config.connection_mutex); edp_panel_vdd_off_sync(intel_dp); - mutex_unlock(&dev->mode_config.mutex); + mutex_unlock(&dev->mode_config.connection_mutex); } static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync) @@ -3235,6 +3235,7 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) u8 sink_irq_vector; u8 link_status[DP_LINK_STATUS_SIZE]; + /* FIXME: This access isn't protected by any locks. */ if (!intel_encoder->connectors_active) return; @@ -3665,9 +3666,9 @@ void intel_dp_encoder_destroy(struct drm_encoder *encoder) drm_encoder_cleanup(encoder); if (is_edp(intel_dp)) { cancel_delayed_work_sync(&intel_dp->panel_vdd_work); - mutex_lock(&dev->mode_config.mutex); + mutex_lock(&dev->mode_config.connection_mutex); edp_panel_vdd_off_sync(intel_dp); - mutex_unlock(&dev->mode_config.mutex); + mutex_unlock(&dev->mode_config.connection_mutex); } kfree(intel_dig_port); } @@ -4246,9 +4247,9 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, drm_dp_aux_unregister_i2c_bus(&intel_dp->aux); if (is_edp(intel_dp)) { cancel_delayed_work_sync(&intel_dp->panel_vdd_work); - mutex_lock(&dev->mode_config.mutex); + mutex_lock(&dev->mode_config.connection_mutex); edp_panel_vdd_off_sync(intel_dp); - mutex_unlock(&dev->mode_config.mutex); + mutex_unlock(&dev->mode_config.connection_mutex); } drm_sysfs_connector_remove(connector); drm_connector_cleanup(connector); diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index acde2945eb8a..b812e9d39f38 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -410,7 +410,7 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp) if (bclp > 255) return ASLC_BACKLIGHT_FAILED; - mutex_lock(&dev->mode_config.mutex); + mutex_lock(&dev->mode_config.connection_mutex); /* * Update backlight on all connectors that support backlight (usually @@ -421,7 +421,7 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp) intel_panel_set_backlight(intel_connector, bclp, 255); iowrite32(DIV_ROUND_UP(bclp * 100, 255) | ASLE_CBLV_VALID, &asle->cblv); - mutex_unlock(&dev->mode_config.mutex); + mutex_unlock(&dev->mode_config.connection_mutex); return 0; diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 27eb820d1b19..e97ea33e0117 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -688,7 +688,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, u32 swidth, swidthsw, sheight, ostride; BUG_ON(!mutex_is_locked(&dev->struct_mutex)); - BUG_ON(!mutex_is_locked(&dev->mode_config.mutex)); + BUG_ON(!mutex_is_locked(&dev->mode_config.connection_mutex)); BUG_ON(!overlay); ret = intel_overlay_release_old_vid(overlay); @@ -793,7 +793,7 @@ int intel_overlay_switch_off(struct intel_overlay *overlay) int ret; BUG_ON(!mutex_is_locked(&dev->struct_mutex)); - BUG_ON(!mutex_is_locked(&dev->mode_config.mutex)); + BUG_ON(!mutex_is_locked(&dev->mode_config.connection_mutex)); ret = intel_overlay_recover_from_interrupt(overlay); if (ret != 0) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 3fb3f939dca5..d4d415665475 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -876,12 +876,12 @@ static int intel_backlight_device_update_status(struct backlight_device *bd) struct intel_connector *connector = bl_get_data(bd); struct drm_device *dev = connector->base.dev; - mutex_lock(&dev->mode_config.mutex); + mutex_lock(&dev->mode_config.connection_mutex); DRM_DEBUG_KMS("updating intel_backlight, brightness=%d/%d\n", bd->props.brightness, bd->props.max_brightness); intel_panel_set_backlight(connector, bd->props.brightness, bd->props.max_brightness); - mutex_unlock(&dev->mode_config.mutex); + mutex_unlock(&dev->mode_config.connection_mutex); return 0; } @@ -893,9 +893,9 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd) int ret; intel_runtime_pm_get(dev_priv); - mutex_lock(&dev->mode_config.mutex); + mutex_lock(&dev->mode_config.connection_mutex); ret = intel_panel_get_backlight(connector); - mutex_unlock(&dev->mode_config.mutex); + mutex_unlock(&dev->mode_config.connection_mutex); intel_runtime_pm_put(dev_priv); return ret; diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 8b019602ffe6..2a5cacdc344b 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -346,6 +346,7 @@ void omap_framebuffer_flush(struct drm_framebuffer *fb, VERB("flush: %d,%d %dx%d, fb=%p", x, y, w, h, fb); + /* FIXME: This is racy - no protection against modeset config changes. */ while ((connector = omap_framebuffer_get_next_connector(fb, connector))) { /* only consider connectors that are part of a chain */ if (connector->encoder && connector->encoder->crtc) { diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 31d4cf60bae3..4522f7dce653 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -48,6 +48,7 @@ void radeon_connector_hotplug(struct drm_connector *connector) radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); /* if the connector is already off, don't turn it back on */ + /* FIXME: This access isn't protected by any locks. */ if (connector->dpms != DRM_MODE_DPMS_ON) return; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 849cfdccff09..6c295df7b0df 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -738,6 +738,7 @@ struct drm_mode_group { */ struct drm_mode_config { struct mutex mutex; /* protects configuration (mode lists etc.) */ + struct mutex connection_mutex; /* protects connector->encoder and encoder->crtc links */ struct mutex idr_mutex; /* for IDR management */ struct idr crtc_idr; /* use this idr for all IDs, fb, crtc, connector, modes - just makes life easier */ /* this is limited to one for now */ -- cgit v1.2.3-71-gd317 From bfec07d0f8ed78b10df3ca3bc23e27de1166ea45 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 28 May 2014 08:56:09 +0800 Subject: f2fs: avoid overflow when large directory feathure is enabled When large directory feathure is enable, We have one case which could cause overflow in dir_buckets() as following: special case: level + dir_level >= 32 and level < MAX_DIR_HASH_DEPTH / 2. Here we define MAX_DIR_BUCKETS to limit the return value when the condition could trigger potential overflow. Changes from V1 o modify description of calculation in f2fs.txt suggested by Changman Lee. Suggested-by: Changman Lee Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 8 ++++---- fs/f2fs/dir.c | 4 ++-- include/linux/f2fs_fs.h | 3 +++ 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 25311e113e75..51afba17bbae 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -461,11 +461,11 @@ The number of blocks and buckets are determined by, # of blocks in level #n = | `- 4, Otherwise - ,- 2^ (n + dir_level), - | if n < MAX_DIR_HASH_DEPTH / 2, + ,- 2^(n + dir_level), + | if n + dir_level < MAX_DIR_HASH_DEPTH / 2, # of buckets in level #n = | - `- 2^((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1), - Otherwise + `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), + Otherwise When F2FS finds a file name in a directory, at first a hash value of the file name is calculated. Then, F2FS scans the hash table in level #0 to find the diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c3f148555c37..966acb039e3b 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -23,10 +23,10 @@ static unsigned long dir_blocks(struct inode *inode) static unsigned int dir_buckets(unsigned int level, int dir_level) { - if (level < MAX_DIR_HASH_DEPTH / 2) + if (level + dir_level < MAX_DIR_HASH_DEPTH / 2) return 1 << (level + dir_level); else - return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1); + return MAX_DIR_BUCKETS; } static unsigned int bucket_blocks(unsigned int level) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 8c03f71307c6..ba6f3127738f 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -394,6 +394,9 @@ typedef __le32 f2fs_hash_t; /* MAX level for dir lookup */ #define MAX_DIR_HASH_DEPTH 63 +/* MAX buckets in one level of dir */ +#define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1)) + #define SIZE_OF_DIR_ENTRY 11 /* by byte */ #define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ BITS_PER_BYTE) -- cgit v1.2.3-71-gd317 From 1dbe4152168d44fa164edbdc9f1243de70b98f7a Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Mon, 12 May 2014 12:27:43 +0900 Subject: f2fs: large volume support f2fs's cp has one page which consists of struct f2fs_checkpoint and version bitmap of sit and nat. To support lots of segments, we need more blocks for sit bitmap. So let's arrange sit bitmap as following: +-----------------+------------+ | f2fs_checkpoint | sit bitmap | | + nat bitmap | | +-----------------+------------+ 0 4k N blocks Signed-off-by: Changman Lee [Jaegeuk Kim: simple code change for readability] Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 45 ++++++++++++++++++++++++++++++++++++++++----- fs/f2fs/f2fs.h | 13 +++++++++++-- include/linux/f2fs_fs.h | 2 ++ 3 files changed, 53 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index fe968c7bfc90..ecba8da3308b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -371,7 +371,9 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) return; sbi->por_doing = true; - start_blk = __start_cp_addr(sbi) + 1; + + start_blk = __start_cp_addr(sbi) + 1 + + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); orphan_blkaddr = __start_sum_addr(sbi) - 1; ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP); @@ -512,8 +514,11 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned long blk_size = sbi->blocksize; unsigned long long cp1_version = 0, cp2_version = 0; unsigned long long cp_start_blk_no; + unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); + block_t cp_blk_no; + int i; - sbi->ckpt = kzalloc(blk_size, GFP_KERNEL); + sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL); if (!sbi->ckpt) return -ENOMEM; /* @@ -544,6 +549,23 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) cp_block = (struct f2fs_checkpoint *)page_address(cur_page); memcpy(sbi->ckpt, cp_block, blk_size); + if (cp_blks <= 1) + goto done; + + cp_blk_no = le32_to_cpu(fsb->cp_blkaddr); + if (cur_page == cp2) + cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg); + + for (i = 1; i < cp_blks; i++) { + void *sit_bitmap_ptr; + unsigned char *ckpt = (unsigned char *)sbi->ckpt; + + cur_page = get_meta_page(sbi, cp_blk_no + i); + sit_bitmap_ptr = page_address(cur_page); + memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size); + f2fs_put_page(cur_page, 1); + } +done: f2fs_put_page(cp1, 1); f2fs_put_page(cp2, 1); return 0; @@ -736,6 +758,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) __u32 crc32 = 0; void *kaddr; int i; + int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); /* * This avoids to conduct wrong roll-forward operations and uses @@ -786,16 +809,19 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) / F2FS_ORPHANS_PER_BLOCK; - ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks); + ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + + orphan_blocks); if (is_umount) { set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); ckpt->cp_pack_total_block_count = cpu_to_le32(2 + - data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); + cp_payload_blks + data_sum_blocks + + orphan_blocks + NR_CURSEG_NODE_TYPE); } else { clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); ckpt->cp_pack_total_block_count = cpu_to_le32(2 + - data_sum_blocks + orphan_blocks); + cp_payload_blks + data_sum_blocks + + orphan_blocks); } if (sbi->n_orphans) @@ -821,6 +847,15 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) set_page_dirty(cp_page); f2fs_put_page(cp_page, 1); + for (i = 1; i < 1 + cp_payload_blks; i++) { + cp_page = grab_meta_page(sbi, start_blk++); + kaddr = page_address(cp_page); + memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, + (1 << sbi->log_blocksize)); + set_page_dirty(cp_page); + f2fs_put_page(cp_page, 1); + } + if (sbi->n_orphans) { write_orphan_inodes(sbi, start_blk); start_blk += orphan_blocks; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 676a2c6ccec7..9684b1f77a7d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -764,9 +764,18 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - int offset = (flag == NAT_BITMAP) ? + int offset; + + if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) { + if (flag == NAT_BITMAP) + return &ckpt->sit_nat_version_bitmap; + else + return ((unsigned char *)ckpt + F2FS_BLKSIZE); + } else { + offset = (flag == NAT_BITMAP) ? le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; - return &ckpt->sit_nat_version_bitmap + offset; + return &ckpt->sit_nat_version_bitmap + offset; + } } static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index ba6f3127738f..6ff0b0b42d47 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -19,6 +19,7 @@ #define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */ #define F2FS_BLKSIZE 4096 /* support only 4KB block */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ +#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ @@ -75,6 +76,7 @@ struct f2fs_super_block { __le16 volume_name[512]; /* volume name */ __le32 extension_count; /* # of extensions below */ __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ + __le32 cp_payload; } __packed; /* -- cgit v1.2.3-71-gd317 From 39b9004d1f626b88b775c7655d3f286e135dfec6 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 30 Sep 2013 16:13:39 +0200 Subject: gpu: ipu-v3: Move i.MX IPUv3 core driver out of staging The i.MX Image Processing Unit (IPU) contains a number of image processing blocks that sit right in the middle between DRM and V4L2. Some of the modules, such as Display Controller, Processor, and Interface (DC, DP, DI) or CMOS Sensor Interface (CSI) and their FIFOs could be assigned to either framework, but others, such as the dma controller (IDMAC) and image converter (IC) can be used by both. The IPUv3 core driver provides an internal API to access the modules, to be used by both DRM and V4L2 IPUv3 drivers. Signed-off-by: Lucas Stach Signed-off-by: Philipp Zabel Acked-by: Greg Kroah-Hartman --- drivers/gpu/Makefile | 1 + drivers/gpu/ipu-v3/Kconfig | 7 + drivers/gpu/ipu-v3/Makefile | 3 + drivers/gpu/ipu-v3/ipu-common.c | 1261 +++++++++++++++++++++++++++ drivers/gpu/ipu-v3/ipu-dc.c | 411 +++++++++ drivers/gpu/ipu-v3/ipu-di.c | 730 ++++++++++++++++ drivers/gpu/ipu-v3/ipu-dmfc.c | 415 +++++++++ drivers/gpu/ipu-v3/ipu-dp.c | 338 +++++++ drivers/gpu/ipu-v3/ipu-prv.h | 206 +++++ drivers/staging/imx-drm/Kconfig | 11 +- drivers/staging/imx-drm/Makefile | 1 - drivers/staging/imx-drm/imx-hdmi.c | 2 +- drivers/staging/imx-drm/imx-tve.c | 2 +- drivers/staging/imx-drm/ipu-v3/Makefile | 3 - drivers/staging/imx-drm/ipu-v3/imx-ipu-v3.h | 326 ------- drivers/staging/imx-drm/ipu-v3/ipu-common.c | 1261 --------------------------- drivers/staging/imx-drm/ipu-v3/ipu-dc.c | 412 --------- drivers/staging/imx-drm/ipu-v3/ipu-di.c | 730 ---------------- drivers/staging/imx-drm/ipu-v3/ipu-dmfc.c | 415 --------- drivers/staging/imx-drm/ipu-v3/ipu-dp.c | 338 ------- drivers/staging/imx-drm/ipu-v3/ipu-prv.h | 206 ----- drivers/staging/imx-drm/ipuv3-crtc.c | 2 +- drivers/staging/imx-drm/ipuv3-plane.c | 2 +- drivers/video/Kconfig | 1 + include/video/imx-ipu-v3.h | 326 +++++++ 25 files changed, 3704 insertions(+), 3706 deletions(-) create mode 100644 drivers/gpu/ipu-v3/Kconfig create mode 100644 drivers/gpu/ipu-v3/Makefile create mode 100644 drivers/gpu/ipu-v3/ipu-common.c create mode 100644 drivers/gpu/ipu-v3/ipu-dc.c create mode 100644 drivers/gpu/ipu-v3/ipu-di.c create mode 100644 drivers/gpu/ipu-v3/ipu-dmfc.c create mode 100644 drivers/gpu/ipu-v3/ipu-dp.c create mode 100644 drivers/gpu/ipu-v3/ipu-prv.h delete mode 100644 drivers/staging/imx-drm/ipu-v3/Makefile delete mode 100644 drivers/staging/imx-drm/ipu-v3/imx-ipu-v3.h delete mode 100644 drivers/staging/imx-drm/ipu-v3/ipu-common.c delete mode 100644 drivers/staging/imx-drm/ipu-v3/ipu-dc.c delete mode 100644 drivers/staging/imx-drm/ipu-v3/ipu-di.c delete mode 100644 drivers/staging/imx-drm/ipu-v3/ipu-dmfc.c delete mode 100644 drivers/staging/imx-drm/ipu-v3/ipu-dp.c delete mode 100644 drivers/staging/imx-drm/ipu-v3/ipu-prv.h create mode 100644 include/video/imx-ipu-v3.h (limited to 'include') diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile index d8a22c2a579d..70da9eb52a42 100644 --- a/drivers/gpu/Makefile +++ b/drivers/gpu/Makefile @@ -1,2 +1,3 @@ obj-y += drm/ vga/ obj-$(CONFIG_TEGRA_HOST1X) += host1x/ +obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/ diff --git a/drivers/gpu/ipu-v3/Kconfig b/drivers/gpu/ipu-v3/Kconfig new file mode 100644 index 000000000000..2f228a2f2a48 --- /dev/null +++ b/drivers/gpu/ipu-v3/Kconfig @@ -0,0 +1,7 @@ +config IMX_IPUV3_CORE + tristate "IPUv3 core support" + depends on SOC_IMX5 || SOC_IMX6Q || SOC_IMX6SL || ARCH_MULTIPLATFORM + depends on RESET_CONTROLLER + help + Choose this if you have a i.MX5/6 system and want to use the Image + Processing Unit. This option only enables IPU base support. diff --git a/drivers/gpu/ipu-v3/Makefile b/drivers/gpu/ipu-v3/Makefile new file mode 100644 index 000000000000..d21cc37d0498 --- /dev/null +++ b/drivers/gpu/ipu-v3/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_IMX_IPUV3_CORE) += imx-ipu-v3.o + +imx-ipu-v3-objs := ipu-common.o ipu-dc.o ipu-di.o ipu-dp.o ipu-dmfc.o diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c new file mode 100644 index 000000000000..7e1f614e7fbc --- /dev/null +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -0,0 +1,1261 @@ +/* + * Copyright (c) 2010 Sascha Hauer + * Copyright (C) 2005-2009 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include