From 4fe6a946823a9bc8619fd16b7ea7d15914a30f22 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Sep 2021 21:51:01 +0100 Subject: afs: Try to avoid taking RCU read lock when checking vnode validity Try to avoid taking the RCU read lock when checking the validity of a vnode's callback state. The only thing it's needed for is to pin the parent volume's server list whilst we search it to find the record of the server we're currently using to see if it has been reinitialised (ie. it sent us a CB.InitCallBackState* RPC). Do this by the following means: (1) Keep an additional per-cell counter (fs_s_break) that's incremented each time any of the fileservers in the cell reinitialises. Since the new counter can be accessed without RCU from the vnode, we can check that first - and only if it differs, get the RCU read lock and check the volume's server list. (2) Replace afs_get_s_break_rcu() with afs_check_server_good() which now indicates whether the callback promise is still expected to be present on the server. This does the checks as described in (1). (3) Restructure afs_check_validity() to take account of the change in (2). We can also get rid of the valid variable and just use the need_clear variable with the addition of the afs_cb_break_no_promise reason. (4) afs_check_validity() probably shouldn't be altering vnode->cb_v_break and vnode->cb_s_break when it doesn't have cb_lock exclusively locked. Move the change to vnode->cb_v_break to __afs_break_callback(). Delegate the change to vnode->cb_s_break to afs_select_fileserver() and set vnode->cb_fs_s_break there also. (5) afs_validate() no longer needs to get the RCU read lock around its call to afs_check_validity() - and can skip the call entirely if we don't have a promise. Signed-off-by: David Howells Tested-by: Markus Suvanto cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/163111669583.283156.1397603105683094563.stgit@warthog.procyon.org.uk/ --- include/trace/events/afs.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 9f73ed2cf061..bca73e8c8cde 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -306,11 +306,13 @@ enum afs_flock_operation { enum afs_cb_break_reason { afs_cb_break_no_break, + afs_cb_break_no_promise, afs_cb_break_for_callback, afs_cb_break_for_deleted, afs_cb_break_for_lapsed, + afs_cb_break_for_s_reinit, afs_cb_break_for_unlink, - afs_cb_break_for_vsbreak, + afs_cb_break_for_v_break, afs_cb_break_for_volume_callback, afs_cb_break_for_zap, }; @@ -602,11 +604,13 @@ enum afs_cb_break_reason { #define afs_cb_break_reasons \ EM(afs_cb_break_no_break, "no-break") \ + EM(afs_cb_break_no_promise, "no-promise") \ EM(afs_cb_break_for_callback, "break-cb") \ EM(afs_cb_break_for_deleted, "break-del") \ EM(afs_cb_break_for_lapsed, "break-lapsed") \ + EM(afs_cb_break_for_s_reinit, "s-reinit") \ EM(afs_cb_break_for_unlink, "break-unlink") \ - EM(afs_cb_break_for_vsbreak, "break-vs") \ + EM(afs_cb_break_for_v_break, "break-v") \ EM(afs_cb_break_for_volume_callback, "break-v-cb") \ E_(afs_cb_break_for_zap, "break-zap") -- cgit v1.2.3-71-gd317 From 80f0a1f99983296be587325004acf72dd11eccd8 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Mon, 13 Sep 2021 12:02:56 +0200 Subject: workqueue: annotate alloc_workqueue() as printf This also enables checking of allows alloc_ordered_workqueue(). Signed-off-by: Rolf Eike Beer Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 2ebef6b1a3d6..74d3c1efd9bb 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -399,9 +399,8 @@ extern struct workqueue_struct *system_freezable_power_efficient_wq; * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ -struct workqueue_struct *alloc_workqueue(const char *fmt, - unsigned int flags, - int max_active, ...); +__printf(1, 4) struct workqueue_struct * +alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); /** * alloc_ordered_workqueue - allocate an ordered workqueue -- cgit v1.2.3-71-gd317 From 099dd788e31b4f426ef49c2785069804925a84e1 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 13 Sep 2021 14:51:10 -0500 Subject: cifs: remove pathname for file from SPDX header checkpatch complains about source files with filenames (e.g. in these cases just below the SPDX header in comments at the top of various files in fs/cifs). It also is helpful to change this now so will be less confusing when the parent directory is renamed e.g. from fs/cifs to fs/smb_client (or fs/smbfs) Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/cache.c | 2 +- fs/cifs/cifs_debug.c | 1 - fs/cifs/cifs_fs_sb.h | 1 - fs/cifs/cifs_ioctl.h | 1 - fs/cifs/cifs_spnego.c | 2 +- fs/cifs/cifs_spnego.h | 2 +- fs/cifs/cifs_unicode.c | 1 - fs/cifs/cifsacl.c | 1 - fs/cifs/cifsacl.h | 1 - fs/cifs/cifsencrypt.c | 1 - fs/cifs/cifsfs.c | 1 - fs/cifs/cifsfs.h | 1 - fs/cifs/cifsglob.h | 1 - fs/cifs/cifspdu.h | 1 - fs/cifs/cifsproto.h | 1 - fs/cifs/cifssmb.c | 1 - fs/cifs/connect.c | 1 - fs/cifs/dir.c | 1 - fs/cifs/dns_resolve.c | 1 - fs/cifs/dns_resolve.h | 4 ++-- fs/cifs/export.c | 1 - fs/cifs/file.c | 1 - fs/cifs/fscache.c | 2 +- fs/cifs/fscache.h | 2 +- fs/cifs/inode.c | 1 - fs/cifs/ioctl.c | 1 - fs/cifs/link.c | 1 - fs/cifs/misc.c | 1 - fs/cifs/netmisc.c | 1 - fs/cifs/ntlmssp.h | 1 - fs/cifs/readdir.c | 1 - fs/cifs/rfc1002pdu.h | 1 - fs/cifs/sess.c | 1 - fs/cifs/smb2file.c | 1 - fs/cifs/smb2glob.h | 1 - fs/cifs/smb2inode.c | 1 - fs/cifs/smb2misc.c | 1 - fs/cifs/smb2pdu.c | 1 - fs/cifs/smb2pdu.h | 1 - fs/cifs/smb2proto.h | 1 - fs/cifs/smb2status.h | 1 - fs/cifs/smb2transport.c | 1 - fs/cifs/smberr.h | 1 - fs/cifs/transport.c | 1 - fs/cifs/winucase.c | 1 - fs/cifs/xattr.c | 1 - fs/smbfs_common/smbfsctl.h | 2 +- include/uapi/linux/cifs/cifs_mount.h | 1 - 48 files changed, 8 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c index 8a3b30ec860c..8be57aaedab6 100644 --- a/fs/cifs/cache.c +++ b/fs/cifs/cache.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cache.c - CIFS filesystem cache index structure definitions + * CIFS filesystem cache index structure definitions * * Copyright (c) 2010 Novell, Inc. * Authors(s): Suresh Jayaraman (sjayaraman@suse.de> diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 51a824fc926a..de2c12bcfa4b 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * fs/cifs_debug.c * * Copyright (C) International Business Machines Corp., 2000,2005 * diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 4fd788586399..f97407520ea1 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifs_fs_sb.h * * Copyright (c) International Business Machines Corp., 2002,2004 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h index ef723be358af..b87cbbe6d2d4 100644 --- a/fs/cifs/cifs_ioctl.h +++ b/fs/cifs/cifs_ioctl.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifs_ioctl.h * * Structure definitions for io control for cifs/smb3 * diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 8fa26a8530f8..353bd0dd7026 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cifs_spnego.c -- SPNEGO upcall management for CIFS + * SPNEGO upcall management for CIFS * * Copyright (c) 2007 Red Hat, Inc. * Author(s): Jeff Layton (jlayton@redhat.com) diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h index 31387d0ea32e..e6a0451877d4 100644 --- a/fs/cifs/cifs_spnego.h +++ b/fs/cifs/cifs_spnego.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifs_spnego.h -- SPNEGO upcall management for CIFS + * SPNEGO upcall management for CIFS * * Copyright (c) 2007 Red Hat, Inc. * Author(s): Jeff Layton (jlayton@redhat.com) diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 171ad8b42107..e7582dd79179 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * fs/cifs/cifs_unicode.c * * Copyright (c) International Business Machines Corp., 2000,2009 * Modified by Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 388eb536cff1..ee3aab3dd4ac 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cifsacl.c * * Copyright (C) International Business Machines Corp., 2007,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h index f8292bcf8594..ccbfc754bd3c 100644 --- a/fs/cifs/cifsacl.h +++ b/fs/cifs/cifsacl.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifsacl.h * * Copyright (c) International Business Machines Corp., 2007 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 2e6f40344037..d118282071b3 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cifsencrypt.c * * Encryption and hashing operations relating to NTLM, NTLMv2. See MS-NLMP * for more detailed information diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8c20bfa187ac..9fa930dfd78d 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cifsfs.c * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index d25a4099b32e..b50da1901ebd 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifsfs.h * * Copyright (c) International Business Machines Corp., 2002, 2007 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index c068f7d8d879..7dd9878e26bf 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifsglob.h * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 98e8e5aa0613..d2ff438fd31f 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifspdu.h * * Copyright (c) International Business Machines Corp., 2002,2009 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index f9740c21ca3d..54966c1a8eb6 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifsproto.h * * Copyright (c) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a8e41c1e80ca..243d17696f06 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cifssmb.c * * Copyright (C) International Business Machines Corp., 2002,2010 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0db344807ef1..399a0f084573 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/connect.c * * Copyright (C) International Business Machines Corp., 2002,2011 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 5f8a302ffcb2..6e8e7cc26ae2 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/dir.c * * vfs operations that deal with dentries * diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 8c616aaeb7c4..0458d28d71aa 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/dns_resolve.c * * Copyright (c) 2007 Igor Mammedov * Author(s): Igor Mammedov (niallain@gmail.com) diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h index 9fa2807ef79e..afc0df381246 100644 --- a/fs/cifs/dns_resolve.h +++ b/fs/cifs/dns_resolve.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/dns_resolve.h -- DNS Resolver upcall management for CIFS DFS - * Handles host name to IP address resolution + * DNS Resolver upcall management for CIFS DFS + * Handles host name to IP address resolution * * Copyright (c) International Business Machines Corp., 2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/export.c b/fs/cifs/export.c index 747a540db954..37c28415df1e 100644 --- a/fs/cifs/export.c +++ b/fs/cifs/export.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/export.c * * Copyright (C) International Business Machines Corp., 2007 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index d0216472f1c6..4d10c9343890 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/file.c * * vfs operations that deal with files * diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index fab47fa7df74..8eedd20c44ab 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/fscache.c - CIFS filesystem cache interface + * CIFS filesystem cache interface * * Copyright (c) 2010 Novell, Inc. * Author(s): Suresh Jayaraman diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h index 82e856b9cf89..9baa1d0f22bd 100644 --- a/fs/cifs/fscache.h +++ b/fs/cifs/fscache.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/fscache.h - CIFS filesystem cache interface definitions + * CIFS filesystem cache interface definitions * * Copyright (c) 2010 Novell, Inc. * Authors(s): Suresh Jayaraman (sjayaraman@suse.de> diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 50c01cff4c84..19af2d0ec8d5 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/inode.c * * Copyright (C) International Business Machines Corp., 2002,2010 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 42c6a0bac6c8..44ae99454fb3 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/ioctl.c * * vfs operations that deal with io control * diff --git a/fs/cifs/link.c b/fs/cifs/link.c index f0a6d63bc08c..852e54ee82c2 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/link.c * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 9469f1cf0b46..f4313935e734 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/misc.c * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 0e728aac67e9..fa9fbd6a819c 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * fs/cifs/netmisc.c * * Copyright (c) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index 378133ce8869..25a2b8ef88b9 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/ntlmssp.h * * Copyright (c) International Business Machines Corp., 2002,2007 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 54d77c99e21c..1929e80c09ee 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/readdir.c * * Directory search handling * diff --git a/fs/cifs/rfc1002pdu.h b/fs/cifs/rfc1002pdu.h index 137f7c95afd6..ae1d025da294 100644 --- a/fs/cifs/rfc1002pdu.h +++ b/fs/cifs/rfc1002pdu.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/rfc1002pdu.h * * Protocol Data Unit definitions for RFC 1001/1002 support * diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 118403fbeda2..23e02db7923f 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/sess.c * * SMB/CIFS session setup handling routines * diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index c9d8a50062b8..f5dcc4940b6d 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/smb2file.c * * Copyright (C) International Business Machines Corp., 2002, 2011 * Author(s): Steve French (sfrench@us.ibm.com), diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index d0e9f3782bd9..ca692b2283cd 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/smb2glob.h * * Definitions for various global variables and structures * diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 957b2594f02e..8297703492ee 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/smb2inode.c * * Copyright (C) International Business Machines Corp., 2002, 2011 * Etersoft, 2012 diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 668f77108831..29b5554f6263 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/smb2misc.c * * Copyright (C) International Business Machines Corp., 2002,2011 * Etersoft, 2012 diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index b6d2e3591927..672ae78e866a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/smb2pdu.c * * Copyright (C) International Business Machines Corp., 2009, 2013 * Etersoft, 2012 diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index e9cac7970b66..f32c99c9ba13 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/smb2pdu.h * * Copyright (c) International Business Machines Corp., 2009, 2013 * Etersoft, 2012 diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 263767f644f8..547945443fa7 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/smb2proto.h * * Copyright (c) International Business Machines Corp., 2002, 2011 * Etersoft, 2012 diff --git a/fs/cifs/smb2status.h b/fs/cifs/smb2status.h index 0215ef36e240..a9e958166fc5 100644 --- a/fs/cifs/smb2status.h +++ b/fs/cifs/smb2status.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/smb2status.h * * SMB2 Status code (network error) definitions * Definitions are from MS-ERREF diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 6f7952ea4941..f59b956f9d25 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/smb2transport.c * * Copyright (C) International Business Machines Corp., 2002, 2011 * Etersoft, 2012 diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h index 60189efb3236..aeffdad829e2 100644 --- a/fs/cifs/smberr.h +++ b/fs/cifs/smberr.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/smberr.h * * Copyright (c) International Business Machines Corp., 2002,2004 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 75a95de320cf..b7379329b741 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/transport.c * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/winucase.c b/fs/cifs/winucase.c index 59b6c577aa0a..2f075b5b50df 100644 --- a/fs/cifs/winucase.c +++ b/fs/cifs/winucase.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * fs/cifs/winucase.c * * Copyright (c) Jeffrey Layton , 2013 * diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 9ed481e79ce0..7d8b72d67c80 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/xattr.c * * Copyright (c) International Business Machines Corp., 2003, 2007 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/smbfs_common/smbfsctl.h b/fs/smbfs_common/smbfsctl.h index d01e8c9d7a31..926f87cd6af0 100644 --- a/fs/smbfs_common/smbfsctl.h +++ b/fs/smbfs_common/smbfsctl.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ /* - * fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions + * SMB, CIFS, SMB2 FSCTL definitions * * Copyright (c) International Business Machines Corp., 2002,2013 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/include/uapi/linux/cifs/cifs_mount.h b/include/uapi/linux/cifs/cifs_mount.h index 69829205fdb5..8e87d27b0951 100644 --- a/include/uapi/linux/cifs/cifs_mount.h +++ b/include/uapi/linux/cifs/cifs_mount.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1+ WITH Linux-syscall-note */ /* - * include/uapi/linux/cifs/cifs_mount.h * * Author(s): Scott Lovenberg (scott.lovenberg@gmail.com) * -- cgit v1.2.3-71-gd317 From b564171ade70570b7f335fa8ed17adb28409e3ac Mon Sep 17 00:00:00 2001 From: Li Li Date: Fri, 10 Sep 2021 09:42:10 -0700 Subject: binder: fix freeze race Currently cgroup freezer is used to freeze the application threads, and BINDER_FREEZE is used to freeze the corresponding binder interface. There's already a mechanism in ioctl(BINDER_FREEZE) to wait for any existing transactions to drain out before actually freezing the binder interface. But freezing an app requires 2 steps, freezing the binder interface with ioctl(BINDER_FREEZE) and then freezing the application main threads with cgroupfs. This is not an atomic operation. The following race issue might happen. 1) Binder interface is frozen by ioctl(BINDER_FREEZE); 2) Main thread A initiates a new sync binder transaction to process B; 3) Main thread A is frozen by "echo 1 > cgroup.freeze"; 4) The response from process B reaches the frozen thread, which will unexpectedly fail. This patch provides a mechanism to check if there's any new pending transaction happening between ioctl(BINDER_FREEZE) and freezing the main thread. If there's any, the main thread freezing operation can be rolled back to finish the pending transaction. Furthermore, the response might reach the binder driver before the rollback actually happens. That will still cause failed transaction. As the other process doesn't wait for another response of the response, the response transaction failure can be fixed by treating the response transaction like an oneway/async one, allowing it to reach the frozen thread. And it will be consumed when the thread gets unfrozen later. NOTE: This patch reuses the existing definition of struct binder_frozen_status_info but expands the bit assignments of __u32 member sync_recv. To ensure backward compatibility, bit 0 of sync_recv still indicates there's an outstanding sync binder transaction. This patch adds new information to bit 1 of sync_recv, indicating the binder transaction happens exactly when there's a race. If an existing userspace app runs on a new kernel, a sync binder call will set bit 0 of sync_recv so ioctl(BINDER_GET_FROZEN_INFO) still return the expected value (true). The app just doesn't check bit 1 intentionally so it doesn't have the ability to tell if there's a race. This behavior is aligned with what happens on an old kernel which doesn't set bit 1 at all. A new userspace app can 1) check bit 0 to know if there's a sync binder transaction happened when being frozen - same as before; and 2) check bit 1 to know if that sync binder transaction happened exactly when there's a race - a new information for rollback decision. the same time, confirmed the pending transactions succeeded. Fixes: 432ff1e91694 ("binder: BINDER_FREEZE ioctl") Acked-by: Todd Kjos Cc: stable Signed-off-by: Li Li Test: stress test with apps being frozen and initiating binder calls at Link: https://lore.kernel.org/r/20210910164210.2282716-2-dualli@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 35 +++++++++++++++++++++++++++++------ drivers/android/binder_internal.h | 2 ++ include/uapi/linux/android/binder.h | 7 +++++++ 3 files changed, 38 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d9030cb6b1e4..1a68c2f590cf 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3038,9 +3038,8 @@ static void binder_transaction(struct binder_proc *proc, if (reply) { binder_enqueue_thread_work(thread, tcomplete); binder_inner_proc_lock(target_proc); - if (target_thread->is_dead || target_proc->is_frozen) { - return_error = target_thread->is_dead ? - BR_DEAD_REPLY : BR_FROZEN_REPLY; + if (target_thread->is_dead) { + return_error = BR_DEAD_REPLY; binder_inner_proc_unlock(target_proc); goto err_dead_proc_or_thread; } @@ -4648,6 +4647,22 @@ static int binder_ioctl_get_node_debug_info(struct binder_proc *proc, return 0; } +static bool binder_txns_pending_ilocked(struct binder_proc *proc) +{ + struct rb_node *n; + struct binder_thread *thread; + + if (proc->outstanding_txns > 0) + return true; + + for (n = rb_first(&proc->threads); n; n = rb_next(n)) { + thread = rb_entry(n, struct binder_thread, rb_node); + if (thread->transaction_stack) + return true; + } + return false; +} + static int binder_ioctl_freeze(struct binder_freeze_info *info, struct binder_proc *target_proc) { @@ -4679,8 +4694,13 @@ static int binder_ioctl_freeze(struct binder_freeze_info *info, (!target_proc->outstanding_txns), msecs_to_jiffies(info->timeout_ms)); - if (!ret && target_proc->outstanding_txns) - ret = -EAGAIN; + /* Check pending transactions that wait for reply */ + if (ret >= 0) { + binder_inner_proc_lock(target_proc); + if (binder_txns_pending_ilocked(target_proc)) + ret = -EAGAIN; + binder_inner_proc_unlock(target_proc); + } if (ret < 0) { binder_inner_proc_lock(target_proc); @@ -4696,6 +4716,7 @@ static int binder_ioctl_get_freezer_info( { struct binder_proc *target_proc; bool found = false; + __u32 txns_pending; info->sync_recv = 0; info->async_recv = 0; @@ -4705,7 +4726,9 @@ static int binder_ioctl_get_freezer_info( if (target_proc->pid == info->pid) { found = true; binder_inner_proc_lock(target_proc); - info->sync_recv |= target_proc->sync_recv; + txns_pending = binder_txns_pending_ilocked(target_proc); + info->sync_recv |= target_proc->sync_recv | + (txns_pending << 1); info->async_recv |= target_proc->async_recv; binder_inner_proc_unlock(target_proc); } diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index 810c0b84d3f8..402c4d4362a8 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -378,6 +378,8 @@ struct binder_ref { * binder transactions * (protected by @inner_lock) * @sync_recv: process received sync transactions since last frozen + * bit 0: received sync transaction after being frozen + * bit 1: new pending sync transaction during freezing * (protected by @inner_lock) * @async_recv: process received async transactions since last frozen * (protected by @inner_lock) diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 20e435fe657a..3246f2c74696 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -225,7 +225,14 @@ struct binder_freeze_info { struct binder_frozen_status_info { __u32 pid; + + /* process received sync transactions since last frozen + * bit 0: received sync transaction after being frozen + * bit 1: new pending sync transaction during freezing + */ __u32 sync_recv; + + /* process received async transactions since last frozen */ __u32 async_recv; }; -- cgit v1.2.3-71-gd317 From 7a8aa39d44564703620d937bb54cdea2d003657f Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 13 Sep 2021 17:05:51 +0100 Subject: nvmem: core: Add stubs for nvmem_cell_read_variable_le_u32/64 if !CONFIG_NVMEM When I added nvmem_cell_read_variable_le_u32() and nvmem_cell_read_variable_le_u64() I forgot to add the "static inline" stub functions for when CONFIG_NVMEM wasn't defined. Add them now. This was causing problems with randconfig builds that compiled `drivers/soc/qcom/cpr.c`. Fixes: 6feba6a62c57 ("PM: AVS: qcom-cpr: Use nvmem_cell_read_variable_le_u32()") Fixes: a28e824fb827 ("nvmem: core: Add functions to make number reading easy") Reported-by: kernel test robot Reviewed-by: Bjorn Andersson Signed-off-by: Douglas Anderson Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210913160551.12907-1-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-consumer.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index 923dada24eb4..c0c0cefc3b92 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -150,6 +150,20 @@ static inline int nvmem_cell_read_u64(struct device *dev, return -EOPNOTSUPP; } +static inline int nvmem_cell_read_variable_le_u32(struct device *dev, + const char *cell_id, + u32 *val) +{ + return -EOPNOTSUPP; +} + +static inline int nvmem_cell_read_variable_le_u64(struct device *dev, + const char *cell_id, + u64 *val) +{ + return -EOPNOTSUPP; +} + static inline struct nvmem_device *nvmem_device_get(struct device *dev, const char *name) { -- cgit v1.2.3-71-gd317 From 844f7eaaed9267ae17d33778efe65548cc940205 Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Sun, 12 Sep 2021 14:22:34 +0200 Subject: include/uapi/linux/xfrm.h: Fix XFRM_MSG_MAPPING ABI breakage Commit 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy") broke ABI by changing the value of the XFRM_MSG_MAPPING enum item, thus also evading the build-time check in security/selinux/nlmsgtab.c:selinux_nlmsg_lookup for presence of proper security permission checks in nlmsg_xfrm_perms. Fix it by placing XFRM_MSG_SETDEFAULT/XFRM_MSG_GETDEFAULT to the end of the enum, right before __XFRM_MSG_MAX, and updating the nlmsg_xfrm_perms accordingly. Fixes: 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy") References: https://lore.kernel.org/netdev/20210901151402.GA2557@altlinux.org/ Signed-off-by: Eugene Syromiatnikov Acked-by: Antony Antony Acked-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 6 +++--- security/selinux/nlmsgtab.c | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index b96c1ea7166d..26f456b1f33e 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -213,13 +213,13 @@ enum { XFRM_MSG_GETSPDINFO, #define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO + XFRM_MSG_MAPPING, +#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING + XFRM_MSG_SETDEFAULT, #define XFRM_MSG_SETDEFAULT XFRM_MSG_SETDEFAULT XFRM_MSG_GETDEFAULT, #define XFRM_MSG_GETDEFAULT XFRM_MSG_GETDEFAULT - - XFRM_MSG_MAPPING, -#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING __XFRM_MSG_MAX }; #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index d59276f48d4f..94ea2a8b2bb7 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -126,6 +126,8 @@ static const struct nlmsg_perm nlmsg_xfrm_perms[] = { XFRM_MSG_NEWSPDINFO, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_GETSPDINFO, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_MAPPING, NETLINK_XFRM_SOCKET__NLMSG_READ }, + { XFRM_MSG_SETDEFAULT, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, + { XFRM_MSG_GETDEFAULT, NETLINK_XFRM_SOCKET__NLMSG_READ }, }; static const struct nlmsg_perm nlmsg_audit_perms[] = @@ -189,7 +191,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm) * structures at the top of this file with the new mappings * before updating the BUILD_BUG_ON() macro! */ - BUILD_BUG_ON(XFRM_MSG_MAX != XFRM_MSG_MAPPING); + BUILD_BUG_ON(XFRM_MSG_MAX != XFRM_MSG_GETDEFAULT); err = nlmsg_perm(nlmsg_type, perm, nlmsg_xfrm_perms, sizeof(nlmsg_xfrm_perms)); break; -- cgit v1.2.3-71-gd317 From 58877b0824da15698bd85a0a9dbfa8c354e6ecb7 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Thu, 9 Sep 2021 12:11:58 +0530 Subject: usb: core: hcd: Add support for deferring roothub registration It has been observed with certain PCIe USB cards (like Inateck connected to AM64 EVM or J7200 EVM) that as soon as the primary roothub is registered, port status change is handled even before xHC is running leading to cold plug USB devices not detected. For such cases, registering both the root hubs along with the second HCD is required. Add support for deferring roothub registration in usb_add_hcd(), so that both primary and secondary roothubs are registered along with the second HCD. CC: stable@vger.kernel.org # 5.4+ Suggested-by: Mathias Nyman Tested-by: Chris Chiu Acked-by: Alan Stern Signed-off-by: Kishon Vijay Abraham I Link: https://lore.kernel.org/r/20210909064200.16216-2-kishon@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 29 +++++++++++++++++++++++------ include/linux/usb/hcd.h | 2 ++ 2 files changed, 25 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 0f8b7c93310e..99ff2d23be05 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2775,6 +2775,7 @@ int usb_add_hcd(struct usb_hcd *hcd, { int retval; struct usb_device *rhdev; + struct usb_hcd *shared_hcd; if (!hcd->skip_phy_initialization && usb_hcd_is_primary_hcd(hcd)) { hcd->phy_roothub = usb_phy_roothub_alloc(hcd->self.sysdev); @@ -2935,13 +2936,26 @@ int usb_add_hcd(struct usb_hcd *hcd, goto err_hcd_driver_start; } + /* starting here, usbcore will pay attention to the shared HCD roothub */ + shared_hcd = hcd->shared_hcd; + if (!usb_hcd_is_primary_hcd(hcd) && shared_hcd && HCD_DEFER_RH_REGISTER(shared_hcd)) { + retval = register_root_hub(shared_hcd); + if (retval != 0) + goto err_register_root_hub; + + if (shared_hcd->uses_new_polling && HCD_POLL_RH(shared_hcd)) + usb_hcd_poll_rh_status(shared_hcd); + } + /* starting here, usbcore will pay attention to this root hub */ - retval = register_root_hub(hcd); - if (retval != 0) - goto err_register_root_hub; + if (!HCD_DEFER_RH_REGISTER(hcd)) { + retval = register_root_hub(hcd); + if (retval != 0) + goto err_register_root_hub; - if (hcd->uses_new_polling && HCD_POLL_RH(hcd)) - usb_hcd_poll_rh_status(hcd); + if (hcd->uses_new_polling && HCD_POLL_RH(hcd)) + usb_hcd_poll_rh_status(hcd); + } return retval; @@ -2985,6 +2999,7 @@ EXPORT_SYMBOL_GPL(usb_add_hcd); void usb_remove_hcd(struct usb_hcd *hcd) { struct usb_device *rhdev = hcd->self.root_hub; + bool rh_registered; dev_info(hcd->self.controller, "remove, state %x\n", hcd->state); @@ -2995,6 +3010,7 @@ void usb_remove_hcd(struct usb_hcd *hcd) dev_dbg(hcd->self.controller, "roothub graceful disconnect\n"); spin_lock_irq (&hcd_root_hub_lock); + rh_registered = hcd->rh_registered; hcd->rh_registered = 0; spin_unlock_irq (&hcd_root_hub_lock); @@ -3004,7 +3020,8 @@ void usb_remove_hcd(struct usb_hcd *hcd) cancel_work_sync(&hcd->died_work); mutex_lock(&usb_bus_idr_lock); - usb_disconnect(&rhdev); /* Sets rhdev to NULL */ + if (rh_registered) + usb_disconnect(&rhdev); /* Sets rhdev to NULL */ mutex_unlock(&usb_bus_idr_lock); /* diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 548a028f2dab..2c1fc9212cf2 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -124,6 +124,7 @@ struct usb_hcd { #define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */ #define HCD_FLAG_DEAD 6 /* controller has died? */ #define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */ +#define HCD_FLAG_DEFER_RH_REGISTER 8 /* Defer roothub registration */ /* The flags can be tested using these macros; they are likely to * be slightly faster than test_bit(). @@ -134,6 +135,7 @@ struct usb_hcd { #define HCD_WAKEUP_PENDING(hcd) ((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING)) #define HCD_RH_RUNNING(hcd) ((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING)) #define HCD_DEAD(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEAD)) +#define HCD_DEFER_RH_REGISTER(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEFER_RH_REGISTER)) /* * Specifies if interfaces are authorized by default -- cgit v1.2.3-71-gd317 From 356ed64991c6847a0c4f2e8fa3b1133f7a14f1fc Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Tue, 14 Sep 2021 10:33:51 +0800 Subject: bpf: Handle return value of BPF_PROG_TYPE_STRUCT_OPS prog Currently if a function ptr in struct_ops has a return value, its caller will get a random return value from it, because the return value of related BPF_PROG_TYPE_STRUCT_OPS prog is just dropped. So adding a new flag BPF_TRAMP_F_RET_FENTRY_RET to tell bpf trampoline to save and return the return value of struct_ops prog if ret_size of the function ptr is greater than 0. Also restricting the flag to be used alone. Fixes: 85d33df357b6 ("bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS") Signed-off-by: Hou Tao Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210914023351.3664499-1-houtao1@huawei.com --- arch/x86/net/bpf_jit_comp.c | 53 ++++++++++++++++++++++++++++++++++----------- include/linux/bpf.h | 3 ++- kernel/bpf/bpf_struct_ops.c | 7 ++++-- 3 files changed, 47 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 0fe6aacef3db..d24a512fd6f3 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1744,7 +1744,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args, } static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, - struct bpf_prog *p, int stack_size, bool mod_ret) + struct bpf_prog *p, int stack_size, bool save_ret) { u8 *prog = *pprog; u8 *jmp_insn; @@ -1777,11 +1777,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, if (emit_call(&prog, p->bpf_func, prog)) return -EINVAL; - /* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return + /* + * BPF_TRAMP_MODIFY_RETURN trampolines can modify the return * of the previous call which is then passed on the stack to * the next BPF program. + * + * BPF_TRAMP_FENTRY trampoline may need to return the return + * value of BPF_PROG_TYPE_STRUCT_OPS prog. */ - if (mod_ret) + if (save_ret) emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); /* replace 2 nops with JE insn, since jmp target is known */ @@ -1828,13 +1832,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) } static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, - struct bpf_tramp_progs *tp, int stack_size) + struct bpf_tramp_progs *tp, int stack_size, + bool save_ret) { int i; u8 *prog = *pprog; for (i = 0; i < tp->nr_progs; i++) { - if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false)) + if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, + save_ret)) return -EINVAL; } *pprog = prog; @@ -1877,6 +1883,23 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, return 0; } +static bool is_valid_bpf_tramp_flags(unsigned int flags) +{ + if ((flags & BPF_TRAMP_F_RESTORE_REGS) && + (flags & BPF_TRAMP_F_SKIP_FRAME)) + return false; + + /* + * BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops, + * and it must be used alone. + */ + if ((flags & BPF_TRAMP_F_RET_FENTRY_RET) && + (flags & ~BPF_TRAMP_F_RET_FENTRY_RET)) + return false; + + return true; +} + /* Example: * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); * its 'struct btf_func_model' will be nr_args=2 @@ -1949,17 +1972,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; u8 **branches = NULL; u8 *prog; + bool save_ret; /* x86-64 supports up to 6 arguments. 7+ can be added in the future */ if (nr_args > 6) return -ENOTSUPP; - if ((flags & BPF_TRAMP_F_RESTORE_REGS) && - (flags & BPF_TRAMP_F_SKIP_FRAME)) + if (!is_valid_bpf_tramp_flags(flags)) return -EINVAL; - if (flags & BPF_TRAMP_F_CALL_ORIG) - stack_size += 8; /* room for return value of orig_call */ + /* room for return value of orig_call or fentry prog */ + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); + if (save_ret) + stack_size += 8; if (flags & BPF_TRAMP_F_IP_ARG) stack_size += 8; /* room for IP address argument */ @@ -2005,7 +2030,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i } if (fentry->nr_progs) - if (invoke_bpf(m, &prog, fentry, stack_size)) + if (invoke_bpf(m, &prog, fentry, stack_size, + flags & BPF_TRAMP_F_RET_FENTRY_RET)) return -EINVAL; if (fmod_ret->nr_progs) { @@ -2052,7 +2078,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i } if (fexit->nr_progs) - if (invoke_bpf(m, &prog, fexit, stack_size)) { + if (invoke_bpf(m, &prog, fexit, stack_size, false)) { ret = -EINVAL; goto cleanup; } @@ -2072,9 +2098,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i ret = -EINVAL; goto cleanup; } - /* restore original return value back into RAX */ - emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); } + /* restore return value of orig_call or fentry prog back into RAX */ + if (save_ret) + emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); EMIT1(0x5B); /* pop rbx */ EMIT1(0xC9); /* leave */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f4c16f19f83e..020a7d5bf470 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -578,11 +578,12 @@ struct btf_func_model { * programs only. Should not be used with normal calls and indirect calls. */ #define BPF_TRAMP_F_SKIP_FRAME BIT(2) - /* Store IP address of the caller on the trampoline stack, * so it's available for trampoline's programs. */ #define BPF_TRAMP_F_IP_ARG BIT(3) +/* Return the return value of fentry prog. Only used by bpf_struct_ops. */ +#define BPF_TRAMP_F_RET_FENTRY_RET BIT(4) /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index d6731c32864e..9abcc33f02cf 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -368,6 +368,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, const struct btf_type *mtype, *ptype; struct bpf_prog *prog; u32 moff; + u32 flags; moff = btf_member_bit_offset(t, member) / 8; ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL); @@ -431,10 +432,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; + flags = st_ops->func_models[i].ret_size > 0 ? + BPF_TRAMP_F_RET_FENTRY_RET : 0; err = arch_prepare_bpf_trampoline(NULL, image, st_map->image + PAGE_SIZE, - &st_ops->func_models[i], 0, - tprogs, NULL); + &st_ops->func_models[i], + flags, tprogs, NULL); if (err < 0) goto reset_unlock; -- cgit v1.2.3-71-gd317 From f8d858e607b2a36808ac6d4218f5f5203d7a7d63 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 14 Sep 2021 16:46:33 +0200 Subject: xfrm: make user policy API complete >From a userland POV, this API was based on some magic values: - dirmask and action were bitfields but meaning of bits (XFRM_POL_DEFAULT_*) are not exported; - action is confusing, if a bit is set, does it mean drop or accept? Let's try to simplify this uapi by using explicit field and macros. Fixes: 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy") Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 9 ++++++--- net/xfrm/xfrm_user.c | 36 +++++++++++++++++++----------------- 2 files changed, 25 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 26f456b1f33e..eda0426ec4c2 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -514,9 +514,12 @@ struct xfrm_user_offload { #define XFRM_OFFLOAD_INBOUND 2 struct xfrm_userpolicy_default { -#define XFRM_USERPOLICY_DIRMASK_MAX (sizeof(__u8) * 8) - __u8 dirmask; - __u8 action; +#define XFRM_USERPOLICY_UNSPEC 0 +#define XFRM_USERPOLICY_BLOCK 1 +#define XFRM_USERPOLICY_ACCEPT 2 + __u8 in; + __u8 fwd; + __u8 out; }; #ifndef __KERNEL__ diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4719a6d54aa6..90c88390f1fe 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1966,16 +1966,21 @@ static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct xfrm_userpolicy_default *up = nlmsg_data(nlh); - u8 dirmask; - u8 old_default = net->xfrm.policy_default; - if (up->dirmask >= XFRM_USERPOLICY_DIRMASK_MAX) - return -EINVAL; + if (up->in == XFRM_USERPOLICY_BLOCK) + net->xfrm.policy_default |= XFRM_POL_DEFAULT_IN; + else if (up->in == XFRM_USERPOLICY_ACCEPT) + net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_IN; - dirmask = (1 << up->dirmask) & XFRM_POL_DEFAULT_MASK; + if (up->fwd == XFRM_USERPOLICY_BLOCK) + net->xfrm.policy_default |= XFRM_POL_DEFAULT_FWD; + else if (up->fwd == XFRM_USERPOLICY_ACCEPT) + net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_FWD; - net->xfrm.policy_default = (old_default & (0xff ^ dirmask)) - | (up->action << up->dirmask); + if (up->out == XFRM_USERPOLICY_BLOCK) + net->xfrm.policy_default |= XFRM_POL_DEFAULT_OUT; + else if (up->out == XFRM_USERPOLICY_ACCEPT) + net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_OUT; rt_genid_bump_all(net); @@ -1988,13 +1993,11 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh, struct sk_buff *r_skb; struct nlmsghdr *r_nlh; struct net *net = sock_net(skb->sk); - struct xfrm_userpolicy_default *r_up, *up; + struct xfrm_userpolicy_default *r_up; int len = NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_default)); u32 portid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; - up = nlmsg_data(nlh); - r_skb = nlmsg_new(len, GFP_ATOMIC); if (!r_skb) return -ENOMEM; @@ -2005,15 +2008,14 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh, return -EMSGSIZE; } - if (up->dirmask >= XFRM_USERPOLICY_DIRMASK_MAX) { - kfree_skb(r_skb); - return -EINVAL; - } - r_up = nlmsg_data(r_nlh); - r_up->action = ((net->xfrm.policy_default & (1 << up->dirmask)) >> up->dirmask); - r_up->dirmask = up->dirmask; + r_up->in = net->xfrm.policy_default & XFRM_POL_DEFAULT_IN ? + XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT; + r_up->fwd = net->xfrm.policy_default & XFRM_POL_DEFAULT_FWD ? + XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT; + r_up->out = net->xfrm.policy_default & XFRM_POL_DEFAULT_OUT ? + XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT; nlmsg_end(r_skb, r_nlh); return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid); -- cgit v1.2.3-71-gd317 From 3c9cfb5269f76d447dbadb67835368f3111a91d7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 17 Sep 2021 14:17:35 +0300 Subject: net: update NXP copyright text NXP Legal insists that the following are not fine: - Saying "NXP Semiconductors" instead of "NXP", since the company's registered name is "NXP" - Putting a "(c)" sign in the copyright string - Putting a comma in the copyright string The only accepted copyright string format is "Copyright NXP". This patch changes the copyright headers in the networking files that were sent by me, or derived from code sent by me. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 2 +- drivers/net/dsa/ocelot/felix.h | 2 +- drivers/net/dsa/ocelot/felix_vsc9959.c | 2 +- drivers/net/dsa/sja1105/sja1105_clocking.c | 2 +- drivers/net/dsa/sja1105/sja1105_devlink.c | 2 +- drivers/net/dsa/sja1105/sja1105_flower.c | 2 +- drivers/net/dsa/sja1105/sja1105_mdio.c | 2 +- drivers/net/dsa/sja1105/sja1105_spi.c | 2 +- drivers/net/dsa/sja1105/sja1105_static_config.c | 2 +- drivers/net/dsa/sja1105/sja1105_static_config.h | 2 +- drivers/net/dsa/sja1105/sja1105_vl.c | 2 +- drivers/net/dsa/sja1105/sja1105_vl.h | 2 +- drivers/net/ethernet/freescale/enetc/enetc_ierb.c | 2 +- drivers/net/ethernet/freescale/enetc/enetc_ierb.h | 2 +- drivers/net/ethernet/mscc/ocelot_devlink.c | 2 +- drivers/net/ethernet/mscc/ocelot_mrp.c | 2 +- drivers/net/ethernet/mscc/ocelot_net.c | 2 +- drivers/net/pcs/pcs-xpcs-nxp.c | 2 +- include/linux/dsa/ocelot.h | 2 +- include/linux/packing.h | 2 +- lib/packing.c | 2 +- net/dsa/tag_ocelot.c | 2 +- net/dsa/tag_ocelot_8021q.c | 2 +- tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh | 2 +- 24 files changed, 24 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 3656e67af789..a3a9636430d6 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2019-2021 NXP Semiconductors +/* Copyright 2019-2021 NXP * * This is an umbrella module for all network switches that are * register-compatible with Ocelot and that perform I/O to their host CPU diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 5854bab43327..54024b6f9498 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright 2019 NXP Semiconductors +/* Copyright 2019 NXP */ #ifndef _MSCC_FELIX_H #define _MSCC_FELIX_H diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index f966a253d1c7..9e2ac8e46619 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) /* Copyright 2017 Microsemi Corporation - * Copyright 2018-2019 NXP Semiconductors + * Copyright 2018-2019 NXP */ #include #include diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c index 387a1f2f161c..5bbf1707f2af 100644 --- a/drivers/net/dsa/sja1105/sja1105_clocking.c +++ b/drivers/net/dsa/sja1105/sja1105_clocking.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: BSD-3-Clause -/* Copyright (c) 2016-2018, NXP Semiconductors +/* Copyright 2016-2018 NXP * Copyright (c) 2018-2019, Vladimir Oltean */ #include diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c index 05c7f4ca3b1a..0569ff066634 100644 --- a/drivers/net/dsa/sja1105/sja1105_devlink.c +++ b/drivers/net/dsa/sja1105/sja1105_devlink.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018-2019, Vladimir Oltean - * Copyright 2020 NXP Semiconductors + * Copyright 2020 NXP */ #include "sja1105.h" diff --git a/drivers/net/dsa/sja1105/sja1105_flower.c b/drivers/net/dsa/sja1105/sja1105_flower.c index 6c10ffa968ce..72b9b39b0989 100644 --- a/drivers/net/dsa/sja1105/sja1105_flower.c +++ b/drivers/net/dsa/sja1105/sja1105_flower.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2020, NXP Semiconductors +/* Copyright 2020 NXP */ #include "sja1105.h" #include "sja1105_vl.h" diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c index 705d3900e43a..215dd17ca790 100644 --- a/drivers/net/dsa/sja1105/sja1105_mdio.c +++ b/drivers/net/dsa/sja1105/sja1105_mdio.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2021, NXP Semiconductors +/* Copyright 2021 NXP */ #include #include diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c index d60a530d0272..d3c9ad6d39d4 100644 --- a/drivers/net/dsa/sja1105/sja1105_spi.c +++ b/drivers/net/dsa/sja1105/sja1105_spi.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: BSD-3-Clause -/* Copyright (c) 2016-2018, NXP Semiconductors +/* Copyright 2016-2018 NXP * Copyright (c) 2018, Sensor-Technik Wiedemann GmbH * Copyright (c) 2018-2019, Vladimir Oltean */ diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c index 7a422ef4deb6..baba204ad62f 100644 --- a/drivers/net/dsa/sja1105/sja1105_static_config.c +++ b/drivers/net/dsa/sja1105/sja1105_static_config.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: BSD-3-Clause -/* Copyright (c) 2016-2018, NXP Semiconductors +/* Copyright 2016-2018 NXP * Copyright (c) 2018-2019, Vladimir Oltean */ #include "sja1105_static_config.h" diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h index bce0f5c03d0b..6a372d5f22ae 100644 --- a/drivers/net/dsa/sja1105/sja1105_static_config.h +++ b/drivers/net/dsa/sja1105/sja1105_static_config.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause */ -/* Copyright (c) 2016-2018, NXP Semiconductors +/* Copyright 2016-2018 NXP * Copyright (c) 2018-2019, Vladimir Oltean */ #ifndef _SJA1105_STATIC_CONFIG_H diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c index ec7b65daec20..6802f4057cc0 100644 --- a/drivers/net/dsa/sja1105/sja1105_vl.c +++ b/drivers/net/dsa/sja1105/sja1105_vl.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2020, NXP Semiconductors +/* Copyright 2020 NXP */ #include #include diff --git a/drivers/net/dsa/sja1105/sja1105_vl.h b/drivers/net/dsa/sja1105/sja1105_vl.h index 173d78963fed..51fba0dce91a 100644 --- a/drivers/net/dsa/sja1105/sja1105_vl.h +++ b/drivers/net/dsa/sja1105/sja1105_vl.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright 2020, NXP Semiconductors +/* Copyright 2020 NXP */ #ifndef _SJA1105_VL_H #define _SJA1105_VL_H diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c index ee1468e3eaa3..91f02c505028 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) -/* Copyright 2021 NXP Semiconductors +/* Copyright 2021 NXP * * The Integrated Endpoint Register Block (IERB) is configured by pre-boot * software and is supposed to be to ENETC what a NVRAM is to a 'real' PCIe diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.h b/drivers/net/ethernet/freescale/enetc/enetc_ierb.h index b3b774e0998a..c2ce47c4be9f 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ierb.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ -/* Copyright 2021 NXP Semiconductors */ +/* Copyright 2021 NXP */ #include #include diff --git a/drivers/net/ethernet/mscc/ocelot_devlink.c b/drivers/net/ethernet/mscc/ocelot_devlink.c index edafbd37d12c..b8737efd2a85 100644 --- a/drivers/net/ethernet/mscc/ocelot_devlink.c +++ b/drivers/net/ethernet/mscc/ocelot_devlink.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) -/* Copyright 2020-2021 NXP Semiconductors +/* Copyright 2020-2021 NXP */ #include #include "ocelot.h" diff --git a/drivers/net/ethernet/mscc/ocelot_mrp.c b/drivers/net/ethernet/mscc/ocelot_mrp.c index 08b481a93460..4b0941f09f71 100644 --- a/drivers/net/ethernet/mscc/ocelot_mrp.c +++ b/drivers/net/ethernet/mscc/ocelot_mrp.c @@ -2,7 +2,7 @@ /* Microsemi Ocelot Switch driver * * Copyright (c) 2017, 2019 Microsemi Corporation - * Copyright 2020-2021 NXP Semiconductors + * Copyright 2020-2021 NXP */ #include diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index c0c465a4a981..e54b9fb2a97a 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -5,7 +5,7 @@ * mscc_ocelot_switch_lib. * * Copyright (c) 2017, 2019 Microsemi Corporation - * Copyright 2020-2021 NXP Semiconductors + * Copyright 2020-2021 NXP */ #include diff --git a/drivers/net/pcs/pcs-xpcs-nxp.c b/drivers/net/pcs/pcs-xpcs-nxp.c index 984c9f7f16a8..d16fc58cd48d 100644 --- a/drivers/net/pcs/pcs-xpcs-nxp.c +++ b/drivers/net/pcs/pcs-xpcs-nxp.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2021 NXP Semiconductors +/* Copyright 2021 NXP */ #include #include "pcs-xpcs.h" diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index c6bc45ae5e03..435777a0073c 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 - * Copyright 2019-2021 NXP Semiconductors + * Copyright 2019-2021 NXP */ #ifndef _NET_DSA_TAG_OCELOT_H diff --git a/include/linux/packing.h b/include/linux/packing.h index 54667735cc67..8d6571feb95d 100644 --- a/include/linux/packing.h +++ b/include/linux/packing.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright (c) 2016-2018, NXP Semiconductors + * Copyright 2016-2018 NXP * Copyright (c) 2018-2019, Vladimir Oltean */ #ifndef _LINUX_PACKING_H diff --git a/lib/packing.c b/lib/packing.c index 6ed72dccfdb5..9a72f4bbf0e2 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 -/* Copyright (c) 2016-2018, NXP Semiconductors +/* Copyright 2016-2018 NXP * Copyright (c) 2018-2019, Vladimir Oltean */ #include diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index d37ab98e7fe1..8025ed778d33 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2019 NXP Semiconductors +/* Copyright 2019 NXP */ #include #include diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index 3038a257ba05..59072930cb02 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2020-2021 NXP Semiconductors +/* Copyright 2020-2021 NXP * * An implementation of the software-defined tag_8021q.c tagger format, which * also preserves full functionality under a vlan_filtering bridge. It does diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh index beee0d5646a6..f7d84549cc3e 100755 --- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh +++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh @@ -1,6 +1,6 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# Copyright 2020 NXP Semiconductors +# Copyright 2020 NXP WAIT_TIME=1 NUM_NETIFS=4 -- cgit v1.2.3-71-gd317 From cf9579976f724ad517cc15b7caadea728c7e245c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 17 Sep 2021 16:34:32 +0300 Subject: net: mdio: introduce a shutdown method to mdio device drivers MDIO-attached devices might have interrupts and other things that might need quiesced when we kexec into a new kernel. Things are even more creepy when those interrupt lines are shared, and in that case it is absolutely mandatory to disable all interrupt sources. Moreover, MDIO devices might be DSA switches, and DSA needs its own shutdown method to unlink from the DSA master, which is a new requirement that appeared after commit 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings"). So introduce a ->shutdown method in the MDIO device driver structure. Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/mdio_device.c | 11 +++++++++++ include/linux/mdio.h | 3 +++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c index c94cb5382dc9..250742ffdfd9 100644 --- a/drivers/net/phy/mdio_device.c +++ b/drivers/net/phy/mdio_device.c @@ -179,6 +179,16 @@ static int mdio_remove(struct device *dev) return 0; } +static void mdio_shutdown(struct device *dev) +{ + struct mdio_device *mdiodev = to_mdio_device(dev); + struct device_driver *drv = mdiodev->dev.driver; + struct mdio_driver *mdiodrv = to_mdio_driver(drv); + + if (mdiodrv->shutdown) + mdiodrv->shutdown(mdiodev); +} + /** * mdio_driver_register - register an mdio_driver with the MDIO layer * @drv: new mdio_driver to register @@ -193,6 +203,7 @@ int mdio_driver_register(struct mdio_driver *drv) mdiodrv->driver.bus = &mdio_bus_type; mdiodrv->driver.probe = mdio_probe; mdiodrv->driver.remove = mdio_remove; + mdiodrv->driver.shutdown = mdio_shutdown; retval = driver_register(&mdiodrv->driver); if (retval) { diff --git a/include/linux/mdio.h b/include/linux/mdio.h index ffb787d5ebde..5e6dc38f418e 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -80,6 +80,9 @@ struct mdio_driver { /* Clears up any memory if needed */ void (*remove)(struct mdio_device *mdiodev); + + /* Quiesces the device on system shutdown, turns off interrupts etc */ + void (*shutdown)(struct mdio_device *mdiodev); }; static inline struct mdio_driver * -- cgit v1.2.3-71-gd317 From 0650bf52b31ff35dc6430fc2e37969c36baba724 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 17 Sep 2021 16:34:33 +0300 Subject: net: dsa: be compatible with masters which unregister on shutdown Lino reports that on his system with bcmgenet as DSA master and KSZ9897 as a switch, rebooting or shutting down never works properly. What does the bcmgenet driver have special to trigger this, that other DSA masters do not? It has an implementation of ->shutdown which simply calls its ->remove implementation. Otherwise said, it unregisters its network interface on shutdown. This message can be seen in a loop, and it hangs the reboot process there: unregister_netdevice: waiting for eth0 to become free. Usage count = 3 So why 3? A usage count of 1 is normal for a registered network interface, and any virtual interface which links itself as an upper of that will increment it via dev_hold. In the case of DSA, this is the call path: dsa_slave_create -> netdev_upper_dev_link -> __netdev_upper_dev_link -> __netdev_adjacent_dev_insert -> dev_hold So a DSA switch with 3 interfaces will result in a usage count elevated by two, and netdev_wait_allrefs will wait until they have gone away. Other stacked interfaces, like VLAN, watch NETDEV_UNREGISTER events and delete themselves, but DSA cannot just vanish and go poof, at most it can unbind itself from the switch devices, but that must happen strictly earlier compared to when the DSA master unregisters its net_device, so reacting on the NETDEV_UNREGISTER event is way too late. It seems that it is a pretty established pattern to have a driver's ->shutdown hook redirect to its ->remove hook, so the same code is executed regardless of whether the driver is unbound from the device, or the system is just shutting down. As Florian puts it, it is quite a big hammer for bcmgenet to unregister its net_device during shutdown, but having a common code path with the driver unbind helps ensure it is well tested. So DSA, for better or for worse, has to live with that and engage in an arms race of implementing the ->shutdown hook too, from all individual drivers, and do something sane when paired with masters that unregister their net_device there. The only sane thing to do, of course, is to unlink from the master. However, complications arise really quickly. The pattern of redirecting ->shutdown to ->remove is not unique to bcmgenet or even to net_device drivers. In fact, SPI controllers do it too (see dspi_shutdown -> dspi_remove), and presumably, I2C controllers and MDIO controllers do it too (this is something I have not researched too deeply, but even if this is not the case today, it is certainly plausible to happen in the future, and must be taken into consideration). Since DSA switches might be SPI devices, I2C devices, MDIO devices, the insane implication is that for the exact same DSA switch device, we might have both ->shutdown and ->remove getting called. So we need to do something with that insane environment. The pattern I've come up with is "if this, then not that", so if either ->shutdown or ->remove gets called, we set the device's drvdata to NULL, and in the other hook, we check whether the drvdata is NULL and just do nothing. This is probably not necessary for platform devices, just for devices on buses, but I would really insist for consistency among drivers, because when code is copy-pasted, it is not always copy-pasted from the best sources. So depending on whether the DSA switch's ->remove or ->shutdown will get called first, we cannot really guarantee even for the same driver if rebooting will result in the same code path on all platforms. But nonetheless, we need to do something minimally reasonable on ->shutdown too to fix the bug. Of course, the ->remove will do more (a full teardown of the tree, with all data structures freed, and this is why the bug was not caught for so long). The new ->shutdown method is kept separate from dsa_unregister_switch not because we couldn't have unregistered the switch, but simply in the interest of doing something quick and to the point. The big question is: does the DSA switch's ->shutdown get called earlier than the DSA master's ->shutdown? If not, there is still a risk that we might still trigger the WARN_ON in unregister_netdevice that says we are attempting to unregister a net_device which has uppers. That's no good. Although the reference to the master net_device won't physically go away even if DSA's ->shutdown comes afterwards, remember we have a dev_hold on it. The answer to that question lies in this comment above device_link_add: * A side effect of the link creation is re-ordering of dpm_list and the * devices_kset list by moving the consumer device and all devices depending * on it to the ends of these lists (that does not happen to devices that have * not been registered when this function is called). so the fact that DSA uses device_link_add towards its master is not exactly for nothing. device_shutdown() walks devices_kset from the back, so this is our guarantee that DSA's shutdown happens before the master's shutdown. Fixes: 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings") Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/ Reported-by: Lino Sanfilippo Signed-off-by: Vladimir Oltean Tested-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_mdio.c | 21 +++++++++++-- drivers/net/dsa/b53/b53_mmap.c | 13 ++++++++ drivers/net/dsa/b53/b53_priv.h | 5 +++ drivers/net/dsa/b53/b53_spi.c | 13 ++++++++ drivers/net/dsa/b53/b53_srab.c | 21 +++++++++++-- drivers/net/dsa/bcm_sf2.c | 12 +++++++ drivers/net/dsa/dsa_loop.c | 22 ++++++++++++- drivers/net/dsa/lan9303-core.c | 6 ++++ drivers/net/dsa/lan9303.h | 1 + drivers/net/dsa/lan9303_i2c.c | 24 +++++++++++--- drivers/net/dsa/lan9303_mdio.c | 15 +++++++++ drivers/net/dsa/lantiq_gswip.c | 18 +++++++++++ drivers/net/dsa/microchip/ksz8795_spi.c | 11 ++++++- drivers/net/dsa/microchip/ksz9477_i2c.c | 14 +++++++-- drivers/net/dsa/microchip/ksz9477_spi.c | 8 +++-- drivers/net/dsa/mt7530.c | 18 +++++++++++ drivers/net/dsa/mv88e6060.c | 18 +++++++++++ drivers/net/dsa/mv88e6xxx/chip.c | 22 ++++++++++++- drivers/net/dsa/ocelot/felix_vsc9959.c | 20 ++++++++++-- drivers/net/dsa/ocelot/seville_vsc9953.c | 20 ++++++++++-- drivers/net/dsa/qca/ar9331.c | 18 +++++++++++ drivers/net/dsa/qca8k.c | 18 +++++++++++ drivers/net/dsa/realtek-smi-core.c | 20 +++++++++++- drivers/net/dsa/sja1105/sja1105_main.c | 21 +++++++++++-- drivers/net/dsa/vitesse-vsc73xx-core.c | 6 ++++ drivers/net/dsa/vitesse-vsc73xx-platform.c | 22 ++++++++++++- drivers/net/dsa/vitesse-vsc73xx-spi.c | 22 ++++++++++++- drivers/net/dsa/vitesse-vsc73xx.h | 1 + include/net/dsa.h | 1 + net/dsa/dsa2.c | 50 ++++++++++++++++++++++++++++++ 30 files changed, 457 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c index a533a90e3904..a7aeb3c132c9 100644 --- a/drivers/net/dsa/b53/b53_mdio.c +++ b/drivers/net/dsa/b53/b53_mdio.c @@ -351,9 +351,25 @@ static int b53_mdio_probe(struct mdio_device *mdiodev) static void b53_mdio_remove(struct mdio_device *mdiodev) { struct b53_device *dev = dev_get_drvdata(&mdiodev->dev); - struct dsa_switch *ds = dev->ds; - dsa_unregister_switch(ds); + if (!dev) + return; + + b53_switch_remove(dev); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void b53_mdio_shutdown(struct mdio_device *mdiodev) +{ + struct b53_device *dev = dev_get_drvdata(&mdiodev->dev); + + if (!dev) + return; + + b53_switch_shutdown(dev); + + dev_set_drvdata(&mdiodev->dev, NULL); } static const struct of_device_id b53_of_match[] = { @@ -373,6 +389,7 @@ MODULE_DEVICE_TABLE(of, b53_of_match); static struct mdio_driver b53_mdio_driver = { .probe = b53_mdio_probe, .remove = b53_mdio_remove, + .shutdown = b53_mdio_shutdown, .mdiodrv.driver = { .name = "bcm53xx", .of_match_table = b53_of_match, diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c index 82680e083cc2..ae4c79d39bc0 100644 --- a/drivers/net/dsa/b53/b53_mmap.c +++ b/drivers/net/dsa/b53/b53_mmap.c @@ -316,9 +316,21 @@ static int b53_mmap_remove(struct platform_device *pdev) if (dev) b53_switch_remove(dev); + platform_set_drvdata(pdev, NULL); + return 0; } +static void b53_mmap_shutdown(struct platform_device *pdev) +{ + struct b53_device *dev = platform_get_drvdata(pdev); + + if (dev) + b53_switch_shutdown(dev); + + platform_set_drvdata(pdev, NULL); +} + static const struct of_device_id b53_mmap_of_table[] = { { .compatible = "brcm,bcm3384-switch" }, { .compatible = "brcm,bcm6328-switch" }, @@ -331,6 +343,7 @@ MODULE_DEVICE_TABLE(of, b53_mmap_of_table); static struct platform_driver b53_mmap_driver = { .probe = b53_mmap_probe, .remove = b53_mmap_remove, + .shutdown = b53_mmap_shutdown, .driver = { .name = "b53-switch", .of_match_table = b53_mmap_of_table, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 5d068acf7cf8..959a52d41f0a 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -228,6 +228,11 @@ static inline void b53_switch_remove(struct b53_device *dev) dsa_unregister_switch(dev->ds); } +static inline void b53_switch_shutdown(struct b53_device *dev) +{ + dsa_switch_shutdown(dev->ds); +} + #define b53_build_op(type_op_size, val_type) \ static inline int b53_##type_op_size(struct b53_device *dev, u8 page, \ u8 reg, val_type val) \ diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c index ecb9f7f6b335..01e37b75471e 100644 --- a/drivers/net/dsa/b53/b53_spi.c +++ b/drivers/net/dsa/b53/b53_spi.c @@ -321,9 +321,21 @@ static int b53_spi_remove(struct spi_device *spi) if (dev) b53_switch_remove(dev); + spi_set_drvdata(spi, NULL); + return 0; } +static void b53_spi_shutdown(struct spi_device *spi) +{ + struct b53_device *dev = spi_get_drvdata(spi); + + if (dev) + b53_switch_shutdown(dev); + + spi_set_drvdata(spi, NULL); +} + static const struct of_device_id b53_spi_of_match[] = { { .compatible = "brcm,bcm5325" }, { .compatible = "brcm,bcm5365" }, @@ -344,6 +356,7 @@ static struct spi_driver b53_spi_driver = { }, .probe = b53_spi_probe, .remove = b53_spi_remove, + .shutdown = b53_spi_shutdown, }; module_spi_driver(b53_spi_driver); diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c index 3f4249de70c5..4591bb1c05d2 100644 --- a/drivers/net/dsa/b53/b53_srab.c +++ b/drivers/net/dsa/b53/b53_srab.c @@ -629,17 +629,34 @@ static int b53_srab_probe(struct platform_device *pdev) static int b53_srab_remove(struct platform_device *pdev) { struct b53_device *dev = platform_get_drvdata(pdev); - struct b53_srab_priv *priv = dev->priv; - b53_srab_intr_set(priv, false); + if (!dev) + return 0; + + b53_srab_intr_set(dev->priv, false); b53_switch_remove(dev); + platform_set_drvdata(pdev, NULL); + return 0; } +static void b53_srab_shutdown(struct platform_device *pdev) +{ + struct b53_device *dev = platform_get_drvdata(pdev); + + if (!dev) + return; + + b53_switch_shutdown(dev); + + platform_set_drvdata(pdev, NULL); +} + static struct platform_driver b53_srab_driver = { .probe = b53_srab_probe, .remove = b53_srab_remove, + .shutdown = b53_srab_shutdown, .driver = { .name = "b53-srab-switch", .of_match_table = b53_srab_of_match, diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index b6c4b3adb171..7578a5c38df5 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1512,6 +1512,9 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev) { struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); + if (!priv) + return 0; + priv->wol_ports_mask = 0; /* Disable interrupts */ bcm_sf2_intr_disable(priv); @@ -1523,6 +1526,8 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev) if (priv->type == BCM7278_DEVICE_ID) reset_control_assert(priv->rcdev); + platform_set_drvdata(pdev, NULL); + return 0; } @@ -1530,6 +1535,9 @@ static void bcm_sf2_sw_shutdown(struct platform_device *pdev) { struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); + if (!priv) + return; + /* For a kernel about to be kexec'd we want to keep the GPHY on for a * successful MDIO bus scan to occur. If we did turn off the GPHY * before (e.g: port_disable), this will also power it back on. @@ -1538,6 +1546,10 @@ static void bcm_sf2_sw_shutdown(struct platform_device *pdev) */ if (priv->hw_params.num_gphy == 1) bcm_sf2_gphy_enable_set(priv->dev->ds, true); + + dsa_switch_shutdown(priv->dev->ds); + + platform_set_drvdata(pdev, NULL); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index bfdf3324aac3..e638e3eea911 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -340,10 +340,29 @@ static int dsa_loop_drv_probe(struct mdio_device *mdiodev) static void dsa_loop_drv_remove(struct mdio_device *mdiodev) { struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); - struct dsa_loop_priv *ps = ds->priv; + struct dsa_loop_priv *ps; + + if (!ds) + return; + + ps = ds->priv; dsa_unregister_switch(ds); dev_put(ps->netdev); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void dsa_loop_drv_shutdown(struct mdio_device *mdiodev) +{ + struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); + + if (!ds) + return; + + dsa_switch_shutdown(ds); + + dev_set_drvdata(&mdiodev->dev, NULL); } static struct mdio_driver dsa_loop_drv = { @@ -352,6 +371,7 @@ static struct mdio_driver dsa_loop_drv = { }, .probe = dsa_loop_drv_probe, .remove = dsa_loop_drv_remove, + .shutdown = dsa_loop_drv_shutdown, }; #define NUM_FIXED_PHYS (DSA_LOOP_NUM_PORTS - 2) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index d7ce281570b5..89f920289ae2 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -1379,6 +1379,12 @@ int lan9303_remove(struct lan9303 *chip) } EXPORT_SYMBOL(lan9303_remove); +void lan9303_shutdown(struct lan9303 *chip) +{ + dsa_switch_shutdown(chip->ds); +} +EXPORT_SYMBOL(lan9303_shutdown); + MODULE_AUTHOR("Juergen Borleis "); MODULE_DESCRIPTION("Core driver for SMSC/Microchip LAN9303 three port ethernet switch"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/dsa/lan9303.h b/drivers/net/dsa/lan9303.h index 11f590b64701..c7f73efa50f0 100644 --- a/drivers/net/dsa/lan9303.h +++ b/drivers/net/dsa/lan9303.h @@ -10,3 +10,4 @@ extern const struct lan9303_phy_ops lan9303_indirect_phy_ops; int lan9303_probe(struct lan9303 *chip, struct device_node *np); int lan9303_remove(struct lan9303 *chip); +void lan9303_shutdown(struct lan9303 *chip); diff --git a/drivers/net/dsa/lan9303_i2c.c b/drivers/net/dsa/lan9303_i2c.c index 9bffaef65a04..8ca4713310fa 100644 --- a/drivers/net/dsa/lan9303_i2c.c +++ b/drivers/net/dsa/lan9303_i2c.c @@ -67,13 +67,28 @@ static int lan9303_i2c_probe(struct i2c_client *client, static int lan9303_i2c_remove(struct i2c_client *client) { - struct lan9303_i2c *sw_dev; + struct lan9303_i2c *sw_dev = i2c_get_clientdata(client); - sw_dev = i2c_get_clientdata(client); if (!sw_dev) - return -ENODEV; + return 0; + + lan9303_remove(&sw_dev->chip); + + i2c_set_clientdata(client, NULL); + + return 0; +} + +static void lan9303_i2c_shutdown(struct i2c_client *client) +{ + struct lan9303_i2c *sw_dev = i2c_get_clientdata(client); + + if (!sw_dev) + return; + + lan9303_shutdown(&sw_dev->chip); - return lan9303_remove(&sw_dev->chip); + i2c_set_clientdata(client, NULL); } /*-------------------------------------------------------------------------*/ @@ -97,6 +112,7 @@ static struct i2c_driver lan9303_i2c_driver = { }, .probe = lan9303_i2c_probe, .remove = lan9303_i2c_remove, + .shutdown = lan9303_i2c_shutdown, .id_table = lan9303_i2c_id, }; module_i2c_driver(lan9303_i2c_driver); diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c index 9cbe80460b53..bbb7032409ba 100644 --- a/drivers/net/dsa/lan9303_mdio.c +++ b/drivers/net/dsa/lan9303_mdio.c @@ -138,6 +138,20 @@ static void lan9303_mdio_remove(struct mdio_device *mdiodev) return; lan9303_remove(&sw_dev->chip); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void lan9303_mdio_shutdown(struct mdio_device *mdiodev) +{ + struct lan9303_mdio *sw_dev = dev_get_drvdata(&mdiodev->dev); + + if (!sw_dev) + return; + + lan9303_shutdown(&sw_dev->chip); + + dev_set_drvdata(&mdiodev->dev, NULL); } /*-------------------------------------------------------------------------*/ @@ -155,6 +169,7 @@ static struct mdio_driver lan9303_mdio_driver = { }, .probe = lan9303_mdio_probe, .remove = lan9303_mdio_remove, + .shutdown = lan9303_mdio_shutdown, }; mdio_module_driver(lan9303_mdio_driver); diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 267324889dd6..3ff4b7e177f3 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -2184,6 +2184,9 @@ static int gswip_remove(struct platform_device *pdev) struct gswip_priv *priv = platform_get_drvdata(pdev); int i; + if (!priv) + return 0; + /* disable the switch */ gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB); @@ -2197,9 +2200,23 @@ static int gswip_remove(struct platform_device *pdev) for (i = 0; i < priv->num_gphy_fw; i++) gswip_gphy_fw_remove(priv, &priv->gphy_fw[i]); + platform_set_drvdata(pdev, NULL); + return 0; } +static void gswip_shutdown(struct platform_device *pdev) +{ + struct gswip_priv *priv = platform_get_drvdata(pdev); + + if (!priv) + return; + + dsa_switch_shutdown(priv->ds); + + platform_set_drvdata(pdev, NULL); +} + static const struct gswip_hw_info gswip_xrx200 = { .max_ports = 7, .cpu_port = 6, @@ -2223,6 +2240,7 @@ MODULE_DEVICE_TABLE(of, gswip_of_match); static struct platform_driver gswip_driver = { .probe = gswip_probe, .remove = gswip_remove, + .shutdown = gswip_shutdown, .driver = { .name = "gswip", .of_match_table = gswip_of_match, diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c index ea7550d1b634..866767b70d65 100644 --- a/drivers/net/dsa/microchip/ksz8795_spi.c +++ b/drivers/net/dsa/microchip/ksz8795_spi.c @@ -94,6 +94,8 @@ static int ksz8795_spi_remove(struct spi_device *spi) if (dev) ksz_switch_remove(dev); + spi_set_drvdata(spi, NULL); + return 0; } @@ -101,8 +103,15 @@ static void ksz8795_spi_shutdown(struct spi_device *spi) { struct ksz_device *dev = spi_get_drvdata(spi); - if (dev && dev->dev_ops->shutdown) + if (!dev) + return; + + if (dev->dev_ops->shutdown) dev->dev_ops->shutdown(dev); + + dsa_switch_shutdown(dev->ds); + + spi_set_drvdata(spi, NULL); } static const struct of_device_id ksz8795_dt_ids[] = { diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c index 4e053a25d077..f3afb8b8c4cc 100644 --- a/drivers/net/dsa/microchip/ksz9477_i2c.c +++ b/drivers/net/dsa/microchip/ksz9477_i2c.c @@ -56,7 +56,10 @@ static int ksz9477_i2c_remove(struct i2c_client *i2c) { struct ksz_device *dev = i2c_get_clientdata(i2c); - ksz_switch_remove(dev); + if (dev) + ksz_switch_remove(dev); + + i2c_set_clientdata(i2c, NULL); return 0; } @@ -65,8 +68,15 @@ static void ksz9477_i2c_shutdown(struct i2c_client *i2c) { struct ksz_device *dev = i2c_get_clientdata(i2c); - if (dev && dev->dev_ops->shutdown) + if (!dev) + return; + + if (dev->dev_ops->shutdown) dev->dev_ops->shutdown(dev); + + dsa_switch_shutdown(dev->ds); + + i2c_set_clientdata(i2c, NULL); } static const struct i2c_device_id ksz9477_i2c_id[] = { diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index 15bc11b3cda4..e3cb0e6c9f6f 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -72,6 +72,8 @@ static int ksz9477_spi_remove(struct spi_device *spi) if (dev) ksz_switch_remove(dev); + spi_set_drvdata(spi, NULL); + return 0; } @@ -79,8 +81,10 @@ static void ksz9477_spi_shutdown(struct spi_device *spi) { struct ksz_device *dev = spi_get_drvdata(spi); - if (dev && dev->dev_ops->shutdown) - dev->dev_ops->shutdown(dev); + if (dev) + dsa_switch_shutdown(dev->ds); + + spi_set_drvdata(spi, NULL); } static const struct of_device_id ksz9477_dt_ids[] = { diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index d0cba2d1cd68..094737e5084a 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -3286,6 +3286,9 @@ mt7530_remove(struct mdio_device *mdiodev) struct mt7530_priv *priv = dev_get_drvdata(&mdiodev->dev); int ret = 0; + if (!priv) + return; + ret = regulator_disable(priv->core_pwr); if (ret < 0) dev_err(priv->dev, @@ -3301,11 +3304,26 @@ mt7530_remove(struct mdio_device *mdiodev) dsa_unregister_switch(priv->ds); mutex_destroy(&priv->reg_mutex); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void mt7530_shutdown(struct mdio_device *mdiodev) +{ + struct mt7530_priv *priv = dev_get_drvdata(&mdiodev->dev); + + if (!priv) + return; + + dsa_switch_shutdown(priv->ds); + + dev_set_drvdata(&mdiodev->dev, NULL); } static struct mdio_driver mt7530_mdio_driver = { .probe = mt7530_probe, .remove = mt7530_remove, + .shutdown = mt7530_shutdown, .mdiodrv.driver = { .name = "mt7530", .of_match_table = mt7530_of_match, diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index 24b8219fd607..a4c6eb9a52d0 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -290,7 +290,24 @@ static void mv88e6060_remove(struct mdio_device *mdiodev) { struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); + if (!ds) + return; + dsa_unregister_switch(ds); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void mv88e6060_shutdown(struct mdio_device *mdiodev) +{ + struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); + + if (!ds) + return; + + dsa_switch_shutdown(ds); + + dev_set_drvdata(&mdiodev->dev, NULL); } static const struct of_device_id mv88e6060_of_match[] = { @@ -303,6 +320,7 @@ static const struct of_device_id mv88e6060_of_match[] = { static struct mdio_driver mv88e6060_driver = { .probe = mv88e6060_probe, .remove = mv88e6060_remove, + .shutdown = mv88e6060_shutdown, .mdiodrv.driver = { .name = "mv88e6060", .of_match_table = mv88e6060_of_match, diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index c45ca2473743..fb10422d2c33 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -6389,7 +6389,12 @@ out: static void mv88e6xxx_remove(struct mdio_device *mdiodev) { struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); - struct mv88e6xxx_chip *chip = ds->priv; + struct mv88e6xxx_chip *chip; + + if (!ds) + return; + + chip = ds->priv; if (chip->info->ptp_support) { mv88e6xxx_hwtstamp_free(chip); @@ -6410,6 +6415,20 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev) mv88e6xxx_g1_irq_free(chip); else mv88e6xxx_irq_poll_free(chip); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void mv88e6xxx_shutdown(struct mdio_device *mdiodev) +{ + struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); + + if (!ds) + return; + + dsa_switch_shutdown(ds); + + dev_set_drvdata(&mdiodev->dev, NULL); } static const struct of_device_id mv88e6xxx_of_match[] = { @@ -6433,6 +6452,7 @@ MODULE_DEVICE_TABLE(of, mv88e6xxx_of_match); static struct mdio_driver mv88e6xxx_driver = { .probe = mv88e6xxx_probe, .remove = mv88e6xxx_remove, + .shutdown = mv88e6xxx_shutdown, .mdiodrv.driver = { .name = "mv88e6085", .of_match_table = mv88e6xxx_of_match, diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 9e2ac8e46619..11b42fd812e4 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1472,9 +1472,10 @@ err_pci_enable: static void felix_pci_remove(struct pci_dev *pdev) { - struct felix *felix; + struct felix *felix = pci_get_drvdata(pdev); - felix = pci_get_drvdata(pdev); + if (!felix) + return; dsa_unregister_switch(felix->ds); @@ -1482,6 +1483,20 @@ static void felix_pci_remove(struct pci_dev *pdev) kfree(felix); pci_disable_device(pdev); + + pci_set_drvdata(pdev, NULL); +} + +static void felix_pci_shutdown(struct pci_dev *pdev) +{ + struct felix *felix = pci_get_drvdata(pdev); + + if (!felix) + return; + + dsa_switch_shutdown(felix->ds); + + pci_set_drvdata(pdev, NULL); } static struct pci_device_id felix_ids[] = { @@ -1498,6 +1513,7 @@ static struct pci_driver felix_vsc9959_pci_driver = { .id_table = felix_ids, .probe = felix_pci_probe, .remove = felix_pci_remove, + .shutdown = felix_pci_shutdown, }; module_pci_driver(felix_vsc9959_pci_driver); diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index deae923c8b7a..de1d34a1f1e4 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -1245,18 +1245,33 @@ err_alloc_felix: static int seville_remove(struct platform_device *pdev) { - struct felix *felix; + struct felix *felix = platform_get_drvdata(pdev); - felix = platform_get_drvdata(pdev); + if (!felix) + return 0; dsa_unregister_switch(felix->ds); kfree(felix->ds); kfree(felix); + platform_set_drvdata(pdev, NULL); + return 0; } +static void seville_shutdown(struct platform_device *pdev) +{ + struct felix *felix = platform_get_drvdata(pdev); + + if (!felix) + return; + + dsa_switch_shutdown(felix->ds); + + platform_set_drvdata(pdev, NULL); +} + static const struct of_device_id seville_of_match[] = { { .compatible = "mscc,vsc9953-switch" }, { }, @@ -1266,6 +1281,7 @@ MODULE_DEVICE_TABLE(of, seville_of_match); static struct platform_driver seville_vsc9953_driver = { .probe = seville_probe, .remove = seville_remove, + .shutdown = seville_shutdown, .driver = { .name = "mscc_seville", .of_match_table = of_match_ptr(seville_of_match), diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index 563d8a279030..a6bfb6abc51a 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -1083,6 +1083,9 @@ static void ar9331_sw_remove(struct mdio_device *mdiodev) struct ar9331_sw_priv *priv = dev_get_drvdata(&mdiodev->dev); unsigned int i; + if (!priv) + return; + for (i = 0; i < ARRAY_SIZE(priv->port); i++) { struct ar9331_sw_port *port = &priv->port[i]; @@ -1094,6 +1097,20 @@ static void ar9331_sw_remove(struct mdio_device *mdiodev) dsa_unregister_switch(&priv->ds); reset_control_assert(priv->sw_reset); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void ar9331_sw_shutdown(struct mdio_device *mdiodev) +{ + struct ar9331_sw_priv *priv = dev_get_drvdata(&mdiodev->dev); + + if (!priv) + return; + + dsa_switch_shutdown(&priv->ds); + + dev_set_drvdata(&mdiodev->dev, NULL); } static const struct of_device_id ar9331_sw_of_match[] = { @@ -1104,6 +1121,7 @@ static const struct of_device_id ar9331_sw_of_match[] = { static struct mdio_driver ar9331_sw_mdio_driver = { .probe = ar9331_sw_probe, .remove = ar9331_sw_remove, + .shutdown = ar9331_sw_shutdown, .mdiodrv.driver = { .name = AR9331_SW_NAME, .of_match_table = ar9331_sw_of_match, diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index bda5a9bf4f52..a984f06f6f04 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1880,10 +1880,27 @@ qca8k_sw_remove(struct mdio_device *mdiodev) struct qca8k_priv *priv = dev_get_drvdata(&mdiodev->dev); int i; + if (!priv) + return; + for (i = 0; i < QCA8K_NUM_PORTS; i++) qca8k_port_set_status(priv, i, 0); dsa_unregister_switch(priv->ds); + + dev_set_drvdata(&mdiodev->dev, NULL); +} + +static void qca8k_sw_shutdown(struct mdio_device *mdiodev) +{ + struct qca8k_priv *priv = dev_get_drvdata(&mdiodev->dev); + + if (!priv) + return; + + dsa_switch_shutdown(priv->ds); + + dev_set_drvdata(&mdiodev->dev, NULL); } #ifdef CONFIG_PM_SLEEP @@ -1940,6 +1957,7 @@ static const struct of_device_id qca8k_of_match[] = { static struct mdio_driver qca8kmdio_driver = { .probe = qca8k_sw_probe, .remove = qca8k_sw_remove, + .shutdown = qca8k_sw_shutdown, .mdiodrv.driver = { .name = "qca8k", .of_match_table = qca8k_of_match, diff --git a/drivers/net/dsa/realtek-smi-core.c b/drivers/net/dsa/realtek-smi-core.c index 8e49d4f85d48..dd2f0d6208b3 100644 --- a/drivers/net/dsa/realtek-smi-core.c +++ b/drivers/net/dsa/realtek-smi-core.c @@ -464,16 +464,33 @@ static int realtek_smi_probe(struct platform_device *pdev) static int realtek_smi_remove(struct platform_device *pdev) { - struct realtek_smi *smi = dev_get_drvdata(&pdev->dev); + struct realtek_smi *smi = platform_get_drvdata(pdev); + + if (!smi) + return 0; dsa_unregister_switch(smi->ds); if (smi->slave_mii_bus) of_node_put(smi->slave_mii_bus->dev.of_node); gpiod_set_value(smi->reset, 1); + platform_set_drvdata(pdev, NULL); + return 0; } +static void realtek_smi_shutdown(struct platform_device *pdev) +{ + struct realtek_smi *smi = platform_get_drvdata(pdev); + + if (!smi) + return; + + dsa_switch_shutdown(smi->ds); + + platform_set_drvdata(pdev, NULL); +} + static const struct of_device_id realtek_smi_of_match[] = { { .compatible = "realtek,rtl8366rb", @@ -495,6 +512,7 @@ static struct platform_driver realtek_smi_driver = { }, .probe = realtek_smi_probe, .remove = realtek_smi_remove, + .shutdown = realtek_smi_shutdown, }; module_platform_driver(realtek_smi_driver); diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 2f8cc6686c38..7c0db80eff00 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -3335,13 +3335,29 @@ static int sja1105_probe(struct spi_device *spi) static int sja1105_remove(struct spi_device *spi) { struct sja1105_private *priv = spi_get_drvdata(spi); - struct dsa_switch *ds = priv->ds; - dsa_unregister_switch(ds); + if (!priv) + return 0; + + dsa_unregister_switch(priv->ds); + + spi_set_drvdata(spi, NULL); return 0; } +static void sja1105_shutdown(struct spi_device *spi) +{ + struct sja1105_private *priv = spi_get_drvdata(spi); + + if (!priv) + return; + + dsa_switch_shutdown(priv->ds); + + spi_set_drvdata(spi, NULL); +} + static const struct of_device_id sja1105_dt_ids[] = { { .compatible = "nxp,sja1105e", .data = &sja1105e_info }, { .compatible = "nxp,sja1105t", .data = &sja1105t_info }, @@ -3365,6 +3381,7 @@ static struct spi_driver sja1105_driver = { }, .probe = sja1105_probe, .remove = sja1105_remove, + .shutdown = sja1105_shutdown, }; module_spi_driver(sja1105_driver); diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index 19ce4aa0973b..a4b1447ff055 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -1225,6 +1225,12 @@ int vsc73xx_remove(struct vsc73xx *vsc) } EXPORT_SYMBOL(vsc73xx_remove); +void vsc73xx_shutdown(struct vsc73xx *vsc) +{ + dsa_switch_shutdown(vsc->ds); +} +EXPORT_SYMBOL(vsc73xx_shutdown); + MODULE_AUTHOR("Linus Walleij "); MODULE_DESCRIPTION("Vitesse VSC7385/7388/7395/7398 driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/dsa/vitesse-vsc73xx-platform.c b/drivers/net/dsa/vitesse-vsc73xx-platform.c index 2a57f337b2a2..fe4b154a0a57 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-platform.c +++ b/drivers/net/dsa/vitesse-vsc73xx-platform.c @@ -116,7 +116,26 @@ static int vsc73xx_platform_remove(struct platform_device *pdev) { struct vsc73xx_platform *vsc_platform = platform_get_drvdata(pdev); - return vsc73xx_remove(&vsc_platform->vsc); + if (!vsc_platform) + return 0; + + vsc73xx_remove(&vsc_platform->vsc); + + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static void vsc73xx_platform_shutdown(struct platform_device *pdev) +{ + struct vsc73xx_platform *vsc_platform = platform_get_drvdata(pdev); + + if (!vsc_platform) + return; + + vsc73xx_shutdown(&vsc_platform->vsc); + + platform_set_drvdata(pdev, NULL); } static const struct vsc73xx_ops vsc73xx_platform_ops = { @@ -144,6 +163,7 @@ MODULE_DEVICE_TABLE(of, vsc73xx_of_match); static struct platform_driver vsc73xx_platform_driver = { .probe = vsc73xx_platform_probe, .remove = vsc73xx_platform_remove, + .shutdown = vsc73xx_platform_shutdown, .driver = { .name = "vsc73xx-platform", .of_match_table = vsc73xx_of_match, diff --git a/drivers/net/dsa/vitesse-vsc73xx-spi.c b/drivers/net/dsa/vitesse-vsc73xx-spi.c index 81eca4a5781d..645398901e05 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-spi.c +++ b/drivers/net/dsa/vitesse-vsc73xx-spi.c @@ -163,7 +163,26 @@ static int vsc73xx_spi_remove(struct spi_device *spi) { struct vsc73xx_spi *vsc_spi = spi_get_drvdata(spi); - return vsc73xx_remove(&vsc_spi->vsc); + if (!vsc_spi) + return 0; + + vsc73xx_remove(&vsc_spi->vsc); + + spi_set_drvdata(spi, NULL); + + return 0; +} + +static void vsc73xx_spi_shutdown(struct spi_device *spi) +{ + struct vsc73xx_spi *vsc_spi = spi_get_drvdata(spi); + + if (!vsc_spi) + return; + + vsc73xx_shutdown(&vsc_spi->vsc); + + spi_set_drvdata(spi, NULL); } static const struct vsc73xx_ops vsc73xx_spi_ops = { @@ -191,6 +210,7 @@ MODULE_DEVICE_TABLE(of, vsc73xx_of_match); static struct spi_driver vsc73xx_spi_driver = { .probe = vsc73xx_spi_probe, .remove = vsc73xx_spi_remove, + .shutdown = vsc73xx_spi_shutdown, .driver = { .name = "vsc73xx-spi", .of_match_table = vsc73xx_of_match, diff --git a/drivers/net/dsa/vitesse-vsc73xx.h b/drivers/net/dsa/vitesse-vsc73xx.h index 7478f8d4e0a9..30b951504e65 100644 --- a/drivers/net/dsa/vitesse-vsc73xx.h +++ b/drivers/net/dsa/vitesse-vsc73xx.h @@ -27,3 +27,4 @@ struct vsc73xx_ops { int vsc73xx_is_addr_valid(u8 block, u8 subblock); int vsc73xx_probe(struct vsc73xx *vsc); int vsc73xx_remove(struct vsc73xx *vsc); +void vsc73xx_shutdown(struct vsc73xx *vsc); diff --git a/include/net/dsa.h b/include/net/dsa.h index 258867eff230..6e29c0e080f6 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1046,6 +1046,7 @@ static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr, void dsa_unregister_switch(struct dsa_switch *ds); int dsa_register_switch(struct dsa_switch *ds); +void dsa_switch_shutdown(struct dsa_switch *ds); struct dsa_switch *dsa_switch_find(int tree_index, int sw_index); #ifdef CONFIG_PM_SLEEP int dsa_switch_suspend(struct dsa_switch *ds); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index eef13cd20f19..fa88e58705f0 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1562,3 +1562,53 @@ void dsa_unregister_switch(struct dsa_switch *ds) mutex_unlock(&dsa2_mutex); } EXPORT_SYMBOL_GPL(dsa_unregister_switch); + +/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is + * blocking that operation from completion, due to the dev_hold taken inside + * netdev_upper_dev_link. Unlink the DSA slave interfaces from being uppers of + * the DSA master, so that the system can reboot successfully. + */ +void dsa_switch_shutdown(struct dsa_switch *ds) +{ + struct net_device *master, *slave_dev; + LIST_HEAD(unregister_list); + struct dsa_port *dp; + + mutex_lock(&dsa2_mutex); + rtnl_lock(); + + list_for_each_entry(dp, &ds->dst->ports, list) { + if (dp->ds != ds) + continue; + + if (!dsa_port_is_user(dp)) + continue; + + master = dp->cpu_dp->master; + slave_dev = dp->slave; + + netdev_upper_dev_unlink(master, slave_dev); + /* Just unlinking ourselves as uppers of the master is not + * sufficient. When the master net device unregisters, that will + * also call dev_close, which we will catch as NETDEV_GOING_DOWN + * and trigger a dev_close on our own devices (dsa_slave_close). + * In turn, that will call dev_mc_unsync on the master's net + * device. If the master is also a DSA switch port, this will + * trigger dsa_slave_set_rx_mode which will call dev_mc_sync on + * its own master. Lockdep will complain about the fact that + * all cascaded masters have the same dsa_master_addr_list_lock_key, + * which it normally would not do if the cascaded masters would + * be in a proper upper/lower relationship, which we've just + * destroyed. + * To suppress the lockdep warnings, let's actually unregister + * the DSA slave interfaces too, to avoid the nonsensical + * multicast address list synchronization on shutdown. + */ + unregister_netdevice_queue(slave_dev, &unregister_list); + } + unregister_netdevice_many(&unregister_list); + + rtnl_unlock(); + mutex_unlock(&dsa2_mutex); +} +EXPORT_SYMBOL_GPL(dsa_switch_shutdown); -- cgit v1.2.3-71-gd317 From 2dcb96bacce36021c2f3eaae0cef607b5bb71ede Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 18 Sep 2021 14:42:35 +0200 Subject: net: core: Correct the sock::sk_lock.owned lockdep annotations lock_sock_fast() and lock_sock_nested() contain lockdep annotations for the sock::sk_lock.owned 'mutex'. sock::sk_lock.owned is not a regular mutex. It is just lockdep wise equivalent. In fact it's an open coded trivial mutex implementation with some interesting features. sock::sk_lock.slock is a regular spinlock protecting the 'mutex' representation sock::sk_lock.owned which is a plain boolean. If 'owned' is true, then some other task holds the 'mutex', otherwise it is uncontended. As this locking construct is obviously endangered by lock ordering issues as any other locking primitive it got lockdep annotated via a dedicated dependency map sock::sk_lock.dep_map which has to be updated at the lock and unlock sites. lock_sock_nested() is a straight forward 'mutex' lock operation: might_sleep(); spin_lock_bh(sock::sk_lock.slock) while (!try_lock(sock::sk_lock.owned)) { spin_unlock_bh(sock::sk_lock.slock); wait_for_release(); spin_lock_bh(sock::sk_lock.slock); } The lockdep annotation for sock::sk_lock.owned is for unknown reasons _after_ the lock has been acquired, i.e. after the code block above and after releasing sock::sk_lock.slock, but inside the bottom halves disabled region: spin_unlock(sock::sk_lock.slock); mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); local_bh_enable(); The placement after the unlock is obvious because otherwise the mutex_acquire() would nest into the spin lock held region. But that's from the lockdep perspective still the wrong place: 1) The mutex_acquire() is issued _after_ the successful acquisition which is pointless because in a dead lock scenario this point is never reached which means that if the deadlock is the first instance of exposing the wrong lock order lockdep does not have a chance to detect it. 2) It only works because lockdep is rather lax on the context from which the mutex_acquire() is issued. Acquiring a mutex inside a bottom halves and therefore non-preemptible region is obviously invalid, except for a trylock which is clearly not the case here. This 'works' stops working on RT enabled kernels where the bottom halves serialization is done via a local lock, which exposes this misplacement because the 'mutex' and the local lock nest the wrong way around and lockdep complains rightfully about a lock inversion. The placement is wrong since the initial commit a5b5bb9a053a ("[PATCH] lockdep: annotate sk_locks") which introduced this. Fix it by moving the mutex_acquire() in front of the actual lock acquisition, which is what the regular mutex_lock() operation does as well. lock_sock_fast() is not that straight forward. It looks at the first glance like a convoluted trylock operation: spin_lock_bh(sock::sk_lock.slock) if (!sock::sk_lock.owned) return false; while (!try_lock(sock::sk_lock.owned)) { spin_unlock_bh(sock::sk_lock.slock); wait_for_release(); spin_lock_bh(sock::sk_lock.slock); } spin_unlock(sock::sk_lock.slock); mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); local_bh_enable(); return true; But that's not the case: lock_sock_fast() is an interesting optimization for short critical sections which can run with bottom halves disabled and sock::sk_lock.slock held. This allows to shortcut the 'mutex' operation in the non contended case by preventing other lockers to acquire sock::sk_lock.owned because they are blocked on sock::sk_lock.slock, which in turn avoids the overhead of doing the heavy processing in release_sock() including waking up wait queue waiters. In the contended case, i.e. when sock::sk_lock.owned == true the behavior is the same as lock_sock_nested(). Semantically this shortcut means, that the task acquired the 'mutex' even if it does not touch the sock::sk_lock.owned field in the non-contended case. Not telling lockdep about this shortcut acquisition is hiding potential lock ordering violations in the fast path. As a consequence the same reasoning as for the above lock_sock_nested() case vs. the placement of the lockdep annotation applies. The current placement of the lockdep annotation was just copied from the original lock_sock(), now renamed to lock_sock_nested(), implementation. Fix this by moving the mutex_acquire() in front of the actual lock acquisition and adding the corresponding mutex_release() into unlock_sock_fast(). Also document the fast path return case with a comment. Reported-by: Sebastian Siewior Signed-off-by: Thomas Gleixner Cc: netdev@vger.kernel.org Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 1 + net/core/sock.c | 37 +++++++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 66a9a90f9558..c005c3c750e8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1640,6 +1640,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) release_sock(sk); __release(&sk->sk_lock.slock); } else { + mutex_release(&sk->sk_lock.dep_map, _RET_IP_); spin_unlock_bh(&sk->sk_lock.slock); } } diff --git a/net/core/sock.c b/net/core/sock.c index 62627e868e03..512e629f9780 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3179,17 +3179,15 @@ EXPORT_SYMBOL(sock_init_data); void lock_sock_nested(struct sock *sk, int subclass) { + /* The sk_lock has mutex_lock() semantics here. */ + mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); + might_sleep(); spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_lock.owned) __lock_sock(sk); sk->sk_lock.owned = 1; - spin_unlock(&sk->sk_lock.slock); - /* - * The sk_lock has mutex_lock() semantics here: - */ - mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); - local_bh_enable(); + spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(lock_sock_nested); @@ -3227,24 +3225,35 @@ EXPORT_SYMBOL(release_sock); */ bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) { + /* The sk_lock has mutex_lock() semantics here. */ + mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + might_sleep(); spin_lock_bh(&sk->sk_lock.slock); - if (!sk->sk_lock.owned) + if (!sk->sk_lock.owned) { /* - * Note : We must disable BH + * Fast path return with bottom halves disabled and + * sock::sk_lock.slock held. + * + * The 'mutex' is not contended and holding + * sock::sk_lock.slock prevents all other lockers to + * proceed so the corresponding unlock_sock_fast() can + * avoid the slow path of release_sock() completely and + * just release slock. + * + * From a semantical POV this is equivalent to 'acquiring' + * the 'mutex', hence the corresponding lockdep + * mutex_release() has to happen in the fast path of + * unlock_sock_fast(). */ return false; + } __lock_sock(sk); sk->sk_lock.owned = 1; - spin_unlock(&sk->sk_lock.slock); - /* - * The sk_lock has mutex_lock() semantics here: - */ - mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); __acquire(&sk->sk_lock.slock); - local_bh_enable(); + spin_unlock_bh(&sk->sk_lock.slock); return true; } EXPORT_SYMBOL(lock_sock_fast); -- cgit v1.2.3-71-gd317 From fd292c189a979838622d5e03e15fa688c81dd50b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 17 Sep 2021 17:29:16 +0300 Subject: net: dsa: tear down devlink port regions when tearing down the devlink port on error Commit 86f8b1c01a0a ("net: dsa: Do not make user port errors fatal") decided it was fine to ignore errors on certain ports that fail to probe, and go on with the ports that do probe fine. Commit fb6ec87f7229 ("net: dsa: Fix type was not set for devlink port") noticed that devlink_port_type_eth_set(dlp, dp->slave); does not get called, and devlink notices after a timeout of 3600 seconds and prints a WARN_ON. So it went ahead to unregister the devlink port. And because there exists an UNUSED port flavour, we actually re-register the devlink port as UNUSED. Commit 08156ba430b4 ("net: dsa: Add devlink port regions support to DSA") added devlink port regions, which are set up by the driver and not by DSA. When we trigger the devlink port deregistration and reregistration as unused, devlink now prints another WARN_ON, from here: devlink_port_unregister: WARN_ON(!list_empty(&devlink_port->region_list)); So the port still has regions, which makes sense, because they were set up by the driver, and the driver doesn't know we're unregistering the devlink port. Somebody needs to tear them down, and optionally (actually it would be nice, to be consistent) set them up again for the new devlink port. But DSA's layering stays in our way quite badly here. The options I've considered are: 1. Introduce a function in devlink to just change a port's type and flavour. No dice, devlink keeps a lot of state, it really wants the port to not be registered when you set its parameters, so changing anything can only be done by destroying what we currently have and recreating it. 2. Make DSA cache the parameters passed to dsa_devlink_port_region_create, and the region returned, keep those in a list, then when the devlink port unregister needs to take place, the existing devlink regions are destroyed by DSA, and we replay the creation of new regions using the cached parameters. Problem: mv88e6xxx keeps the region pointers in chip->ports[port].region, and these will remain stale after DSA frees them. There are many things DSA can do, but updating mv88e6xxx's private pointers is not one of them. 3. Just let the driver do it (i.e. introduce a very specific method called ds->ops->port_reinit_as_unused, which unregisters its devlink port devlink regions, then the old devlink port, then registers the new one, then the devlink port regions for it). While it does work, as opposed to the others, it's pretty horrible from an API perspective and we can do better. 4. Introduce a new pair of methods, ->port_setup and ->port_teardown, which in the case of mv88e6xxx must register and unregister the devlink port regions. Call these 2 methods when the port must be reinitialized as unused. Naturally, I went for the 4th approach. Fixes: 08156ba430b4 ("net: dsa: Add devlink port regions support to DSA") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 16 +++++++- drivers/net/dsa/mv88e6xxx/devlink.c | 73 +++++-------------------------------- drivers/net/dsa/mv88e6xxx/devlink.h | 6 ++- include/net/dsa.h | 8 ++++ net/dsa/dsa2.c | 51 +++++++++++++++++++++++--- 5 files changed, 81 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index fb10422d2c33..8ab0be793811 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3071,7 +3071,7 @@ static void mv88e6xxx_teardown(struct dsa_switch *ds) { mv88e6xxx_teardown_devlink_params(ds); dsa_devlink_resources_unregister(ds); - mv88e6xxx_teardown_devlink_regions(ds); + mv88e6xxx_teardown_devlink_regions_global(ds); } static int mv88e6xxx_setup(struct dsa_switch *ds) @@ -3215,7 +3215,7 @@ unlock: if (err) goto out_resources; - err = mv88e6xxx_setup_devlink_regions(ds); + err = mv88e6xxx_setup_devlink_regions_global(ds); if (err) goto out_params; @@ -3229,6 +3229,16 @@ out_resources: return err; } +static int mv88e6xxx_port_setup(struct dsa_switch *ds, int port) +{ + return mv88e6xxx_setup_devlink_regions_port(ds, port); +} + +static void mv88e6xxx_port_teardown(struct dsa_switch *ds, int port) +{ + mv88e6xxx_teardown_devlink_regions_port(ds, port); +} + /* prod_id for switch families which do not have a PHY model number */ static const u16 family_prod_id_table[] = { [MV88E6XXX_FAMILY_6341] = MV88E6XXX_PORT_SWITCH_ID_PROD_6341, @@ -6116,6 +6126,8 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .change_tag_protocol = mv88e6xxx_change_tag_protocol, .setup = mv88e6xxx_setup, .teardown = mv88e6xxx_teardown, + .port_setup = mv88e6xxx_port_setup, + .port_teardown = mv88e6xxx_port_teardown, .phylink_validate = mv88e6xxx_validate, .phylink_mac_link_state = mv88e6xxx_serdes_pcs_get_state, .phylink_mac_config = mv88e6xxx_mac_config, diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c index 0c0f5ea6680c..381068395c63 100644 --- a/drivers/net/dsa/mv88e6xxx/devlink.c +++ b/drivers/net/dsa/mv88e6xxx/devlink.c @@ -647,26 +647,25 @@ static struct mv88e6xxx_region mv88e6xxx_regions[] = { }, }; -static void -mv88e6xxx_teardown_devlink_regions_global(struct mv88e6xxx_chip *chip) +void mv88e6xxx_teardown_devlink_regions_global(struct dsa_switch *ds) { + struct mv88e6xxx_chip *chip = ds->priv; int i; for (i = 0; i < ARRAY_SIZE(mv88e6xxx_regions); i++) dsa_devlink_region_destroy(chip->regions[i]); } -static void -mv88e6xxx_teardown_devlink_regions_port(struct mv88e6xxx_chip *chip, - int port) +void mv88e6xxx_teardown_devlink_regions_port(struct dsa_switch *ds, int port) { + struct mv88e6xxx_chip *chip = ds->priv; + dsa_devlink_region_destroy(chip->ports[port].region); } -static int mv88e6xxx_setup_devlink_regions_port(struct dsa_switch *ds, - struct mv88e6xxx_chip *chip, - int port) +int mv88e6xxx_setup_devlink_regions_port(struct dsa_switch *ds, int port) { + struct mv88e6xxx_chip *chip = ds->priv; struct devlink_region *region; region = dsa_devlink_port_region_create(ds, @@ -681,40 +680,10 @@ static int mv88e6xxx_setup_devlink_regions_port(struct dsa_switch *ds, return 0; } -static void -mv88e6xxx_teardown_devlink_regions_ports(struct mv88e6xxx_chip *chip) -{ - int port; - - for (port = 0; port < mv88e6xxx_num_ports(chip); port++) - mv88e6xxx_teardown_devlink_regions_port(chip, port); -} - -static int mv88e6xxx_setup_devlink_regions_ports(struct dsa_switch *ds, - struct mv88e6xxx_chip *chip) -{ - int port; - int err; - - for (port = 0; port < mv88e6xxx_num_ports(chip); port++) { - err = mv88e6xxx_setup_devlink_regions_port(ds, chip, port); - if (err) - goto out; - } - - return 0; - -out: - while (port-- > 0) - mv88e6xxx_teardown_devlink_regions_port(chip, port); - - return err; -} - -static int mv88e6xxx_setup_devlink_regions_global(struct dsa_switch *ds, - struct mv88e6xxx_chip *chip) +int mv88e6xxx_setup_devlink_regions_global(struct dsa_switch *ds) { bool (*cond)(struct mv88e6xxx_chip *chip); + struct mv88e6xxx_chip *chip = ds->priv; struct devlink_region_ops *ops; struct devlink_region *region; u64 size; @@ -753,30 +722,6 @@ out: return PTR_ERR(region); } -int mv88e6xxx_setup_devlink_regions(struct dsa_switch *ds) -{ - struct mv88e6xxx_chip *chip = ds->priv; - int err; - - err = mv88e6xxx_setup_devlink_regions_global(ds, chip); - if (err) - return err; - - err = mv88e6xxx_setup_devlink_regions_ports(ds, chip); - if (err) - mv88e6xxx_teardown_devlink_regions_global(chip); - - return err; -} - -void mv88e6xxx_teardown_devlink_regions(struct dsa_switch *ds) -{ - struct mv88e6xxx_chip *chip = ds->priv; - - mv88e6xxx_teardown_devlink_regions_ports(chip); - mv88e6xxx_teardown_devlink_regions_global(chip); -} - int mv88e6xxx_devlink_info_get(struct dsa_switch *ds, struct devlink_info_req *req, struct netlink_ext_ack *extack) diff --git a/drivers/net/dsa/mv88e6xxx/devlink.h b/drivers/net/dsa/mv88e6xxx/devlink.h index 3d72db3dcf95..65ce6a6858b9 100644 --- a/drivers/net/dsa/mv88e6xxx/devlink.h +++ b/drivers/net/dsa/mv88e6xxx/devlink.h @@ -12,8 +12,10 @@ int mv88e6xxx_devlink_param_get(struct dsa_switch *ds, u32 id, struct devlink_param_gset_ctx *ctx); int mv88e6xxx_devlink_param_set(struct dsa_switch *ds, u32 id, struct devlink_param_gset_ctx *ctx); -int mv88e6xxx_setup_devlink_regions(struct dsa_switch *ds); -void mv88e6xxx_teardown_devlink_regions(struct dsa_switch *ds); +int mv88e6xxx_setup_devlink_regions_global(struct dsa_switch *ds); +void mv88e6xxx_teardown_devlink_regions_global(struct dsa_switch *ds); +int mv88e6xxx_setup_devlink_regions_port(struct dsa_switch *ds, int port); +void mv88e6xxx_teardown_devlink_regions_port(struct dsa_switch *ds, int port); int mv88e6xxx_devlink_info_get(struct dsa_switch *ds, struct devlink_info_req *req, diff --git a/include/net/dsa.h b/include/net/dsa.h index 6e29c0e080f6..d784e76113b8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -585,8 +585,16 @@ struct dsa_switch_ops { int (*change_tag_protocol)(struct dsa_switch *ds, int port, enum dsa_tag_protocol proto); + /* Optional switch-wide initialization and destruction methods */ int (*setup)(struct dsa_switch *ds); void (*teardown)(struct dsa_switch *ds); + + /* Per-port initialization and destruction methods. Mandatory if the + * driver registers devlink port regions, optional otherwise. + */ + int (*port_setup)(struct dsa_switch *ds, int port); + void (*port_teardown)(struct dsa_switch *ds, int port); + u32 (*get_phy_flags)(struct dsa_switch *ds, int port); /* diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index fa88e58705f0..f14897d9b31d 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -429,6 +429,7 @@ static int dsa_port_setup(struct dsa_port *dp) { struct devlink_port *dlp = &dp->devlink_port; bool dsa_port_link_registered = false; + struct dsa_switch *ds = dp->ds; bool dsa_port_enabled = false; int err = 0; @@ -438,6 +439,12 @@ static int dsa_port_setup(struct dsa_port *dp) INIT_LIST_HEAD(&dp->fdbs); INIT_LIST_HEAD(&dp->mdbs); + if (ds->ops->port_setup) { + err = ds->ops->port_setup(ds, dp->index); + if (err) + return err; + } + switch (dp->type) { case DSA_PORT_TYPE_UNUSED: dsa_port_disable(dp); @@ -480,8 +487,11 @@ static int dsa_port_setup(struct dsa_port *dp) dsa_port_disable(dp); if (err && dsa_port_link_registered) dsa_port_link_unregister_of(dp); - if (err) + if (err) { + if (ds->ops->port_teardown) + ds->ops->port_teardown(ds, dp->index); return err; + } dp->setup = true; @@ -533,11 +543,15 @@ static int dsa_port_devlink_setup(struct dsa_port *dp) static void dsa_port_teardown(struct dsa_port *dp) { struct devlink_port *dlp = &dp->devlink_port; + struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a, *tmp; if (!dp->setup) return; + if (ds->ops->port_teardown) + ds->ops->port_teardown(ds, dp->index); + devlink_port_type_clear(dlp); switch (dp->type) { @@ -581,6 +595,36 @@ static void dsa_port_devlink_teardown(struct dsa_port *dp) dp->devlink_port_setup = false; } +/* Destroy the current devlink port, and create a new one which has the UNUSED + * flavour. At this point, any call to ds->ops->port_setup has been already + * balanced out by a call to ds->ops->port_teardown, so we know that any + * devlink port regions the driver had are now unregistered. We then call its + * ds->ops->port_setup again, in order for the driver to re-create them on the + * new devlink port. + */ +static int dsa_port_reinit_as_unused(struct dsa_port *dp) +{ + struct dsa_switch *ds = dp->ds; + int err; + + dsa_port_devlink_teardown(dp); + dp->type = DSA_PORT_TYPE_UNUSED; + err = dsa_port_devlink_setup(dp); + if (err) + return err; + + if (ds->ops->port_setup) { + /* On error, leave the devlink port registered, + * dsa_switch_teardown will clean it up later. + */ + err = ds->ops->port_setup(ds, dp->index); + if (err) + return err; + } + + return 0; +} + static int dsa_devlink_info_get(struct devlink *dl, struct devlink_info_req *req, struct netlink_ext_ack *extack) @@ -938,12 +982,9 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) list_for_each_entry(dp, &dst->ports, list) { err = dsa_port_setup(dp); if (err) { - dsa_port_devlink_teardown(dp); - dp->type = DSA_PORT_TYPE_UNUSED; - err = dsa_port_devlink_setup(dp); + err = dsa_port_reinit_as_unused(dp); if (err) goto teardown; - continue; } } -- cgit v1.2.3-71-gd317 From e840f42a49925707fca90e6c7a4095118fdb8c4d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 19 Sep 2021 14:09:49 +0100 Subject: KVM: arm64: Fix PMU probe ordering Russell reported that since 5.13, KVM's probing of the PMU has started to fail on his HW. As it turns out, there is an implicit ordering dependency between the architectural PMU probing code and and KVM's own probing. If, due to probe ordering reasons, KVM probes before the PMU driver, it will fail to detect the PMU and prevent it from being advertised to guests as well as the VMM. Obviously, this is one probing too many, and we should be able to deal with any ordering. Add a callback from the PMU code into KVM to advertise the registration of a host CPU PMU, allowing for any probing order. Fixes: 5421db1be3b1 ("KVM: arm64: Divorce the perf code from oprofile helpers") Reported-by: "Russell King (Oracle)" Tested-by: Russell King (Oracle) Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/YUYRKVflRtUytzy5@shell.armlinux.org.uk Cc: stable@vger.kernel.org --- arch/arm64/kvm/perf.c | 3 --- arch/arm64/kvm/pmu-emul.c | 9 ++++++++- drivers/perf/arm_pmu.c | 2 ++ include/kvm/arm_pmu.h | 3 --- include/linux/perf/arm_pmu.h | 6 ++++++ 5 files changed, 16 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c index f9bb3b14130e..c84fe24b2ea1 100644 --- a/arch/arm64/kvm/perf.c +++ b/arch/arm64/kvm/perf.c @@ -50,9 +50,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { int kvm_perf_init(void) { - if (kvm_pmu_probe_pmuver() != ID_AA64DFR0_PMUVER_IMP_DEF && !is_protected_kvm_enabled()) - static_branch_enable(&kvm_arm_pmu_available); - return perf_register_guest_info_callbacks(&kvm_guest_cbs); } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index f5065f23b413..2af3c37445e0 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -740,7 +740,14 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, kvm_pmu_create_perf_event(vcpu, select_idx); } -int kvm_pmu_probe_pmuver(void) +void kvm_host_pmu_init(struct arm_pmu *pmu) +{ + if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF && + !kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled()) + static_branch_enable(&kvm_arm_pmu_available); +} + +static int kvm_pmu_probe_pmuver(void) { struct perf_event_attr attr = { }; struct perf_event *event; diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 3cbc3baf087f..295cc7952d0e 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -952,6 +952,8 @@ int armpmu_register(struct arm_pmu *pmu) pmu->name, pmu->num_events, has_nmi ? ", using NMIs" : ""); + kvm_host_pmu_init(pmu); + return 0; out_destroy: diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 864b9997efb2..90f21898aad8 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -61,7 +61,6 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); -int kvm_pmu_probe_pmuver(void); #else struct kvm_pmu { }; @@ -118,8 +117,6 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) return 0; } -static inline int kvm_pmu_probe_pmuver(void) { return 0xf; } - #endif #endif diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 505480217cf1..2512e2f9cd4e 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -163,6 +163,12 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn); static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } #endif +#ifdef CONFIG_KVM +void kvm_host_pmu_init(struct arm_pmu *pmu); +#else +#define kvm_host_pmu_init(x) do { } while(0) +#endif + /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc_atomic(void); -- cgit v1.2.3-71-gd317 From 794d5b8a497ff053f56856472e2fae038fa761aa Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 17 Sep 2021 12:50:38 +0200 Subject: swiotlb-xen: this is PV-only on x86 The code is unreachable for HVM or PVH, and it also makes little sense in auto-translated environments. On Arm, with xen_{create,destroy}_contiguous_region() both being stubs, I have a hard time seeing what good the Xen specific variant does - the generic one ought to be fine for all purposes there. Still Arm code explicitly references symbols here, so the code will continue to be included there. Instead of making PCI_XEN's "select" conditional, simply drop it - SWIOTLB_XEN will be available unconditionally in the PV case anyway, and is - as explained above - dead code in non-PV environments. This in turn allows dropping the stubs for xen_{create,destroy}_contiguous_region(), the former of which was broken anyway - it failed to set the DMA handle output. Signed-off-by: Jan Beulich Reviewed-by: Christoph Hellwig Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/5947b8ae-fdc7-225c-4838-84712265fc1e@suse.com Signed-off-by: Juergen Gross --- arch/x86/Kconfig | 1 - drivers/xen/Kconfig | 1 + include/xen/xen-ops.h | 12 ------------ 3 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 88fb922c23a0..a71ced4c711f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2605,7 +2605,6 @@ config PCI_OLPC config PCI_XEN def_bool y depends on PCI && XEN - select SWIOTLB_XEN config MMCONF_FAM10H def_bool y diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index a37eb52fb401..22f5aff0c136 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -177,6 +177,7 @@ config XEN_GRANT_DMA_ALLOC config SWIOTLB_XEN def_bool y + depends on XEN_PV || ARM || ARM64 select DMA_OPS select SWIOTLB diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 39a5580f8feb..db28e79b77ee 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -46,19 +46,7 @@ extern unsigned long *xen_contiguous_bitmap; int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, unsigned int address_bits, dma_addr_t *dma_handle); - void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); -#else -static inline int xen_create_contiguous_region(phys_addr_t pstart, - unsigned int order, - unsigned int address_bits, - dma_addr_t *dma_handle) -{ - return 0; -} - -static inline void xen_destroy_contiguous_region(phys_addr_t pstart, - unsigned int order) { } #endif #if defined(CONFIG_XEN_PV) -- cgit v1.2.3-71-gd317 From d4ffd5df9d18031b6a53f934388726775b4452d3 Mon Sep 17 00:00:00 2001 From: Jiashuo Liang Date: Fri, 30 Jul 2021 11:01:52 +0800 Subject: x86/fault: Fix wrong signal when vsyscall fails with pkey The function __bad_area_nosemaphore() calls kernelmode_fixup_or_oops() with the parameter @signal being actually @pkey, which will send a signal numbered with the argument in @pkey. This bug can be triggered when the kernel fails to access user-given memory pages that are protected by a pkey, so it can go down the do_user_addr_fault() path and pass the !user_mode() check in __bad_area_nosemaphore(). Most cases will simply run the kernel fixup code to make an -EFAULT. But when another condition current->thread.sig_on_uaccess_err is met, which is only used to emulate vsyscall, the kernel will generate the wrong signal. Add a new parameter @pkey to kernelmode_fixup_or_oops() to fix this. [ bp: Massage commit message, fix build error as reported by the 0day bot: https://lkml.kernel.org/r/202109202245.APvuT8BX-lkp@intel.com ] Fixes: 5042d40a264c ("x86/fault: Bypass no_context() for implicit kernel faults from usermode") Reported-by: kernel test robot Signed-off-by: Jiashuo Liang Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/20210730030152.249106-1-liangjs@pku.edu.cn --- arch/x86/include/asm/pkeys.h | 2 -- arch/x86/mm/fault.c | 26 ++++++++++++++++++-------- include/linux/pkeys.h | 2 ++ 3 files changed, 20 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 5c7bcaa79623..1d5f14aff5f6 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -2,8 +2,6 @@ #ifndef _ASM_X86_PKEYS_H #define _ASM_X86_PKEYS_H -#define ARCH_DEFAULT_PKEY 0 - /* * If more than 16 keys are ever supported, a thorough audit * will be necessary to ensure that the types that store key diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b2eefdefc108..84a2c8c4af73 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -710,7 +710,8 @@ oops: static noinline void kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code, - unsigned long address, int signal, int si_code) + unsigned long address, int signal, int si_code, + u32 pkey) { WARN_ON_ONCE(user_mode(regs)); @@ -735,8 +736,12 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code, set_signal_archinfo(address, error_code); - /* XXX: hwpoison faults will set the wrong code. */ - force_sig_fault(signal, si_code, (void __user *)address); + if (si_code == SEGV_PKUERR) { + force_sig_pkuerr((void __user *)address, pkey); + } else { + /* XXX: hwpoison faults will set the wrong code. */ + force_sig_fault(signal, si_code, (void __user *)address); + } } /* @@ -798,7 +803,8 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, struct task_struct *tsk = current; if (!user_mode(regs)) { - kernelmode_fixup_or_oops(regs, error_code, address, pkey, si_code); + kernelmode_fixup_or_oops(regs, error_code, address, + SIGSEGV, si_code, pkey); return; } @@ -930,7 +936,8 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, { /* Kernel mode? Handle exceptions or die: */ if (!user_mode(regs)) { - kernelmode_fixup_or_oops(regs, error_code, address, SIGBUS, BUS_ADRERR); + kernelmode_fixup_or_oops(regs, error_code, address, + SIGBUS, BUS_ADRERR, ARCH_DEFAULT_PKEY); return; } @@ -1396,7 +1403,8 @@ good_area: */ if (!user_mode(regs)) kernelmode_fixup_or_oops(regs, error_code, address, - SIGBUS, BUS_ADRERR); + SIGBUS, BUS_ADRERR, + ARCH_DEFAULT_PKEY); return; } @@ -1416,7 +1424,8 @@ good_area: return; if (fatal_signal_pending(current) && !user_mode(regs)) { - kernelmode_fixup_or_oops(regs, error_code, address, 0, 0); + kernelmode_fixup_or_oops(regs, error_code, address, + 0, 0, ARCH_DEFAULT_PKEY); return; } @@ -1424,7 +1433,8 @@ good_area: /* Kernel mode? Handle exceptions or die: */ if (!user_mode(regs)) { kernelmode_fixup_or_oops(regs, error_code, address, - SIGSEGV, SEGV_MAPERR); + SIGSEGV, SEGV_MAPERR, + ARCH_DEFAULT_PKEY); return; } diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h index 6beb26b7151d..86be8bf27b41 100644 --- a/include/linux/pkeys.h +++ b/include/linux/pkeys.h @@ -4,6 +4,8 @@ #include +#define ARCH_DEFAULT_PKEY 0 + #ifdef CONFIG_ARCH_HAS_PKEYS #include #else /* ! CONFIG_ARCH_HAS_PKEYS */ -- cgit v1.2.3-71-gd317 From c86a2d9058c5a4a05d20ef89e699b7a6b2c89da6 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 17 Sep 2021 00:27:05 +0200 Subject: cpumask: Omit terminating null byte in cpumap_print_{list,bitmask}_to_buf The changes in the patch series [1] introduced a terminating null byte when reading from cpulist or cpumap sysfs files, for example: $ xxd /sys/devices/system/node/node0/cpulist 00000000: 302d 310a 00 0-1.. Before this change, the output looked as follows: $ xxd /sys/devices/system/node/node0/cpulist 00000000: 302d 310a 0-1. Fix this regression by excluding the terminating null byte from the returned length in cpumap_print_list_to_buf and cpumap_print_bitmask_to_buf. [1] https://lore.kernel.org/all/20210806110251.560-1-song.bao.hua@hisilicon.com/ Fixes: 1fae562983ca ("cpumask: introduce cpumap_print_list/bitmask_to_buf to support large bitmask and list") Acked-by: Barry Song Acked-by: Yury Norov Signed-off-by: Tobias Klauser Link: https://lore.kernel.org/r/20210916222705.13554-1-tklauser@distanz.ch Signed-off-by: Greg Kroah-Hartman --- include/linux/cpumask.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 5d4d07a9e1ed..1e7399fc69c0 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -996,14 +996,15 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) * cpumask; Typically used by bin_attribute to export cpumask bitmask * ABI. * - * Returns the length of how many bytes have been copied. + * Returns the length of how many bytes have been copied, excluding + * terminating '\0'. */ static inline ssize_t cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, loff_t off, size_t count) { return bitmap_print_bitmask_to_buf(buf, cpumask_bits(mask), - nr_cpu_ids, off, count); + nr_cpu_ids, off, count) - 1; } /** @@ -1018,7 +1019,7 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, loff_t off, size_t count) { return bitmap_print_list_to_buf(buf, cpumask_bits(mask), - nr_cpu_ids, off, count); + nr_cpu_ids, off, count) - 1; } #if NR_CPUS <= BITS_PER_LONG -- cgit v1.2.3-71-gd317 From a4869faf9642518145a8aa4b52e0d5ab0e7ee896 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 15 Sep 2021 17:07:15 +0800 Subject: scsi: core: Remove 'current_tag' The 'current_tag' field in struct scsi_device is unused now; remove it. Link: https://lore.kernel.org/r/1631696835-136198-4-git-send-email-john.garry@huawei.com Reviewed-by: Bart Van Assche Signed-off-by: Hannes Reinecke Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- include/scsi/scsi_device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 09a17f6e93a7..b97e142a7ca9 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -146,7 +146,6 @@ struct scsi_device { struct scsi_vpd __rcu *vpd_pg83; struct scsi_vpd __rcu *vpd_pg80; struct scsi_vpd __rcu *vpd_pg89; - unsigned char current_tag; /* current tag */ struct scsi_target *sdev_target; blist_flags_t sdev_bflags; /* black/white flags as also found in -- cgit v1.2.3-71-gd317 From 20c36ce2164f1774b487d443ece99b754bc6ad43 Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Thu, 16 Sep 2021 10:52:03 +0800 Subject: irqdomain: Change the type of 'size' in __irq_domain_add() to be consistent The 'size' is used in struct_size(domain, revmap, size) and its input parameter type is 'size_t'(unsigned int). Changing the size to 'unsigned int' to make the type consistent. Signed-off-by: Bixuan Cui Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210916025203.44841-1-cuibixuan@huawei.com --- include/linux/irqdomain.h | 2 +- kernel/irq/irqdomain.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 23e4ee523576..9ee238ad29ce 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -251,7 +251,7 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa) } void irq_domain_free_fwnode(struct fwnode_handle *fwnode); -struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, +struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, void *host_data); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 62be16135e7c..bfa289ed57ab 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -136,7 +136,7 @@ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode); * Allocates and initializes an irq_domain structure. * Returns pointer to IRQ domain, or NULL on failure. */ -struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, +struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, void *host_data) -- cgit v1.2.3-71-gd317 From a68de80f61f6af397bc06fb391ff2e571c9c4d80 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 1 Sep 2021 13:30:27 -0700 Subject: entry: rseq: Call rseq_handle_notify_resume() in tracehook_notify_resume() Invoke rseq_handle_notify_resume() from tracehook_notify_resume() now that the two function are always called back-to-back by architectures that have rseq. The rseq helper is stubbed out for architectures that don't support rseq, i.e. this is a nop across the board. Note, tracehook_notify_resume() is horribly named and arguably does not belong in tracehook.h as literally every line of code in it has nothing to do with tracing. But, that's been true since commit a42c6ded827d ("move key_repace_session_keyring() into tracehook_notify_resume()") first usurped tracehook_notify_resume() back in 2012. Punt cleaning that mess up to future patches. No functional change intended. Acked-by: Mathieu Desnoyers Signed-off-by: Sean Christopherson Message-Id: <20210901203030.1292304-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/arm/kernel/signal.c | 1 - arch/arm64/kernel/signal.c | 4 +--- arch/csky/kernel/signal.c | 4 +--- arch/mips/kernel/signal.c | 4 +--- arch/powerpc/kernel/signal.c | 4 +--- include/linux/tracehook.h | 2 ++ kernel/entry/common.c | 4 +--- kernel/entry/kvm.c | 4 +--- 8 files changed, 8 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index d0a800be0486..a41e27ace391 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -628,7 +628,6 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) uprobe_notify_resume(regs); } else { tracehook_notify_resume(regs); - rseq_handle_notify_resume(NULL, regs); } } local_irq_disable(); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 9fe70b12b34f..c287b9407f28 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -940,10 +940,8 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); - if (thread_flags & _TIF_NOTIFY_RESUME) { + if (thread_flags & _TIF_NOTIFY_RESUME) tracehook_notify_resume(regs); - rseq_handle_notify_resume(NULL, regs); - } if (thread_flags & _TIF_FOREIGN_FPSTATE) fpsimd_restore_current_state(); diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index 312f046d452d..bc4238b9f709 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -260,8 +260,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); - if (thread_info_flags & _TIF_NOTIFY_RESUME) { + if (thread_info_flags & _TIF_NOTIFY_RESUME) tracehook_notify_resume(regs); - rseq_handle_notify_resume(NULL, regs); - } } diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index f1e985109da0..c9b2a75563e1 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -906,10 +906,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused, if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); - if (thread_info_flags & _TIF_NOTIFY_RESUME) { + if (thread_info_flags & _TIF_NOTIFY_RESUME) tracehook_notify_resume(regs); - rseq_handle_notify_resume(NULL, regs); - } user_enter(); } diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index e600764a926c..b93b87df499d 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -293,10 +293,8 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) do_signal(current); } - if (thread_info_flags & _TIF_NOTIFY_RESUME) { + if (thread_info_flags & _TIF_NOTIFY_RESUME) tracehook_notify_resume(regs); - rseq_handle_notify_resume(NULL, regs); - } } static unsigned long get_tm_stackpointer(struct task_struct *tsk) diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 3e80c4bc66f7..2564b7434b4d 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -197,6 +197,8 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) mem_cgroup_handle_over_high(); blkcg_maybe_throttle_current(); + + rseq_handle_notify_resume(NULL, regs); } /* diff --git a/kernel/entry/common.c b/kernel/entry/common.c index bf16395b9e13..d5a61d565ad5 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -171,10 +171,8 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) handle_signal_work(regs, ti_work); - if (ti_work & _TIF_NOTIFY_RESUME) { + if (ti_work & _TIF_NOTIFY_RESUME) tracehook_notify_resume(regs); - rseq_handle_notify_resume(NULL, regs); - } /* Architecture specific TIF work */ arch_exit_to_user_mode_work(regs, ti_work); diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c index 049fd06b4c3d..49972ee99aff 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/kvm.c @@ -19,10 +19,8 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) if (ti_work & _TIF_NEED_RESCHED) schedule(); - if (ti_work & _TIF_NOTIFY_RESUME) { + if (ti_work & _TIF_NOTIFY_RESUME) tracehook_notify_resume(NULL); - rseq_handle_notify_resume(NULL, NULL); - } ret = arch_xfer_to_guest_mode_handle_work(vcpu, ti_work); if (ret) -- cgit v1.2.3-71-gd317 From 4eeef2424153e79910d65248b5e1abf137d050e9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Sep 2021 11:32:19 -0700 Subject: KVM: x86: Query vcpu->vcpu_idx directly and drop its accessor Read vcpu->vcpu_idx directly instead of bouncing through the one-line wrapper, kvm_vcpu_get_idx(), and drop the wrapper. The wrapper is a remnant of the original implementation and serves no purpose; remove it before it gains more users. Back when kvm_vcpu_get_idx() was added by commit 497d72d80a78 ("KVM: Add kvm_vcpu_get_idx to get vcpu index in kvm->vcpus"), the implementation was more than just a simple wrapper as vcpu->vcpu_idx did not exist and retrieving the index meant walking over the vCPU array to find the given vCPU. When vcpu_idx was introduced by commit 8750e72a79dd ("KVM: remember position in kvm->vcpus array"), the helper was left behind, likely to avoid extra thrash (but even then there were only two users, the original arm usage having been removed at some point in the past). No functional change intended. Suggested-by: Vitaly Kuznetsov Signed-off-by: Sean Christopherson Reviewed-by: Maxim Levitsky Reviewed-by: Vitaly Kuznetsov Message-Id: <20210910183220.2397812-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/s390/kvm/interrupt.c | 4 ++-- arch/s390/kvm/kvm-s390.c | 2 +- arch/s390/kvm/kvm-s390.h | 2 +- arch/x86/kvm/hyperv.c | 7 +++---- arch/x86/kvm/hyperv.h | 2 +- include/linux/kvm_host.h | 5 ----- 6 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 16256e17a544..10722455fd02 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -419,13 +419,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) static void __set_cpu_idle(struct kvm_vcpu *vcpu) { kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT); - set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); + set_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask); } static void __unset_cpu_idle(struct kvm_vcpu *vcpu) { kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT); - clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); + clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask); } static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 752a0ffab9bf..6a6dd5e1daf6 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4066,7 +4066,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) kvm_s390_patch_guest_per_regs(vcpu); } - clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask); + clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); vcpu->arch.sie_block->icptcode = 0; cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index ecd741ee3276..52bc8fbaa60a 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -79,7 +79,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) { - return test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); + return test_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask); } static inline int kvm_is_ucontrol(struct kvm *kvm) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 232a86a6faaf..d5124b520f76 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -939,7 +939,7 @@ static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) stimer_init(&hv_vcpu->stimer[i], i); - hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu); + hv_vcpu->vp_index = vcpu->vcpu_idx; return 0; } @@ -1444,7 +1444,6 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) switch (msr) { case HV_X64_MSR_VP_INDEX: { struct kvm_hv *hv = to_kvm_hv(vcpu->kvm); - int vcpu_idx = kvm_vcpu_get_idx(vcpu); u32 new_vp_index = (u32)data; if (!host || new_vp_index >= KVM_MAX_VCPUS) @@ -1459,9 +1458,9 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) * VP index is changing, adjust num_mismatched_vp_indexes if * it now matches or no longer matches vcpu_idx. */ - if (hv_vcpu->vp_index == vcpu_idx) + if (hv_vcpu->vp_index == vcpu->vcpu_idx) atomic_inc(&hv->num_mismatched_vp_indexes); - else if (new_vp_index == vcpu_idx) + else if (new_vp_index == vcpu->vcpu_idx) atomic_dec(&hv->num_mismatched_vp_indexes); hv_vcpu->vp_index = new_vp_index; diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 730da8537d05..ed1c4e546d04 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -83,7 +83,7 @@ static inline u32 kvm_hv_get_vpindex(struct kvm_vcpu *vcpu) { struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); - return hv_vcpu ? hv_vcpu->vp_index : kvm_vcpu_get_idx(vcpu); + return hv_vcpu ? hv_vcpu->vp_index : vcpu->vcpu_idx; } int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 041ca7f15ea4..000ea73dd324 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -721,11 +721,6 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) return NULL; } -static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu) -{ - return vcpu->vcpu_idx; -} - #define kvm_for_each_memslot(memslot, slots) \ for (memslot = &slots->memslots[0]; \ memslot < slots->memslots + slots->used_slots; memslot++) \ -- cgit v1.2.3-71-gd317 From 09d23174402da0f10e98da2c61bb5ac8e7d79fdd Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Mon, 20 Sep 2021 19:18:50 +0200 Subject: ALSA: rawmidi: introduce SNDRV_RAWMIDI_IOCTL_USER_PVERSION The new framing mode causes the user space regression, because the alsa-lib code does not initialize the reserved space in the params structure when the device is opened. This change adds SNDRV_RAWMIDI_IOCTL_USER_PVERSION like we do for the PCM interface for the protocol acknowledgment. Cc: David Henningsson Cc: Fixes: 08fdced60ca0 ("ALSA: rawmidi: Add framing mode") BugLink: https://github.com/alsa-project/alsa-lib/issues/178 Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20210920171850.154186-1-perex@perex.cz Signed-off-by: Takashi Iwai --- include/sound/rawmidi.h | 1 + include/uapi/sound/asound.h | 1 + sound/core/rawmidi.c | 9 +++++++++ 3 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index 989e1517332d..7a08ed2acd60 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -98,6 +98,7 @@ struct snd_rawmidi_file { struct snd_rawmidi *rmidi; struct snd_rawmidi_substream *input; struct snd_rawmidi_substream *output; + unsigned int user_pversion; /* supported protocol version */ }; struct snd_rawmidi_str { diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 1d84ec9db93b..5859ca0a1439 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -784,6 +784,7 @@ struct snd_rawmidi_status { #define SNDRV_RAWMIDI_IOCTL_PVERSION _IOR('W', 0x00, int) #define SNDRV_RAWMIDI_IOCTL_INFO _IOR('W', 0x01, struct snd_rawmidi_info) +#define SNDRV_RAWMIDI_IOCTL_USER_PVERSION _IOW('W', 0x02, int) #define SNDRV_RAWMIDI_IOCTL_PARAMS _IOWR('W', 0x10, struct snd_rawmidi_params) #define SNDRV_RAWMIDI_IOCTL_STATUS _IOWR('W', 0x20, struct snd_rawmidi_status) #define SNDRV_RAWMIDI_IOCTL_DROP _IOW('W', 0x30, int) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 6c0a4a67ad2e..6f30231bdb88 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -873,12 +873,21 @@ static long snd_rawmidi_ioctl(struct file *file, unsigned int cmd, unsigned long return -EINVAL; } } + case SNDRV_RAWMIDI_IOCTL_USER_PVERSION: + if (get_user(rfile->user_pversion, (unsigned int __user *)arg)) + return -EFAULT; + return 0; + case SNDRV_RAWMIDI_IOCTL_PARAMS: { struct snd_rawmidi_params params; if (copy_from_user(¶ms, argp, sizeof(struct snd_rawmidi_params))) return -EFAULT; + if (rfile->user_pversion < SNDRV_PROTOCOL_VERSION(2, 0, 2)) { + params.mode = 0; + memset(params.reserved, 0, sizeof(params.reserved)); + } switch (params.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: if (rfile->output == NULL) -- cgit v1.2.3-71-gd317 From 6bc6db000295332bae2c1e8815d7450b72923d23 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sat, 18 Sep 2021 08:56:29 +0800 Subject: KVM: Remove tlbs_dirty There is no user of tlbs_dirty. Signed-off-by: Lai Jiangshan Signed-off-by: Paolo Bonzini Message-Id: <20210918005636.3675-4-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - virt/kvm/kvm_main.c | 10 ++-------- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 000ea73dd324..0f18df7fe874 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -608,7 +608,6 @@ struct kvm { unsigned long mmu_notifier_range_start; unsigned long mmu_notifier_range_end; #endif - long tlbs_dirty; struct list_head devices; u64 manual_dirty_log_protect; struct dentry *debugfs_dentry; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e95e7a9e4d53..7851f3a1b5f7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -326,13 +326,8 @@ EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request); #ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL void kvm_flush_remote_tlbs(struct kvm *kvm) { - /* - * Read tlbs_dirty before setting KVM_REQ_TLB_FLUSH in - * kvm_make_all_cpus_request. - */ - long dirty_count = smp_load_acquire(&kvm->tlbs_dirty); - ++kvm->stat.generic.remote_tlb_flush_requests; + /* * We want to publish modifications to the page tables before reading * mode. Pairs with a memory barrier in arch-specific code. @@ -347,7 +342,6 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) if (!kvm_arch_flush_remote_tlb(kvm) || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) ++kvm->stat.generic.remote_tlb_flush; - cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); } EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); #endif @@ -552,7 +546,7 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm, } } - if (range->flush_on_ret && (ret || kvm->tlbs_dirty)) + if (range->flush_on_ret && ret) kvm_flush_remote_tlbs(kvm); if (locked) -- cgit v1.2.3-71-gd317 From 93368aab0efc87288cac65e99c9ed2e0ffc9e7d0 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 21 Sep 2021 22:35:30 +0800 Subject: erofs: fix up erofs_lookup tracepoint Fix up a misuse that the filename pointer isn't always valid in the ring buffer, and we should copy the content instead. Link: https://lore.kernel.org/r/20210921143531.81356-1-hsiangkao@linux.alibaba.com Fixes: 13f06f48f7bf ("staging: erofs: support tracepoint") Cc: stable@vger.kernel.org # 4.19+ Reviewed-by: Chao Yu Signed-off-by: Gao Xiang --- include/trace/events/erofs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index bf9806fd1306..db4f2cec8360 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -35,20 +35,20 @@ TRACE_EVENT(erofs_lookup, TP_STRUCT__entry( __field(dev_t, dev ) __field(erofs_nid_t, nid ) - __field(const char *, name ) + __string(name, dentry->d_name.name ) __field(unsigned int, flags ) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->nid = EROFS_I(dir)->nid; - __entry->name = dentry->d_name.name; + __assign_str(name, dentry->d_name.name); __entry->flags = flags; ), TP_printk("dev = (%d,%d), pnid = %llu, name:%s, flags:%x", show_dev_nid(__entry), - __entry->name, + __get_str(name), __entry->flags) ); -- cgit v1.2.3-71-gd317 From 5501765a02a6c324f78581e6bb8209d054fe13ae Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 15 Sep 2021 10:09:38 -0700 Subject: driver core: fw_devlink: Add support for FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD If a parent device is also a supplier to a child device, fw_devlink=on by design delays the probe() of the child device until the probe() of the parent finishes successfully. However, some drivers of such parent devices (where parent is also a supplier) expect the child device to finish probing successfully as soon as they are added using device_add() and before the probe() of the parent device has completed successfully. One example of such a case is discussed in the link mentioned below. Add a flag to make fw_devlink=on not enforce these supplier-consumer relationships, so these drivers can continue working. Link: https://lore.kernel.org/netdev/CAGETcx_uj0V4DChME-gy5HGKTYnxLBX=TH2rag29f_p=UcG+Tg@mail.gmail.com/ Fixes: ea718c699055 ("Revert "Revert "driver core: Set fw_devlink=on by default""") Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20210915170940.617415-3-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 19 +++++++++++++++++++ include/linux/fwnode.h | 11 ++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/base/core.c b/drivers/base/core.c index 316df6027093..21d4cb5d3767 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1722,6 +1722,25 @@ static int fw_devlink_create_devlink(struct device *con, struct device *sup_dev; int ret = 0; + /* + * In some cases, a device P might also be a supplier to its child node + * C. However, this would defer the probe of C until the probe of P + * completes successfully. This is perfectly fine in the device driver + * model. device_add() doesn't guarantee probe completion of the device + * by the time it returns. + * + * However, there are a few drivers that assume C will finish probing + * as soon as it's added and before P finishes probing. So, we provide + * a flag to let fw_devlink know not to delay the probe of C until the + * probe of P completes successfully. + * + * When such a flag is set, we can't create device links where P is the + * supplier of C as that would delay the probe of C. + */ + if (sup_handle->flags & FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD && + fwnode_is_ancestor_of(sup_handle, con->fwnode)) + return -EINVAL; + sup_dev = get_dev_from_fwnode(sup_handle); if (sup_dev) { /* diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 59828516ebaf..9f4ad719bfe3 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -22,10 +22,15 @@ struct device; * LINKS_ADDED: The fwnode has already be parsed to add fwnode links. * NOT_DEVICE: The fwnode will never be populated as a struct device. * INITIALIZED: The hardware corresponding to fwnode has been initialized. + * NEEDS_CHILD_BOUND_ON_ADD: For this fwnode/device to probe successfully, its + * driver needs its child devices to be bound with + * their respective drivers as soon as they are + * added. */ -#define FWNODE_FLAG_LINKS_ADDED BIT(0) -#define FWNODE_FLAG_NOT_DEVICE BIT(1) -#define FWNODE_FLAG_INITIALIZED BIT(2) +#define FWNODE_FLAG_LINKS_ADDED BIT(0) +#define FWNODE_FLAG_NOT_DEVICE BIT(1) +#define FWNODE_FLAG_INITIALIZED BIT(2) +#define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD BIT(3) struct fwnode_handle { struct fwnode_handle *secondary; -- cgit v1.2.3-71-gd317 From 12064c1768439fa0882547010afae6b52aafa7af Mon Sep 17 00:00:00 2001 From: Jia He Date: Thu, 23 Sep 2021 11:35:57 +0800 Subject: Revert "ACPI: Add memory semantics to acpi_os_map_memory()" This reverts commit 437b38c51162f8b87beb28a833c4d5dc85fa864e. The memory semantics added in commit 437b38c51162 causes SystemMemory Operation region, whose address range is not described in the EFI memory map to be mapped as NormalNC memory on arm64 platforms (through acpi_os_map_memory() in acpi_ex_system_memory_space_handler()). This triggers the following abort on an ARM64 Ampere eMAG machine, because presumably the physical address range area backing the Opregion does not support NormalNC memory attributes driven on the bus. Internal error: synchronous external abort: 96000410 [#1] SMP Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0+ #462 Hardware name: MiTAC RAPTOR EV-883832-X3-0001/RAPTOR, BIOS 0.14 02/22/2019 pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [...snip...] Call trace: acpi_ex_system_memory_space_handler+0x26c/0x2c8 acpi_ev_address_space_dispatch+0x228/0x2c4 acpi_ex_access_region+0x114/0x268 acpi_ex_field_datum_io+0x128/0x1b8 acpi_ex_extract_from_field+0x14c/0x2ac acpi_ex_read_data_from_field+0x190/0x1b8 acpi_ex_resolve_node_to_value+0x1ec/0x288 acpi_ex_resolve_to_value+0x250/0x274 acpi_ds_evaluate_name_path+0xac/0x124 acpi_ds_exec_end_op+0x90/0x410 acpi_ps_parse_loop+0x4ac/0x5d8 acpi_ps_parse_aml+0xe0/0x2c8 acpi_ps_execute_method+0x19c/0x1ac acpi_ns_evaluate+0x1f8/0x26c acpi_ns_init_one_device+0x104/0x140 acpi_ns_walk_namespace+0x158/0x1d0 acpi_ns_initialize_devices+0x194/0x218 acpi_initialize_objects+0x48/0x50 acpi_init+0xe0/0x498 If the Opregion address range is not present in the EFI memory map there is no way for us to determine the memory attributes to use to map it - defaulting to NormalNC does not work (and it is not correct on a memory region that may have read side-effects) and therefore commit 437b38c51162 should be reverted, which means reverting back to the original behavior whereby address ranges that are mapped using acpi_os_map_memory() default to the safe devicenGnRnE attributes on ARM64 if the mapped address range is not defined in the EFI memory map. Fixes: 437b38c51162 ("ACPI: Add memory semantics to acpi_os_map_memory()") Signed-off-by: Jia He Acked-by: Lorenzo Pieralisi Acked-by: Catalin Marinas Signed-off-by: Rafael J. Wysocki --- arch/arm64/include/asm/acpi.h | 3 --- arch/arm64/kernel/acpi.c | 19 +++---------------- drivers/acpi/osl.c | 23 +++++++---------------- include/acpi/acpi_io.h | 8 -------- 4 files changed, 10 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 7535dc7cc5aa..bd68e1b7f29f 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -50,9 +50,6 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr); void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); #define acpi_os_ioremap acpi_os_ioremap -void __iomem *acpi_os_memmap(acpi_physical_address phys, acpi_size size); -#define acpi_os_memmap acpi_os_memmap - typedef u64 phys_cpuid_t; #define PHYS_CPUID_INVALID INVALID_HWID diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 1c9c2f7a1c04..f3851724fe35 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -273,8 +273,7 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr) return __pgprot(PROT_DEVICE_nGnRnE); } -static void __iomem *__acpi_os_ioremap(acpi_physical_address phys, - acpi_size size, bool memory) +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { efi_memory_desc_t *md, *region = NULL; pgprot_t prot; @@ -300,11 +299,9 @@ static void __iomem *__acpi_os_ioremap(acpi_physical_address phys, * It is fine for AML to remap regions that are not represented in the * EFI memory map at all, as it only describes normal memory, and MMIO * regions that require a virtual mapping to make them accessible to - * the EFI runtime services. Determine the region default - * attributes by checking the requested memory semantics. + * the EFI runtime services. */ - prot = memory ? __pgprot(PROT_NORMAL_NC) : - __pgprot(PROT_DEVICE_nGnRnE); + prot = __pgprot(PROT_DEVICE_nGnRnE); if (region) { switch (region->type) { case EFI_LOADER_CODE: @@ -364,16 +361,6 @@ static void __iomem *__acpi_os_ioremap(acpi_physical_address phys, return __ioremap(phys, size, prot); } -void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) -{ - return __acpi_os_ioremap(phys, size, false); -} - -void __iomem *acpi_os_memmap(acpi_physical_address phys, acpi_size size) -{ - return __acpi_os_ioremap(phys, size, true); -} - /* * Claim Synchronous External Aborts as a firmware first notification. * diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index a43f1521efe6..45c5c0e45e33 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -284,8 +284,7 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size) #define should_use_kmap(pfn) page_is_ram(pfn) #endif -static void __iomem *acpi_map(acpi_physical_address pg_off, unsigned long pg_sz, - bool memory) +static void __iomem *acpi_map(acpi_physical_address pg_off, unsigned long pg_sz) { unsigned long pfn; @@ -295,8 +294,7 @@ static void __iomem *acpi_map(acpi_physical_address pg_off, unsigned long pg_sz, return NULL; return (void __iomem __force *)kmap(pfn_to_page(pfn)); } else - return memory ? acpi_os_memmap(pg_off, pg_sz) : - acpi_os_ioremap(pg_off, pg_sz); + return acpi_os_ioremap(pg_off, pg_sz); } static void acpi_unmap(acpi_physical_address pg_off, void __iomem *vaddr) @@ -311,10 +309,9 @@ static void acpi_unmap(acpi_physical_address pg_off, void __iomem *vaddr) } /** - * __acpi_os_map_iomem - Get a virtual address for a given physical address range. + * acpi_os_map_iomem - Get a virtual address for a given physical address range. * @phys: Start of the physical address range to map. * @size: Size of the physical address range to map. - * @memory: true if remapping memory, false if IO * * Look up the given physical address range in the list of existing ACPI memory * mappings. If found, get a reference to it and return a pointer to it (its @@ -324,8 +321,8 @@ static void acpi_unmap(acpi_physical_address pg_off, void __iomem *vaddr) * During early init (when acpi_permanent_mmap has not been set yet) this * routine simply calls __acpi_map_table() to get the job done. */ -static void __iomem __ref -*__acpi_os_map_iomem(acpi_physical_address phys, acpi_size size, bool memory) +void __iomem __ref +*acpi_os_map_iomem(acpi_physical_address phys, acpi_size size) { struct acpi_ioremap *map; void __iomem *virt; @@ -356,7 +353,7 @@ static void __iomem __ref pg_off = round_down(phys, PAGE_SIZE); pg_sz = round_up(phys + size, PAGE_SIZE) - pg_off; - virt = acpi_map(phys, size, memory); + virt = acpi_map(phys, size); if (!virt) { mutex_unlock(&acpi_ioremap_lock); kfree(map); @@ -375,17 +372,11 @@ out: mutex_unlock(&acpi_ioremap_lock); return map->virt + (phys - map->phys); } - -void __iomem *__ref -acpi_os_map_iomem(acpi_physical_address phys, acpi_size size) -{ - return __acpi_os_map_iomem(phys, size, false); -} EXPORT_SYMBOL_GPL(acpi_os_map_iomem); void *__ref acpi_os_map_memory(acpi_physical_address phys, acpi_size size) { - return (void *)__acpi_os_map_iomem(phys, size, true); + return (void *)acpi_os_map_iomem(phys, size); } EXPORT_SYMBOL_GPL(acpi_os_map_memory); diff --git a/include/acpi/acpi_io.h b/include/acpi/acpi_io.h index a0212e67d6f4..027faa8883aa 100644 --- a/include/acpi/acpi_io.h +++ b/include/acpi/acpi_io.h @@ -14,14 +14,6 @@ static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys, } #endif -#ifndef acpi_os_memmap -static inline void __iomem *acpi_os_memmap(acpi_physical_address phys, - acpi_size size) -{ - return ioremap_cache(phys, size); -} -#endif - extern bool acpi_permanent_mmap; void __iomem __ref -- cgit v1.2.3-71-gd317 From 597aa16c782496bf74c5dc3b45ff472ade6cee64 Mon Sep 17 00:00:00 2001 From: Xiao Liang Date: Thu, 23 Sep 2021 23:03:19 +0800 Subject: net: ipv4: Fix rtnexthop len when RTA_FLOW is present Multipath RTA_FLOW is embedded in nexthop. Dump it in fib_add_nexthop() to get the length of rtnexthop correct. Fixes: b0f60193632e ("ipv4: Refactor nexthop attributes in fib_dump_info") Signed-off-by: Xiao Liang Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- include/net/nexthop.h | 2 +- net/ipv4/fib_semantics.c | 16 +++++++++------- net/ipv6/route.c | 5 +++-- 4 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 21c5386d4a6d..ab5348e57db1 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -597,5 +597,5 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh, u8 rt_family, unsigned char *flags, bool skip_oif); int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh, - int nh_weight, u8 rt_family); + int nh_weight, u8 rt_family, u32 nh_tclassid); #endif /* _NET_FIB_H */ diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 10e1777877e6..28085b995ddc 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -325,7 +325,7 @@ int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, struct fib_nh_common *nhc = &nhi->fib_nhc; int weight = nhg->nh_entries[i].weight; - if (fib_add_nexthop(skb, nhc, weight, rt_family) < 0) + if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0) return -EMSGSIZE; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b42c429cebbe..3364cb9c67e0 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1661,7 +1661,7 @@ EXPORT_SYMBOL_GPL(fib_nexthop_info); #if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6) int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, - int nh_weight, u8 rt_family) + int nh_weight, u8 rt_family, u32 nh_tclassid) { const struct net_device *dev = nhc->nhc_dev; struct rtnexthop *rtnh; @@ -1679,6 +1679,9 @@ int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, rtnh->rtnh_flags = flags; + if (nh_tclassid && nla_put_u32(skb, RTA_FLOW, nh_tclassid)) + goto nla_put_failure; + /* length of rtnetlink header + attributes */ rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; @@ -1706,14 +1709,13 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) } for_nexthops(fi) { - if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight, - AF_INET) < 0) - goto nla_put_failure; + u32 nh_tclassid = 0; #ifdef CONFIG_IP_ROUTE_CLASSID - if (nh->nh_tclassid && - nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) - goto nla_put_failure; + nh_tclassid = nh->nh_tclassid; #endif + if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight, + AF_INET, nh_tclassid) < 0) + goto nla_put_failure; } endfor_nexthops(fi); mp_end: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dbc224023977..9b9ef09382ab 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5681,14 +5681,15 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common, - rt->fib6_nh->fib_nh_weight, AF_INET6) < 0) + rt->fib6_nh->fib_nh_weight, AF_INET6, + 0) < 0) goto nla_put_failure; list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) { if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common, sibling->fib6_nh->fib_nh_weight, - AF_INET6) < 0) + AF_INET6, 0) < 0) goto nla_put_failure; } -- cgit v1.2.3-71-gd317 From 243418e3925d5b5b0657ae54c322d43035e97eed Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 24 Sep 2021 15:43:47 -0700 Subject: mm: fs: invalidate bh_lrus for only cold path The kernel test robot reported the regression of fio.write_iops[1] with commit 8cc621d2f45d ("mm: fs: invalidate BH LRU during page migration"). Since lru_add_drain is called frequently, invalidate bh_lrus there could increase bh_lrus cache miss ratio, which needs more IO in the end. This patch moves the bh_lrus invalidation from the hot path( e.g., zap_page_range, pagevec_release) to cold path(i.e., lru_add_drain_all, lru_cache_disable). Zhengjun Xing confirmed "I test the patch, the regression reduced to -2.9%" [1] https://lore.kernel.org/lkml/20210520083144.GD14190@xsang-OptiPlex-9020/ [2] 8cc621d2f45d, mm: fs: invalidate BH LRU during page migration Link: https://lkml.kernel.org/r/20210907212347.1977686-1-minchan@kernel.org Signed-off-by: Minchan Kim Reported-by: kernel test robot Reviewed-by: Chris Goldsworthy Tested-by: "Xing, Zhengjun" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 8 ++++++-- include/linux/buffer_head.h | 4 ++-- mm/swap.c | 19 ++++++++++++++++--- 3 files changed, 24 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/buffer.c b/fs/buffer.c index ab7573d72dd7..c615387aedca 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1425,12 +1425,16 @@ void invalidate_bh_lrus(void) } EXPORT_SYMBOL_GPL(invalidate_bh_lrus); -void invalidate_bh_lrus_cpu(int cpu) +/* + * It's called from workqueue context so we need a bh_lru_lock to close + * the race with preemption/irq. + */ +void invalidate_bh_lrus_cpu(void) { struct bh_lru *b; bh_lru_lock(); - b = per_cpu_ptr(&bh_lrus, cpu); + b = this_cpu_ptr(&bh_lrus); __invalidate_bh_lrus(b); bh_lru_unlock(); } diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 6486d3c19463..36f33685c8c0 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -194,7 +194,7 @@ void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size, struct buffer_head *__bread_gfp(struct block_device *, sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); -void invalidate_bh_lrus_cpu(int cpu); +void invalidate_bh_lrus_cpu(void); bool has_bh_in_lru(int cpu, void *dummy); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); @@ -408,7 +408,7 @@ static inline int inode_has_buffers(struct inode *inode) { return 0; } static inline void invalidate_inode_buffers(struct inode *inode) {} static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } -static inline void invalidate_bh_lrus_cpu(int cpu) {} +static inline void invalidate_bh_lrus_cpu(void) {} static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; } #define buffer_heads_over_limit 0 diff --git a/mm/swap.c b/mm/swap.c index 897200d27dd0..af3cad4e5378 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -620,7 +620,6 @@ void lru_add_drain_cpu(int cpu) pagevec_lru_move_fn(pvec, lru_lazyfree_fn); activate_page_drain(cpu); - invalidate_bh_lrus_cpu(cpu); } /** @@ -703,6 +702,20 @@ void lru_add_drain(void) local_unlock(&lru_pvecs.lock); } +/* + * It's called from per-cpu workqueue context in SMP case so + * lru_add_drain_cpu and invalidate_bh_lrus_cpu should run on + * the same cpu. It shouldn't be a problem in !SMP case since + * the core is only one and the locks will disable preemption. + */ +static void lru_add_and_bh_lrus_drain(void) +{ + local_lock(&lru_pvecs.lock); + lru_add_drain_cpu(smp_processor_id()); + local_unlock(&lru_pvecs.lock); + invalidate_bh_lrus_cpu(); +} + void lru_add_drain_cpu_zone(struct zone *zone) { local_lock(&lru_pvecs.lock); @@ -717,7 +730,7 @@ static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); static void lru_add_drain_per_cpu(struct work_struct *dummy) { - lru_add_drain(); + lru_add_and_bh_lrus_drain(); } /* @@ -858,7 +871,7 @@ void lru_cache_disable(void) */ __lru_add_drain_all(true); #else - lru_add_drain(); + lru_add_and_bh_lrus_drain(); #endif } -- cgit v1.2.3-71-gd317 From 57ed7b4303a1c4d1885019fef03e6a5af2e8468a Mon Sep 17 00:00:00 2001 From: Weizhao Ouyang Date: Fri, 24 Sep 2021 15:43:53 -0700 Subject: mm/debug: sync up latest migrate_reason to migrate_reason_names Sync up MR_DEMOTION to migrate_reason_names and add a synch prompt. Link: https://lkml.kernel.org/r/20210921064553.293905-3-o451686892@gmail.com Fixes: 26aa2d199d6f ("mm/migrate: demote pages during reclaim") Signed-off-by: Weizhao Ouyang Reviewed-by: "Huang, Ying" Reviewed-by: John Hubbard Cc: Anshuman Khandual Cc: Michal Hocko Cc: Pavel Tatashin Cc: Yang Shi Cc: Zi Yan Cc: Dave Hansen Cc: Minchan Kim Cc: Mina Almasry Cc: "Matthew Wilcox (Oracle)" Cc: Oscar Salvador Cc: Wei Xu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 6 +++++- mm/debug.c | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 326250996b4e..c8077e936691 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -19,6 +19,11 @@ struct migration_target_control; */ #define MIGRATEPAGE_SUCCESS 0 +/* + * Keep sync with: + * - macro MIGRATE_REASON in include/trace/events/migrate.h + * - migrate_reason_names[MR_TYPES] in mm/debug.c + */ enum migrate_reason { MR_COMPACTION, MR_MEMORY_FAILURE, @@ -32,7 +37,6 @@ enum migrate_reason { MR_TYPES }; -/* In mm/debug.c; also keep sync with include/trace/events/migrate.h */ extern const char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION diff --git a/mm/debug.c b/mm/debug.c index e61037cded98..fae0f81ad831 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -26,6 +26,7 @@ const char *migrate_reason_names[MR_TYPES] = { "numa_misplaced", "contig_range", "longterm_pin", + "demotion", }; const struct trace_print_flags pageflag_names[] = { -- cgit v1.2.3-71-gd317 From b193e15ac69d56f35e1d8e2b5d16cbd47764d053 Mon Sep 17 00:00:00 2001 From: 王贇 Date: Fri, 24 Sep 2021 10:35:58 +0800 Subject: net: prevent user from passing illegal stab size We observed below report when playing with netlink sock: UBSAN: shift-out-of-bounds in net/sched/sch_api.c:580:10 shift exponent 249 is too large for 32-bit type CPU: 0 PID: 685 Comm: a.out Not tainted Call Trace: dump_stack_lvl+0x8d/0xcf ubsan_epilogue+0xa/0x4e __ubsan_handle_shift_out_of_bounds+0x161/0x182 __qdisc_calculate_pkt_len+0xf0/0x190 __dev_queue_xmit+0x2ed/0x15b0 it seems like kernel won't check the stab log value passing from user, and will use the insane value later to calculate pkt_len. This patch just add a check on the size/cell_log to avoid insane calculation. Reported-by: Abaci Signed-off-by: Michael Wang Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 1 + net/sched/sch_api.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 6d7b12cba015..bf79f3a890af 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -11,6 +11,7 @@ #include #define DEFAULT_TX_QUEUE_LEN 1000 +#define STAB_SIZE_LOG_MAX 30 struct qdisc_walker { int stop; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 5e90e9b160e3..12f39a2dffd4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -513,6 +513,12 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, return stab; } + if (s->size_log > STAB_SIZE_LOG_MAX || + s->cell_log > STAB_SIZE_LOG_MAX) { + NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table"); + return ERR_PTR(-EINVAL); + } + stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); if (!stab) return ERR_PTR(-ENOMEM); -- cgit v1.2.3-71-gd317 From 33092aca857bf35a8e9cac0e8340c685a4796e90 Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Fri, 24 Sep 2021 22:05:14 +0200 Subject: mac80211: Fix Ptk0 rekey documentation @IEEE80211_KEY_FLAG_GENERATE_IV setting is irrelevant for RX. Move the requirement to the correct section in the PTK0 rekey documentation. Signed-off-by: Alexander Wetzel Link: https://lore.kernel.org/r/20210924200514.7936-1-alexander@wetzel-home.de Signed-off-by: Johannes Berg --- include/net/mac80211.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index af0fc13cea34..618d1f427cb2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2818,13 +2818,13 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * Mac80211 drivers should set the @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0 flag * when they are able to replace in-use PTK keys according to the following * requirements: - * 1) They do not hand over frames decrypted with the old key to - mac80211 once the call to set_key() with command %DISABLE_KEY has been - completed when also setting @IEEE80211_KEY_FLAG_GENERATE_IV for any key, + * 1) They do not hand over frames decrypted with the old key to mac80211 + once the call to set_key() with command %DISABLE_KEY has been completed, 2) either drop or continue to use the old key for any outgoing frames queued at the time of the key deletion (including re-transmits), 3) never send out a frame queued prior to the set_key() %SET_KEY command - encrypted with the new key and + encrypted with the new key when also needing + @IEEE80211_KEY_FLAG_GENERATE_IV and 4) never send out a frame unencrypted when it should be encrypted. Mac80211 will not queue any new frames for a deleted key to the driver. */ -- cgit v1.2.3-71-gd317 From 339031bafe6b281cf2dcb8364217288b9fdab555 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 23 Sep 2021 16:44:34 +0200 Subject: netfilter: conntrack: fix boot failure with nf_conntrack.enable_hooks=1 This is a revert of 7b1957b049 ("netfilter: nf_defrag_ipv4: use net_generic infra") and a partial revert of 8b0adbe3e3 ("netfilter: nf_defrag_ipv6: use net_generic infra"). If conntrack is builtin and kernel is booted with: nf_conntrack.enable_hooks=1 .... kernel will fail to boot due to a NULL deref in nf_defrag_ipv4_enable(): Its called before the ipv4 defrag initcall is made, so net_generic() returns NULL. To resolve this, move the user refcount back to struct net so calls to those functions are possible even before their initcalls have run. Fixes: 7b1957b04956 ("netfilter: nf_defrag_ipv4: use net_generic infra") Fixes: 8b0adbe3e38d ("netfilter: nf_defrag_ipv6: use net_generic infra"). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv6/nf_defrag_ipv6.h | 1 - include/net/netns/netfilter.h | 6 ++++++ net/ipv4/netfilter/nf_defrag_ipv4.c | 30 +++++++++-------------------- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 25 +++++++++--------------- 5 files changed, 25 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index 0fd8a4159662..ceadf8ba25a4 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -17,7 +17,6 @@ struct inet_frags_ctl; struct nft_ct_frag6_pernet { struct ctl_table_header *nf_frag_frags_hdr; struct fqdir *fqdir; - unsigned int users; }; #endif /* _NF_DEFRAG_IPV6_H */ diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 986a2a9cfdfa..b593f95e9991 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -27,5 +27,11 @@ struct netns_nf { #if IS_ENABLED(CONFIG_DECNET) struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS]; #endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) + unsigned int defrag_ipv4_users; +#endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + unsigned int defrag_ipv6_users; +#endif }; #endif diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 613432a36f0a..e61ea428ea18 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -20,13 +20,8 @@ #endif #include -static unsigned int defrag4_pernet_id __read_mostly; static DEFINE_MUTEX(defrag4_mutex); -struct defrag4_pernet { - unsigned int users; -}; - static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb, u_int32_t user) { @@ -111,19 +106,15 @@ static const struct nf_hook_ops ipv4_defrag_ops[] = { static void __net_exit defrag4_net_exit(struct net *net) { - struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id); - - if (nf_defrag->users) { + if (net->nf.defrag_ipv4_users) { nf_unregister_net_hooks(net, ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); - nf_defrag->users = 0; + net->nf.defrag_ipv4_users = 0; } } static struct pernet_operations defrag4_net_ops = { .exit = defrag4_net_exit, - .id = &defrag4_pernet_id, - .size = sizeof(struct defrag4_pernet), }; static int __init nf_defrag_init(void) @@ -138,24 +129,23 @@ static void __exit nf_defrag_fini(void) int nf_defrag_ipv4_enable(struct net *net) { - struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id); int err = 0; mutex_lock(&defrag4_mutex); - if (nf_defrag->users == UINT_MAX) { + if (net->nf.defrag_ipv4_users == UINT_MAX) { err = -EOVERFLOW; goto out_unlock; } - if (nf_defrag->users) { - nf_defrag->users++; + if (net->nf.defrag_ipv4_users) { + net->nf.defrag_ipv4_users++; goto out_unlock; } err = nf_register_net_hooks(net, ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); if (err == 0) - nf_defrag->users = 1; + net->nf.defrag_ipv4_users = 1; out_unlock: mutex_unlock(&defrag4_mutex); @@ -165,12 +155,10 @@ EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable); void nf_defrag_ipv4_disable(struct net *net) { - struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id); - mutex_lock(&defrag4_mutex); - if (nf_defrag->users) { - nf_defrag->users--; - if (nf_defrag->users == 0) + if (net->nf.defrag_ipv4_users) { + net->nf.defrag_ipv4_users--; + if (net->nf.defrag_ipv4_users == 0) nf_unregister_net_hooks(net, ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index a0108415275f..5c47be29b9ee 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -33,7 +33,7 @@ static const char nf_frags_cache_name[] = "nf-frags"; -unsigned int nf_frag_pernet_id __read_mostly; +static unsigned int nf_frag_pernet_id __read_mostly; static struct inet_frags nf_frags; static struct nft_ct_frag6_pernet *nf_frag_pernet(struct net *net) diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index e8a59d8bf2ad..cb4eb1d2c620 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -25,8 +25,6 @@ #include #include -extern unsigned int nf_frag_pernet_id; - static DEFINE_MUTEX(defrag6_mutex); static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, @@ -91,12 +89,10 @@ static const struct nf_hook_ops ipv6_defrag_ops[] = { static void __net_exit defrag6_net_exit(struct net *net) { - struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id); - - if (nf_frag->users) { + if (net->nf.defrag_ipv6_users) { nf_unregister_net_hooks(net, ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); - nf_frag->users = 0; + net->nf.defrag_ipv6_users = 0; } } @@ -134,24 +130,23 @@ static void __exit nf_defrag_fini(void) int nf_defrag_ipv6_enable(struct net *net) { - struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id); int err = 0; mutex_lock(&defrag6_mutex); - if (nf_frag->users == UINT_MAX) { + if (net->nf.defrag_ipv6_users == UINT_MAX) { err = -EOVERFLOW; goto out_unlock; } - if (nf_frag->users) { - nf_frag->users++; + if (net->nf.defrag_ipv6_users) { + net->nf.defrag_ipv6_users++; goto out_unlock; } err = nf_register_net_hooks(net, ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); if (err == 0) - nf_frag->users = 1; + net->nf.defrag_ipv6_users = 1; out_unlock: mutex_unlock(&defrag6_mutex); @@ -161,12 +156,10 @@ EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable); void nf_defrag_ipv6_disable(struct net *net) { - struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id); - mutex_lock(&defrag6_mutex); - if (nf_frag->users) { - nf_frag->users--; - if (nf_frag->users == 0) + if (net->nf.defrag_ipv6_users) { + net->nf.defrag_ipv6_users--; + if (net->nf.defrag_ipv6_users == 0) nf_unregister_net_hooks(net, ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); } -- cgit v1.2.3-71-gd317 From 4ca57d5139a0be3cf76c1667a4c0afb16d4a3b02 Mon Sep 17 00:00:00 2001 From: Rajaravi Krishna Katta Date: Tue, 14 Sep 2021 18:28:48 +0300 Subject: habanalabs: fix resetting args in wait for CS IOCTL In wait for CS IOCTL code, the driver resets the incoming args structure before returning to the user, regardless of the return value of the IOCTL. In case the IOCTL returns EINTR, resetting the args will result in error in case the userspace will repeat the ioctl call immediately (which is the behavior in the hl-thunk userspace library). The solution is to reset the args only if the driver returns success (0) as a return value for the IOCTL. Signed-off-by: Rajaravi Krishna Katta Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../misc/habanalabs/common/command_submission.c | 33 +++++++++++++--------- include/uapi/misc/habanalabs.h | 6 ++-- 2 files changed, 21 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 91b57544f7c6..6dafff375f1c 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -2649,11 +2649,18 @@ put_ctx: free_seq_arr: kfree(cs_seq_arr); - /* update output args */ - memset(args, 0, sizeof(*args)); if (rc) return rc; + if (mcs_data.wait_status == -ERESTARTSYS) { + dev_err_ratelimited(hdev->dev, + "user process got signal while waiting for Multi-CS\n"); + return -EINTR; + } + + /* update output args */ + memset(args, 0, sizeof(*args)); + if (mcs_data.completion_bitmap) { args->out.status = HL_WAIT_CS_STATUS_COMPLETED; args->out.cs_completion_map = mcs_data.completion_bitmap; @@ -2667,8 +2674,6 @@ free_seq_arr: /* update if some CS was gone */ if (mcs_data.timestamp) args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; - } else if (mcs_data.wait_status == -ERESTARTSYS) { - args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED; } else { args->out.status = HL_WAIT_CS_STATUS_BUSY; } @@ -2688,16 +2693,17 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq, &status, ×tamp); + if (rc == -ERESTARTSYS) { + dev_err_ratelimited(hdev->dev, + "user process got signal while waiting for CS handle %llu\n", + seq); + return -EINTR; + } + memset(args, 0, sizeof(*args)); if (rc) { - if (rc == -ERESTARTSYS) { - dev_err_ratelimited(hdev->dev, - "user process got signal while waiting for CS handle %llu\n", - seq); - args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED; - rc = -EINTR; - } else if (rc == -ETIMEDOUT) { + if (rc == -ETIMEDOUT) { dev_err_ratelimited(hdev->dev, "CS %llu has timed-out while user process is waiting for it\n", seq); @@ -2823,7 +2829,6 @@ wait_again: dev_err_ratelimited(hdev->dev, "user process got signal while waiting for interrupt ID %d\n", interrupt->interrupt_id); - *status = HL_WAIT_CS_STATUS_INTERRUPTED; rc = -EINTR; } else { *status = CS_WAIT_STATUS_BUSY; @@ -2878,8 +2883,6 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) args->in.interrupt_timeout_us, args->in.addr, args->in.target, interrupt_offset, &status); - memset(args, 0, sizeof(*args)); - if (rc) { if (rc != -EINTR) dev_err_ratelimited(hdev->dev, @@ -2888,6 +2891,8 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) return rc; } + memset(args, 0, sizeof(*args)); + switch (status) { case CS_WAIT_STATUS_COMPLETED: args->out.status = HL_WAIT_CS_STATUS_COMPLETED; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 7cc2a0f3f2f5..d13bb8c1b450 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -917,7 +917,6 @@ struct hl_wait_cs_in { #define HL_WAIT_CS_STATUS_BUSY 1 #define HL_WAIT_CS_STATUS_TIMEDOUT 2 #define HL_WAIT_CS_STATUS_ABORTED 3 -#define HL_WAIT_CS_STATUS_INTERRUPTED 4 #define HL_WAIT_CS_STATUS_FLAG_GONE 0x1 #define HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD 0x2 @@ -1286,7 +1285,8 @@ struct hl_debug_args { * EIO - The CS was aborted (usually because the device was reset) * ENODEV - The device wants to do hard-reset (so user need to close FD) * - * The driver also returns a custom define inside the IOCTL which can be: + * The driver also returns a custom define in case the IOCTL call returned 0. + * The define can be one of the following: * * HL_WAIT_CS_STATUS_COMPLETED - The CS has been completed successfully (0) * HL_WAIT_CS_STATUS_BUSY - The CS is still executing (0) @@ -1294,8 +1294,6 @@ struct hl_debug_args { * (ETIMEDOUT) * HL_WAIT_CS_STATUS_ABORTED - The CS was aborted, usually because the * device was reset (EIO) - * HL_WAIT_CS_STATUS_INTERRUPTED - Waiting for the CS was interrupted (EINTR) - * */ #define HL_IOCTL_WAIT_CS \ -- cgit v1.2.3-71-gd317 From 49054556289e8787501630b7c7a9d407da02e296 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 29 Sep 2021 11:59:17 +0200 Subject: net: introduce and use lock_sock_fast_nested() Syzkaller reported a false positive deadlock involving the nl socket lock and the subflow socket lock: MPTCP: kernel_bind error, err=-98 ============================================ WARNING: possible recursive locking detected 5.15.0-rc1-syzkaller #0 Not tainted -------------------------------------------- syz-executor998/6520 is trying to acquire lock: ffff8880795718a0 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_close+0x267/0x7b0 net/mptcp/protocol.c:2738 but task is already holding lock: ffff8880787c8c60 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1612 [inline] ffff8880787c8c60 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_close+0x23/0x7b0 net/mptcp/protocol.c:2720 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(k-sk_lock-AF_INET); lock(k-sk_lock-AF_INET); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by syz-executor998/6520: #0: ffffffff8d176c50 (cb_lock){++++}-{3:3}, at: genl_rcv+0x15/0x40 net/netlink/genetlink.c:802 #1: ffffffff8d176d08 (genl_mutex){+.+.}-{3:3}, at: genl_lock net/netlink/genetlink.c:33 [inline] #1: ffffffff8d176d08 (genl_mutex){+.+.}-{3:3}, at: genl_rcv_msg+0x3e0/0x580 net/netlink/genetlink.c:790 #2: ffff8880787c8c60 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1612 [inline] #2: ffff8880787c8c60 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_close+0x23/0x7b0 net/mptcp/protocol.c:2720 stack backtrace: CPU: 1 PID: 6520 Comm: syz-executor998 Not tainted 5.15.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_deadlock_bug kernel/locking/lockdep.c:2944 [inline] check_deadlock kernel/locking/lockdep.c:2987 [inline] validate_chain kernel/locking/lockdep.c:3776 [inline] __lock_acquire.cold+0x149/0x3ab kernel/locking/lockdep.c:5015 lock_acquire kernel/locking/lockdep.c:5625 [inline] lock_acquire+0x1ab/0x510 kernel/locking/lockdep.c:5590 lock_sock_fast+0x36/0x100 net/core/sock.c:3229 mptcp_close+0x267/0x7b0 net/mptcp/protocol.c:2738 inet_release+0x12e/0x280 net/ipv4/af_inet.c:431 __sock_release net/socket.c:649 [inline] sock_release+0x87/0x1b0 net/socket.c:677 mptcp_pm_nl_create_listen_socket+0x238/0x2c0 net/mptcp/pm_netlink.c:900 mptcp_nl_cmd_add_addr+0x359/0x930 net/mptcp/pm_netlink.c:1170 genl_family_rcv_msg_doit+0x228/0x320 net/netlink/genetlink.c:731 genl_family_rcv_msg net/netlink/genetlink.c:775 [inline] genl_rcv_msg+0x328/0x580 net/netlink/genetlink.c:792 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2504 genl_rcv+0x24/0x40 net/netlink/genetlink.c:803 netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1340 netlink_sendmsg+0x86d/0xdb0 net/netlink/af_netlink.c:1929 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 sock_no_sendpage+0x101/0x150 net/core/sock.c:2980 kernel_sendpage.part.0+0x1a0/0x340 net/socket.c:3504 kernel_sendpage net/socket.c:3501 [inline] sock_sendpage+0xe5/0x140 net/socket.c:1003 pipe_to_sendpage+0x2ad/0x380 fs/splice.c:364 splice_from_pipe_feed fs/splice.c:418 [inline] __splice_from_pipe+0x43e/0x8a0 fs/splice.c:562 splice_from_pipe fs/splice.c:597 [inline] generic_splice_sendpage+0xd4/0x140 fs/splice.c:746 do_splice_from fs/splice.c:767 [inline] direct_splice_actor+0x110/0x180 fs/splice.c:936 splice_direct_to_actor+0x34b/0x8c0 fs/splice.c:891 do_splice_direct+0x1b3/0x280 fs/splice.c:979 do_sendfile+0xae9/0x1240 fs/read_write.c:1249 __do_sys_sendfile64 fs/read_write.c:1314 [inline] __se_sys_sendfile64 fs/read_write.c:1300 [inline] __x64_sys_sendfile64+0x1cc/0x210 fs/read_write.c:1300 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f215cb69969 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 14 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffc96bb3868 EFLAGS: 00000246 ORIG_RAX: 0000000000000028 RAX: ffffffffffffffda RBX: 00007f215cbad072 RCX: 00007f215cb69969 RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000005 RBP: 0000000000000000 R08: 00007ffc96bb3a08 R09: 00007ffc96bb3a08 R10: 0000000100000002 R11: 0000000000000246 R12: 00007ffc96bb387c R13: 431bde82d7b634db R14: 0000000000000000 R15: 0000000000000000 the problem originates from uncorrect lock annotation in the mptcp code and is only visible since commit 2dcb96bacce3 ("net: core: Correct the sock::sk_lock.owned lockdep annotations"), but is present since the port-based endpoint support initial implementation. This patch addresses the issue introducing a nested variant of lock_sock_fast() and using it in the relevant code path. Fixes: 1729cf186d8a ("mptcp: create the listening socket for new port") Fixes: 2dcb96bacce3 ("net: core: Correct the sock::sk_lock.owned lockdep annotations") Suggested-by: Thomas Gleixner Reported-and-tested-by: syzbot+1dd53f7a89b299d59eaf@syzkaller.appspotmail.com Signed-off-by: Paolo Abeni Reviewed-by: Thomas Gleixner Signed-off-by: David S. Miller --- include/net/sock.h | 31 ++++++++++++++++++++++++++++++- net/core/sock.c | 20 ++------------------ net/mptcp/protocol.c | 2 +- 3 files changed, 33 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index c005c3c750e8..dc3f8169312e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1623,7 +1623,36 @@ void release_sock(struct sock *sk); SINGLE_DEPTH_NESTING) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) -bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock); +bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock); + +/** + * lock_sock_fast - fast version of lock_sock + * @sk: socket + * + * This version should be used for very small section, where process wont block + * return false if fast path is taken: + * + * sk_lock.slock locked, owned = 0, BH disabled + * + * return true if slow path is taken: + * + * sk_lock.slock unlocked, owned = 1, BH enabled + */ +static inline bool lock_sock_fast(struct sock *sk) +{ + /* The sk_lock has mutex_lock() semantics here. */ + mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + + return __lock_sock_fast(sk); +} + +/* fast socket lock variant for caller already holding a [different] socket lock */ +static inline bool lock_sock_fast_nested(struct sock *sk) +{ + mutex_acquire(&sk->sk_lock.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_); + + return __lock_sock_fast(sk); +} /** * unlock_sock_fast - complement of lock_sock_fast diff --git a/net/core/sock.c b/net/core/sock.c index 512e629f9780..7060d183216e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3210,24 +3210,8 @@ void release_sock(struct sock *sk) } EXPORT_SYMBOL(release_sock); -/** - * lock_sock_fast - fast version of lock_sock - * @sk: socket - * - * This version should be used for very small section, where process wont block - * return false if fast path is taken: - * - * sk_lock.slock locked, owned = 0, BH disabled - * - * return true if slow path is taken: - * - * sk_lock.slock unlocked, owned = 1, BH enabled - */ -bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) +bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) { - /* The sk_lock has mutex_lock() semantics here. */ - mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); - might_sleep(); spin_lock_bh(&sk->sk_lock.slock); @@ -3256,7 +3240,7 @@ bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) spin_unlock_bh(&sk->sk_lock.slock); return true; } -EXPORT_SYMBOL(lock_sock_fast); +EXPORT_SYMBOL(__lock_sock_fast); int sock_gettstamp(struct socket *sock, void __user *userstamp, bool timeval, bool time32) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index dbcebf56798f..e5df0b5971c8 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2735,7 +2735,7 @@ cleanup: inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; mptcp_for_each_subflow(mptcp_sk(sk), subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow = lock_sock_fast(ssk); + bool slow = lock_sock_fast_nested(ssk); sock_orphan(ssk); unlock_sock_fast(ssk, slow); -- cgit v1.2.3-71-gd317 From 35306eb23814444bd4021f8a1c3047d3cb0c8b2b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Sep 2021 15:57:50 -0700 Subject: af_unix: fix races in sk_peer_pid and sk_peer_cred accesses Jann Horn reported that SO_PEERCRED and SO_PEERGROUPS implementations are racy, as af_unix can concurrently change sk_peer_pid and sk_peer_cred. In order to fix this issue, this patch adds a new spinlock that needs to be used whenever these fields are read or written. Jann also pointed out that l2cap_sock_get_peer_pid_cb() is currently reading sk->sk_peer_pid which makes no sense, as this field is only possibly set by AF_UNIX sockets. We will have to clean this in a separate patch. This could be done by reverting b48596d1dc25 "Bluetooth: L2CAP: Add get_peer_pid callback" or implementing what was truly expected. Fixes: 109f6e39fa07 ("af_unix: Allow SO_PEERCRED to work across namespaces.") Signed-off-by: Eric Dumazet Reported-by: Jann Horn Cc: Eric W. Biederman Cc: Luiz Augusto von Dentz Cc: Marcel Holtmann Signed-off-by: David S. Miller --- include/net/sock.h | 2 ++ net/core/sock.c | 32 ++++++++++++++++++++++++++------ net/unix/af_unix.c | 34 ++++++++++++++++++++++++++++------ 3 files changed, 56 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index dc3f8169312e..ae929e21a376 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -488,8 +488,10 @@ struct sock { u8 sk_prefer_busy_poll; u16 sk_busy_poll_budget; #endif + spinlock_t sk_peer_lock; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; + long sk_rcvtimeo; ktime_t sk_stamp; #if BITS_PER_LONG==32 diff --git a/net/core/sock.c b/net/core/sock.c index 7060d183216e..c1601f75ec4b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1376,6 +1376,16 @@ set_sndbuf: } EXPORT_SYMBOL(sock_setsockopt); +static const struct cred *sk_get_peer_cred(struct sock *sk) +{ + const struct cred *cred; + + spin_lock(&sk->sk_peer_lock); + cred = get_cred(sk->sk_peer_cred); + spin_unlock(&sk->sk_peer_lock); + + return cred; +} static void cred_to_ucred(struct pid *pid, const struct cred *cred, struct ucred *ucred) @@ -1552,7 +1562,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname, struct ucred peercred; if (len > sizeof(peercred)) len = sizeof(peercred); + + spin_lock(&sk->sk_peer_lock); cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred); + spin_unlock(&sk->sk_peer_lock); + if (copy_to_user(optval, &peercred, len)) return -EFAULT; goto lenout; @@ -1560,20 +1574,23 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_PEERGROUPS: { + const struct cred *cred; int ret, n; - if (!sk->sk_peer_cred) + cred = sk_get_peer_cred(sk); + if (!cred) return -ENODATA; - n = sk->sk_peer_cred->group_info->ngroups; + n = cred->group_info->ngroups; if (len < n * sizeof(gid_t)) { len = n * sizeof(gid_t); + put_cred(cred); return put_user(len, optlen) ? -EFAULT : -ERANGE; } len = n * sizeof(gid_t); - ret = groups_to_user((gid_t __user *)optval, - sk->sk_peer_cred->group_info); + ret = groups_to_user((gid_t __user *)optval, cred->group_info); + put_cred(cred); if (ret) return ret; goto lenout; @@ -1935,9 +1952,10 @@ static void __sk_destruct(struct rcu_head *head) sk->sk_frag.page = NULL; } - if (sk->sk_peer_cred) - put_cred(sk->sk_peer_cred); + /* We do not need to acquire sk->sk_peer_lock, we are the last user. */ + put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); + if (likely(sk->sk_net_refcnt)) put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); @@ -3145,6 +3163,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_peer_pid = NULL; sk->sk_peer_cred = NULL; + spin_lock_init(&sk->sk_peer_lock); + sk->sk_write_pending = 0; sk->sk_rcvlowat = 1; sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f505b89bda6a..efac5989edb5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -608,20 +608,42 @@ static void unix_release_sock(struct sock *sk, int embrion) static void init_peercred(struct sock *sk) { - put_pid(sk->sk_peer_pid); - if (sk->sk_peer_cred) - put_cred(sk->sk_peer_cred); + const struct cred *old_cred; + struct pid *old_pid; + + spin_lock(&sk->sk_peer_lock); + old_pid = sk->sk_peer_pid; + old_cred = sk->sk_peer_cred; sk->sk_peer_pid = get_pid(task_tgid(current)); sk->sk_peer_cred = get_current_cred(); + spin_unlock(&sk->sk_peer_lock); + + put_pid(old_pid); + put_cred(old_cred); } static void copy_peercred(struct sock *sk, struct sock *peersk) { - put_pid(sk->sk_peer_pid); - if (sk->sk_peer_cred) - put_cred(sk->sk_peer_cred); + const struct cred *old_cred; + struct pid *old_pid; + + if (sk < peersk) { + spin_lock(&sk->sk_peer_lock); + spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock(&peersk->sk_peer_lock); + spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); + } + old_pid = sk->sk_peer_pid; + old_cred = sk->sk_peer_cred; sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); + + spin_unlock(&sk->sk_peer_lock); + spin_unlock(&peersk->sk_peer_lock); + + put_pid(old_pid); + put_cred(old_cred); } static int unix_listen(struct socket *sock, int backlog) -- cgit v1.2.3-71-gd317 From f792565326825ed806626da50c6f9a928f1079c1 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 29 Sep 2021 12:43:13 -0700 Subject: perf/core: fix userpage->time_enabled of inactive events Users of rdpmc rely on the mmapped user page to calculate accurate time_enabled. Currently, userpage->time_enabled is only updated when the event is added to the pmu. As a result, inactive event (due to counter multiplexing) does not have accurate userpage->time_enabled. This can be reproduced with something like: /* open 20 task perf_event "cycles", to create multiplexing */ fd = perf_event_open(); /* open task perf_event "cycles" */ userpage = mmap(fd); /* use mmap and rdmpc */ while (true) { time_enabled_mmap = xxx; /* use logic in perf_event_mmap_page */ time_enabled_read = read(fd).time_enabled; if (time_enabled_mmap > time_enabled_read) BUG(); } Fix this by updating userpage for inactive events in merge_sched_in. Suggested-by: Peter Zijlstra (Intel) Reported-and-tested-by: Lucian Grijincu Signed-off-by: Song Liu Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210929194313.2398474-1-songliubraving@fb.com --- include/linux/perf_event.h | 4 +++- kernel/events/core.c | 34 ++++++++++++++++++++++++++++++---- 2 files changed, 33 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fe156a8170aa..9b60bb89d86a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -683,7 +683,9 @@ struct perf_event { /* * timestamp shadows the actual context timing but it can * be safely used in NMI interrupt context. It reflects the - * context time as it was when the event was last scheduled in. + * context time as it was when the event was last scheduled in, + * or when ctx_sched_in failed to schedule the event because we + * run out of PMC. * * ctx_time already accounts for ctx->timestamp. Therefore to * compute ctx_time for a sample, simply add perf_clock(). diff --git a/kernel/events/core.c b/kernel/events/core.c index 0c000cb01eeb..f23ca260307f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3707,6 +3707,29 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx, return 0; } +static inline bool event_update_userpage(struct perf_event *event) +{ + if (likely(!atomic_read(&event->mmap_count))) + return false; + + perf_event_update_time(event); + perf_set_shadow_time(event, event->ctx); + perf_event_update_userpage(event); + + return true; +} + +static inline void group_update_userpage(struct perf_event *group_event) +{ + struct perf_event *event; + + if (!event_update_userpage(group_event)) + return; + + for_each_sibling_event(event, group_event) + event_update_userpage(event); +} + static int merge_sched_in(struct perf_event *event, void *data) { struct perf_event_context *ctx = event->ctx; @@ -3725,14 +3748,15 @@ static int merge_sched_in(struct perf_event *event, void *data) } if (event->state == PERF_EVENT_STATE_INACTIVE) { + *can_add_hw = 0; if (event->attr.pinned) { perf_cgroup_event_disable(event, ctx); perf_event_set_state(event, PERF_EVENT_STATE_ERROR); + } else { + ctx->rotate_necessary = 1; + perf_mux_hrtimer_restart(cpuctx); + group_update_userpage(event); } - - *can_add_hw = 0; - ctx->rotate_necessary = 1; - perf_mux_hrtimer_restart(cpuctx); } return 0; @@ -6324,6 +6348,8 @@ accounting: ring_buffer_attach(event, rb); + perf_event_update_time(event); + perf_set_shadow_time(event, event->ctx); perf_event_init_userpage(event); perf_event_update_userpage(event); } else { -- cgit v1.2.3-71-gd317 From 83d40a61046f73103b4e5d8f1310261487ff63b0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 20 Sep 2021 15:31:11 +0200 Subject: sched: Always inline is_percpu_thread() vmlinux.o: warning: objtool: check_preemption_disabled()+0x81: call to is_percpu_thread() leaves .noinstr.text section Reported-by: Stephen Rothwell Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210928084218.063371959@infradead.org --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 39039ce8ac4c..c1a927ddec64 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1720,7 +1720,7 @@ extern struct pid *cad_pid; #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) -static inline bool is_percpu_thread(void) +static __always_inline bool is_percpu_thread(void) { #ifdef CONFIG_SMP return (current->flags & PF_NO_SETAFFINITY) && -- cgit v1.2.3-71-gd317 From 5fb14d20f8241461d351bef73e49871e4b2330ab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Oct 2021 09:46:22 -0700 Subject: net: add kerneldoc comment for sk_peer_lock Fixes following warning: include/net/sock.h:533: warning: Function parameter or member 'sk_peer_lock' not described in 'sock' Fixes: 35306eb23814 ("af_unix: fix races in sk_peer_pid and sk_peer_cred accesses") Signed-off-by: Eric Dumazet Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20211001164622.58520-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index ae929e21a376..ea6fbc88c8f9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -307,6 +307,7 @@ struct bpf_local_storage; * @sk_priority: %SO_PRIORITY setting * @sk_type: socket type (%SOCK_STREAM, etc) * @sk_protocol: which protocol this socket belongs in this network family + * @sk_peer_lock: lock protecting @sk_peer_pid and @sk_peer_cred * @sk_peer_pid: &struct pid for this socket's peer * @sk_peer_cred: %SO_PEERCRED setting * @sk_rcvlowat: %SO_RCVLOWAT setting -- cgit v1.2.3-71-gd317 From 019d9329e7481cfaccbd8ed17b1e04ca76970f13 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 30 Sep 2021 15:53:30 +0300 Subject: net: mscc: ocelot: fix VCAP filters remaining active after being deleted When ocelot_flower.c calls ocelot_vcap_filter_add(), the filter has a given filter->id.cookie. This filter is added to the block->rules list. However, when ocelot_flower.c calls ocelot_vcap_block_find_filter_by_id() which passes the cookie as argument, the filter is never found by filter->id.cookie when searching through the block->rules list. This is unsurprising, since the filter->id.cookie is an unsigned long, but the cookie argument provided to ocelot_vcap_block_find_filter_by_id() is a signed int, and the comparison fails. Fixes: 50c6cc5b9283 ("net: mscc: ocelot: store a namespaced VCAP filter ID") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210930125330.2078625-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_vcap.c | 4 ++-- include/soc/mscc/ocelot_vcap.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 7945393a0655..99d7376a70a7 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -998,8 +998,8 @@ ocelot_vcap_block_find_filter_by_index(struct ocelot_vcap_block *block, } struct ocelot_vcap_filter * -ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, int cookie, - bool tc_offload) +ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, + unsigned long cookie, bool tc_offload) { struct ocelot_vcap_filter *filter; diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index 25fd525aaf92..4869ebbd438d 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -694,7 +694,7 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot, int ocelot_vcap_filter_del(struct ocelot *ocelot, struct ocelot_vcap_filter *rule); struct ocelot_vcap_filter * -ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, int id, - bool tc_offload); +ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, + unsigned long cookie, bool tc_offload); #endif /* _OCELOT_VCAP_H_ */ -- cgit v1.2.3-71-gd317 From 6fb721cf781808ee2ca5e737fb0592cc68de3381 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 26 Sep 2021 09:59:35 +0200 Subject: netfilter: nf_tables: honor NLM_F_CREATE and NLM_F_EXCL in event notification Include the NLM_F_CREATE and NLM_F_EXCL flags in netlink event notifications, otherwise userspace cannot distiguish between create and add commands. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 47 +++++++++++++++++++++++++++++---------- net/netfilter/nft_quota.c | 2 +- 3 files changed, 37 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 148f5d8ee5ab..a16171c5fd9e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1202,7 +1202,7 @@ struct nft_object *nft_obj_lookup(const struct net *net, void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, - int event, int family, int report, gfp_t gfp); + int event, u16 flags, int family, int report, gfp_t gfp); /** * struct nft_object_type - stateful object type diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c8acd26c7201..c0851fec11d4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -780,6 +780,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct nftables_pernet *nft_net; struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -790,8 +791,11 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq, - event, 0, ctx->family, ctx->table); + event, flags, ctx->family, ctx->table); if (err < 0) { kfree_skb(skb); goto err; @@ -1563,6 +1567,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) { struct nftables_pernet *nft_net; struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -1573,8 +1578,11 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq, - event, 0, ctx->family, ctx->table, + event, flags, ctx->family, ctx->table, ctx->chain); if (err < 0) { kfree_skb(skb); @@ -2945,6 +2953,8 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, } if (ctx->flags & (NLM_F_APPEND | NLM_F_REPLACE)) flags |= NLM_F_APPEND; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, event, flags, ctx->family, ctx->table, @@ -3957,8 +3967,9 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, gfp_t gfp_flags) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); - struct sk_buff *skb; u32 portid = ctx->portid; + struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -3969,7 +3980,10 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, if (skb == NULL) goto err; - err = nf_tables_fill_set(skb, ctx, set, event, 0); + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + + err = nf_tables_fill_set(skb, ctx, set, event, flags); if (err < 0) { kfree_skb(skb); goto err; @@ -5245,12 +5259,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb, static void nf_tables_setelem_notify(const struct nft_ctx *ctx, const struct nft_set *set, const struct nft_set_elem *elem, - int event, u16 flags) + int event) { struct nftables_pernet *nft_net; struct net *net = ctx->net; u32 portid = ctx->portid; struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) @@ -5260,6 +5275,9 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags, set, elem); if (err < 0) { @@ -6935,7 +6953,7 @@ static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info, void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, - int family, int report, gfp_t gfp) + u16 flags, int family, int report, gfp_t gfp) { struct nftables_pernet *nft_net = nft_pernet(net); struct sk_buff *skb; @@ -6960,8 +6978,9 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, if (skb == NULL) goto err; - err = nf_tables_fill_obj_info(skb, net, portid, seq, event, 0, family, - table, obj, false); + err = nf_tables_fill_obj_info(skb, net, portid, seq, event, + flags & (NLM_F_CREATE | NLM_F_EXCL), + family, table, obj, false); if (err < 0) { kfree_skb(skb); goto err; @@ -6978,7 +6997,7 @@ static void nf_tables_obj_notify(const struct nft_ctx *ctx, struct nft_object *obj, int event) { nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event, - ctx->family, ctx->report, GFP_KERNEL); + ctx->flags, ctx->family, ctx->report, GFP_KERNEL); } /* @@ -7759,6 +7778,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, { struct nftables_pernet *nft_net = nft_pernet(ctx->net); struct sk_buff *skb; + u16 flags = 0; int err; if (!ctx->report && @@ -7769,8 +7789,11 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, if (skb == NULL) goto err; + if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL)) + flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); + err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid, - ctx->seq, event, 0, + ctx->seq, event, flags, ctx->family, flowtable, hook_list); if (err < 0) { kfree_skb(skb); @@ -8648,7 +8671,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_setelem_activate(net, te->set, &te->elem); nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, - NFT_MSG_NEWSETELEM, 0); + NFT_MSG_NEWSETELEM); nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: @@ -8656,7 +8679,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, - NFT_MSG_DELSETELEM, 0); + NFT_MSG_DELSETELEM); nft_setelem_remove(net, te->set, &te->elem); if (!nft_setelem_is_catchall(te->set, &te->elem)) { atomic_dec(&te->set->nelems); diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 0363f533a42b..c4d1389f7185 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -60,7 +60,7 @@ static void nft_quota_obj_eval(struct nft_object *obj, if (overquota && !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags)) nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0, - NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC); + NFT_MSG_NEWOBJ, 0, nft_pf(pkt), 0, GFP_ATOMIC); } static int nft_quota_do_init(const struct nlattr * const tb[], -- cgit v1.2.3-71-gd317 From 6e9bfdcf0a3b1c8126878c21adcfc343f89d4a6a Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Fri, 1 Oct 2021 15:37:31 +0100 Subject: cachefiles: Fix oops in trace_cachefiles_mark_buried due to NULL object In cachefiles_mark_object_buried, the dentry in question may not have an owner, and thus our cachefiles_object pointer may be NULL when calling the tracepoint, in which case we will also not have a valid debug_id to print in the tracepoint. Check for NULL object in the tracepoint and if so, just set debug_id to MAX_UINT as was done in 2908f5e101e3 ("fscache: Add a cookie debug ID and use that in traces"). This fixes the following oops: FS-Cache: Cache "mycache" added (type cachefiles) CacheFiles: File cache on vdc registered ... Workqueue: fscache_object fscache_object_work_func [fscache] RIP: 0010:trace_event_raw_event_cachefiles_mark_buried+0x4e/0xa0 [cachefiles] .... Call Trace: cachefiles_mark_object_buried+0xa5/0xb0 [cachefiles] cachefiles_bury_object+0x270/0x430 [cachefiles] cachefiles_walk_to_object+0x195/0x9c0 [cachefiles] cachefiles_lookup_object+0x5a/0xc0 [cachefiles] fscache_look_up_object+0xd7/0x160 [fscache] fscache_object_work_func+0xb2/0x340 [fscache] process_one_work+0x1f1/0x390 worker_thread+0x53/0x3e0 kthread+0x127/0x150 Fixes: 2908f5e101e3 ("fscache: Add a cookie debug ID and use that in traces") Signed-off-by: Dave Wysochanski Signed-off-by: David Howells cc: linux-cachefs@redhat.com Signed-off-by: Linus Torvalds --- include/trace/events/cachefiles.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index 9a448fe9355d..695bfdbfdcad 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -305,7 +305,7 @@ TRACE_EVENT(cachefiles_mark_buried, ), TP_fast_assign( - __entry->obj = obj->fscache.debug_id; + __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX; __entry->de = de; __entry->why = why; ), -- cgit v1.2.3-71-gd317 From 97315723c463679a9ecf803d6479fca24c3efda0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 22 Sep 2021 12:18:25 +0200 Subject: xen/privcmd: drop "pages" parameter from xen_remap_pfn() The function doesn't use it and all of its callers say in a comment that their respective arguments are to be non-NULL only in auto-translated mode. Since xen_remap_domain_mfn_array() isn't supposed to be used by non-PV, drop the parameter there as well. It was bogusly passed as non- NULL (PRIV_VMA_LOCKED) by its only caller anyway. For xen_remap_domain_gfn_range(), otoh, it's not clear at all why this wouldn't want / might not need to gain auto-translated support down the road, so the parameter is retained there despite now remaining unused (and the only caller passing NULL); correct a respective comment as well. Signed-off-by: Jan Beulich Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/036ad8a2-46f9-ac3d-6219-bdc93ab9e10b@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/mmu_pv.c | 2 +- drivers/xen/privcmd.c | 5 ++--- include/xen/xen-ops.h | 15 ++++++--------- 3 files changed, 9 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 8d751939c6f3..3359c23573c5 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2398,7 +2398,7 @@ static int remap_area_pfn_pte_fn(pte_t *ptep, unsigned long addr, void *data) int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, - unsigned int domid, bool no_translate, struct page **pages) + unsigned int domid, bool no_translate) { int err = 0; struct remap_data rmd; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 5af2a295e32f..3369734108af 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -257,7 +257,7 @@ static long privcmd_ioctl_mmap(struct file *file, void __user *udata) LIST_HEAD(pagelist); struct mmap_gfn_state state; - /* We only support privcmd_ioctl_mmap_batch for auto translated. */ + /* We only support privcmd_ioctl_mmap_batch for non-auto-translated. */ if (xen_feature(XENFEAT_auto_translated_physmap)) return -ENOSYS; @@ -810,8 +810,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file, kdata.addr & PAGE_MASK, pfns, kdata.num, errs, vma->vm_page_prot, - domid, - vma->vm_private_data); + domid); if (num < 0) rc = num; else if (num != kdata.num) { diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index db28e79b77ee..a3584a357f35 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -52,12 +52,12 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); #if defined(CONFIG_XEN_PV) int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, - unsigned int domid, bool no_translate, struct page **pages); + unsigned int domid, bool no_translate); #else static inline int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, unsigned int domid, - bool no_translate, struct page **pages) + bool no_translate) { BUG(); return 0; @@ -134,7 +134,7 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma, */ BUG_ON(err_ptr == NULL); return xen_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid, - false, pages); + false); } /* @@ -146,7 +146,6 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma, * @err_ptr: Returns per-MFN error status. * @prot: page protection mask * @domid: Domain owning the pages - * @pages: Array of pages if this domain has an auto-translated physmap * * @mfn and @err_ptr may point to the same buffer, the MFNs will be * overwritten by the error codes after they are mapped. @@ -157,14 +156,13 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma, static inline int xen_remap_domain_mfn_array(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *mfn, int nr, int *err_ptr, - pgprot_t prot, unsigned int domid, - struct page **pages) + pgprot_t prot, unsigned int domid) { if (xen_feature(XENFEAT_auto_translated_physmap)) return -EOPNOTSUPP; return xen_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid, - true, pages); + true); } /* xen_remap_domain_gfn_range() - map a range of foreign frames @@ -188,8 +186,7 @@ static inline int xen_remap_domain_gfn_range(struct vm_area_struct *vma, if (xen_feature(XENFEAT_auto_translated_physmap)) return -EOPNOTSUPP; - return xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false, - pages); + return xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false); } int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, -- cgit v1.2.3-71-gd317 From a0e25f0a0d39d66c048d5dbac1e7ebaa6ec885d7 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Sun, 3 Oct 2021 12:14:00 -0400 Subject: cachefiles: Fix oops with cachefiles_cull() due to NULL object When cachefiles_cull() calls cachefiles_bury_object(), it passes a NULL object. When this occurs, either trace_cachefiles_unlink() or trace_cachefiles_rename() may oops due to the NULL object. Check for NULL object in the tracepoint and if so, set debug_id to MAX_UINT as was done in 2908f5e101e3. The following oops was seen with xfstests generic/100. BUG: kernel NULL pointer dereference, address: 0000000000000010 ... RIP: 0010:trace_event_raw_event_cachefiles_unlink+0x4e/0xa0 [cachefiles] ... Call Trace: cachefiles_bury_object+0x242/0x430 [cachefiles] ? __vfs_removexattr_locked+0x10f/0x150 ? vfs_removexattr+0x51/0xd0 cachefiles_cull+0x84/0x120 [cachefiles] cachefiles_daemon_cull+0xd1/0x120 [cachefiles] cachefiles_daemon_write+0x158/0x190 [cachefiles] vfs_write+0xbc/0x260 ksys_write+0x4f/0xc0 do_syscall_64+0x3b/0x90 The following oops was seen with xfstests generic/290. BUG: kernel NULL pointer dereference, address: 0000000000000010 ... RIP: 0010:trace_event_raw_event_cachefiles_rename+0x54/0xa0 [cachefiles] ... Call Trace: cachefiles_bury_object+0x35c/0x430 [cachefiles] cachefiles_cull+0x84/0x120 [cachefiles] cachefiles_daemon_cull+0xd1/0x120 [cachefiles] cachefiles_daemon_write+0x158/0x190 [cachefiles] vfs_write+0xbc/0x260 ksys_write+0x4f/0xc0 do_syscall_64+0x3b/0x90 Fixes: 2908f5e101e3 ("fscache: Add a cookie debug ID and use that in traces") Signed-off-by: Dave Wysochanski Signed-off-by: David Howells Link: https://listman.redhat.com/archives/linux-cachefs/2021-October/msg00009.html --- include/trace/events/cachefiles.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index 695bfdbfdcad..920b6a303d60 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -178,7 +178,7 @@ TRACE_EVENT(cachefiles_unlink, ), TP_fast_assign( - __entry->obj = obj->fscache.debug_id; + __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX; __entry->de = de; __entry->why = why; ), @@ -205,7 +205,7 @@ TRACE_EVENT(cachefiles_rename, ), TP_fast_assign( - __entry->obj = obj->fscache.debug_id; + __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX; __entry->de = de; __entry->to = to; __entry->why = why; -- cgit v1.2.3-71-gd317 From 3f6cffb8604b537e3d7ea040d7f4368689638eaf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Oct 2021 16:01:40 -0700 Subject: etherdevice: use __dev_addr_set() Andrew points out that eth_hw_addr_set() replaces memcpy() calls so we can't use ether_addr_copy() which assumes both arguments are 2-bytes aligned. Reported-by: Andrew Lunn Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 928c411bd509..c58d50451485 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -308,7 +308,7 @@ static inline void ether_addr_copy(u8 *dst, const u8 *src) */ static inline void eth_hw_addr_set(struct net_device *dev, const u8 *addr) { - ether_addr_copy(dev->dev_addr, addr); + __dev_addr_set(dev, addr, ETH_ALEN); } /** -- cgit v1.2.3-71-gd317 From 94ad8aacbc2d4908b052c8bdb5ae13bc702f77ea Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Sep 2021 16:40:50 +0200 Subject: ARM: omap1: move omap15xx local bus handling to usb.c Commit 38225f2ef2f4 ("ARM/omap1: switch to use dma_direct_set_offset for lbus DMA offsets") removed a lot of mach/memory.h, but left the USB offset handling split into arch/arm/mach-omap1/usb.c and drivers/usb/host/ohci-omap.c. This can cause a randconfig build warning that now fails the build with -Werror: arch/arm/mach-omap1/usb.c:561:30: error: 'omap_1510_usb_ohci_nb' defined but not used [-Werror=unused-variable] 561 | static struct notifier_block omap_1510_usb_ohci_nb = { | ^~~~~~~~~~~~~~~~~~~~~ Move it all into the platform file to get rid of the final location that relies on mach/memory.h. Acked-by: Felipe Balbi Acked-by: Alan Stern Acked-by: Tony Lindgren Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20210927144118.2464881-1-arnd@kernel.org' Signed-off-by: Arnd Bergmann --- arch/arm/mach-omap1/include/mach/memory.h | 12 ---- arch/arm/mach-omap1/usb.c | 116 +++++++++++++++++++++--------- drivers/usb/host/ohci-omap.c | 72 +------------------ include/linux/platform_data/usb-omap1.h | 2 + 4 files changed, 86 insertions(+), 116 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-omap1/include/mach/memory.h b/arch/arm/mach-omap1/include/mach/memory.h index 36bc0000cb6a..ba3a350479c8 100644 --- a/arch/arm/mach-omap1/include/mach/memory.h +++ b/arch/arm/mach-omap1/include/mach/memory.h @@ -9,16 +9,4 @@ /* REVISIT: omap1 legacy drivers still rely on this */ #include -/* - * Bus address is physical address, except for OMAP-1510 Local Bus. - * OMAP-1510 bus address is translated into a Local Bus address if the - * OMAP bus type is lbus. We do the address translation based on the - * device overriding the defaults used in the dma-mapping API. - */ - -/* - * OMAP-1510 Local Bus address offset - */ -#define OMAP1510_LB_OFFSET UL(0x30000000) - #endif diff --git a/arch/arm/mach-omap1/usb.c b/arch/arm/mach-omap1/usb.c index 86d3b3c157af..e60831c82b78 100644 --- a/arch/arm/mach-omap1/usb.c +++ b/arch/arm/mach-omap1/usb.c @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -206,8 +207,6 @@ static inline void udc_device_init(struct omap_usb_config *pdata) #endif -#if IS_ENABLED(CONFIG_USB_OHCI_HCD) - /* The dmamask must be set for OHCI to work */ static u64 ohci_dmamask = ~(u32)0; @@ -236,20 +235,15 @@ static struct platform_device ohci_device = { static inline void ohci_device_init(struct omap_usb_config *pdata) { + if (!IS_ENABLED(CONFIG_USB_OHCI_HCD)) + return; + if (cpu_is_omap7xx()) ohci_resources[1].start = INT_7XX_USB_HHC_1; pdata->ohci_device = &ohci_device; pdata->ocpi_enable = &ocpi_enable; } -#else - -static inline void ohci_device_init(struct omap_usb_config *pdata) -{ -} - -#endif - #if defined(CONFIG_USB_OTG) && defined(CONFIG_ARCH_OMAP_OTG) static struct resource otg_resources[] = { @@ -534,33 +528,87 @@ bad: } #ifdef CONFIG_ARCH_OMAP15XX +/* OMAP-1510 OHCI has its own MMU for DMA */ +#define OMAP1510_LB_MEMSIZE 32 /* Should be same as SDRAM size */ +#define OMAP1510_LB_CLOCK_DIV 0xfffec10c +#define OMAP1510_LB_MMU_CTL 0xfffec208 +#define OMAP1510_LB_MMU_LCK 0xfffec224 +#define OMAP1510_LB_MMU_LD_TLB 0xfffec228 +#define OMAP1510_LB_MMU_CAM_H 0xfffec22c +#define OMAP1510_LB_MMU_CAM_L 0xfffec230 +#define OMAP1510_LB_MMU_RAM_H 0xfffec234 +#define OMAP1510_LB_MMU_RAM_L 0xfffec238 -/* ULPD_DPLL_CTRL */ -#define DPLL_IOB (1 << 13) -#define DPLL_PLL_ENABLE (1 << 4) -#define DPLL_LOCK (1 << 0) +/* + * Bus address is physical address, except for OMAP-1510 Local Bus. + * OMAP-1510 bus address is translated into a Local Bus address if the + * OMAP bus type is lbus. + */ +#define OMAP1510_LB_OFFSET UL(0x30000000) -/* ULPD_APLL_CTRL */ -#define APLL_NDPLL_SWITCH (1 << 0) +/* + * OMAP-1510 specific Local Bus clock on/off + */ +static int omap_1510_local_bus_power(int on) +{ + if (on) { + omap_writel((1 << 1) | (1 << 0), OMAP1510_LB_MMU_CTL); + udelay(200); + } else { + omap_writel(0, OMAP1510_LB_MMU_CTL); + } -static int omap_1510_usb_ohci_notifier(struct notifier_block *nb, - unsigned long event, void *data) + return 0; +} + +/* + * OMAP-1510 specific Local Bus initialization + * NOTE: This assumes 32MB memory size in OMAP1510LB_MEMSIZE. + * See also arch/mach-omap/memory.h for __virt_to_dma() and + * __dma_to_virt() which need to match with the physical + * Local Bus address below. + */ +static int omap_1510_local_bus_init(void) { - struct device *dev = data; + unsigned int tlb; + unsigned long lbaddr, physaddr; + + omap_writel((omap_readl(OMAP1510_LB_CLOCK_DIV) & 0xfffffff8) | 0x4, + OMAP1510_LB_CLOCK_DIV); + + /* Configure the Local Bus MMU table */ + for (tlb = 0; tlb < OMAP1510_LB_MEMSIZE; tlb++) { + lbaddr = tlb * 0x00100000 + OMAP1510_LB_OFFSET; + physaddr = tlb * 0x00100000 + PHYS_OFFSET; + omap_writel((lbaddr & 0x0fffffff) >> 22, OMAP1510_LB_MMU_CAM_H); + omap_writel(((lbaddr & 0x003ffc00) >> 6) | 0xc, + OMAP1510_LB_MMU_CAM_L); + omap_writel(physaddr >> 16, OMAP1510_LB_MMU_RAM_H); + omap_writel((physaddr & 0x0000fc00) | 0x300, OMAP1510_LB_MMU_RAM_L); + omap_writel(tlb << 4, OMAP1510_LB_MMU_LCK); + omap_writel(0x1, OMAP1510_LB_MMU_LD_TLB); + } - if (event != BUS_NOTIFY_ADD_DEVICE) - return NOTIFY_DONE; + /* Enable the walking table */ + omap_writel(omap_readl(OMAP1510_LB_MMU_CTL) | (1 << 3), OMAP1510_LB_MMU_CTL); + udelay(200); - if (strncmp(dev_name(dev), "ohci", 4) == 0 && - dma_direct_set_offset(dev, PHYS_OFFSET, OMAP1510_LB_OFFSET, - (u64)-1)) - WARN_ONCE(1, "failed to set DMA offset\n"); - return NOTIFY_OK; + return 0; } -static struct notifier_block omap_1510_usb_ohci_nb = { - .notifier_call = omap_1510_usb_ohci_notifier, -}; +static void omap_1510_local_bus_reset(void) +{ + omap_1510_local_bus_power(1); + omap_1510_local_bus_init(); +} + +/* ULPD_DPLL_CTRL */ +#define DPLL_IOB (1 << 13) +#define DPLL_PLL_ENABLE (1 << 4) +#define DPLL_LOCK (1 << 0) + +/* ULPD_APLL_CTRL */ +#define APLL_NDPLL_SWITCH (1 << 0) static void __init omap_1510_usb_init(struct omap_usb_config *config) { @@ -616,19 +664,19 @@ static void __init omap_1510_usb_init(struct omap_usb_config *config) } #endif -#if IS_ENABLED(CONFIG_USB_OHCI_HCD) - if (config->register_host) { + if (IS_ENABLED(CONFIG_USB_OHCI_HCD) && config->register_host) { int status; - bus_register_notifier(&platform_bus_type, - &omap_1510_usb_ohci_nb); ohci_device.dev.platform_data = config; + dma_direct_set_offset(&ohci_device.dev, PHYS_OFFSET, + OMAP1510_LB_OFFSET, (u64)-1); status = platform_device_register(&ohci_device); if (status) pr_debug("can't register OHCI device, %d\n", status); /* hcd explicitly gates 48MHz */ + + config->lb_reset = omap_1510_local_bus_reset; } -#endif } #else diff --git a/drivers/usb/host/ohci-omap.c b/drivers/usb/host/ohci-omap.c index 0b3722770760..ded9738392e4 100644 --- a/drivers/usb/host/ohci-omap.c +++ b/drivers/usb/host/ohci-omap.c @@ -40,17 +40,6 @@ #include -/* OMAP-1510 OHCI has its own MMU for DMA */ -#define OMAP1510_LB_MEMSIZE 32 /* Should be same as SDRAM size */ -#define OMAP1510_LB_CLOCK_DIV 0xfffec10c -#define OMAP1510_LB_MMU_CTL 0xfffec208 -#define OMAP1510_LB_MMU_LCK 0xfffec224 -#define OMAP1510_LB_MMU_LD_TLB 0xfffec228 -#define OMAP1510_LB_MMU_CAM_H 0xfffec22c -#define OMAP1510_LB_MMU_CAM_L 0xfffec230 -#define OMAP1510_LB_MMU_RAM_H 0xfffec234 -#define OMAP1510_LB_MMU_RAM_L 0xfffec238 - #define DRIVER_DESC "OHCI OMAP driver" struct ohci_omap_priv { @@ -104,61 +93,6 @@ static int omap_ohci_transceiver_power(struct ohci_omap_priv *priv, int on) return 0; } -#ifdef CONFIG_ARCH_OMAP15XX -/* - * OMAP-1510 specific Local Bus clock on/off - */ -static int omap_1510_local_bus_power(int on) -{ - if (on) { - omap_writel((1 << 1) | (1 << 0), OMAP1510_LB_MMU_CTL); - udelay(200); - } else { - omap_writel(0, OMAP1510_LB_MMU_CTL); - } - - return 0; -} - -/* - * OMAP-1510 specific Local Bus initialization - * NOTE: This assumes 32MB memory size in OMAP1510LB_MEMSIZE. - * See also arch/mach-omap/memory.h for __virt_to_dma() and - * __dma_to_virt() which need to match with the physical - * Local Bus address below. - */ -static int omap_1510_local_bus_init(void) -{ - unsigned int tlb; - unsigned long lbaddr, physaddr; - - omap_writel((omap_readl(OMAP1510_LB_CLOCK_DIV) & 0xfffffff8) | 0x4, - OMAP1510_LB_CLOCK_DIV); - - /* Configure the Local Bus MMU table */ - for (tlb = 0; tlb < OMAP1510_LB_MEMSIZE; tlb++) { - lbaddr = tlb * 0x00100000 + OMAP1510_LB_OFFSET; - physaddr = tlb * 0x00100000 + PHYS_OFFSET; - omap_writel((lbaddr & 0x0fffffff) >> 22, OMAP1510_LB_MMU_CAM_H); - omap_writel(((lbaddr & 0x003ffc00) >> 6) | 0xc, - OMAP1510_LB_MMU_CAM_L); - omap_writel(physaddr >> 16, OMAP1510_LB_MMU_RAM_H); - omap_writel((physaddr & 0x0000fc00) | 0x300, OMAP1510_LB_MMU_RAM_L); - omap_writel(tlb << 4, OMAP1510_LB_MMU_LCK); - omap_writel(0x1, OMAP1510_LB_MMU_LD_TLB); - } - - /* Enable the walking table */ - omap_writel(omap_readl(OMAP1510_LB_MMU_CTL) | (1 << 3), OMAP1510_LB_MMU_CTL); - udelay(200); - - return 0; -} -#else -#define omap_1510_local_bus_power(x) {} -#define omap_1510_local_bus_init() {} -#endif - #ifdef CONFIG_USB_OTG static void start_hnp(struct ohci_hcd *ohci) @@ -229,10 +163,8 @@ static int ohci_omap_reset(struct usb_hcd *hcd) omap_ohci_clock_power(priv, 1); - if (cpu_is_omap15xx()) { - omap_1510_local_bus_power(1); - omap_1510_local_bus_init(); - } + if (config->lb_reset) + config->lb_reset(); ret = ohci_setup(hcd); if (ret < 0) diff --git a/include/linux/platform_data/usb-omap1.h b/include/linux/platform_data/usb-omap1.h index 43b5ce139c37..878e572a78bf 100644 --- a/include/linux/platform_data/usb-omap1.h +++ b/include/linux/platform_data/usb-omap1.h @@ -48,6 +48,8 @@ struct omap_usb_config { u32 (*usb2_init)(unsigned nwires, unsigned alt_pingroup); int (*ocpi_enable)(void); + + void (*lb_reset)(void); }; #endif /* __LINUX_USB_OMAP1_H */ -- cgit v1.2.3-71-gd317 From 95a13ee858c9e426e63c97063677736f74af7163 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Oct 2021 16:55:44 +0300 Subject: hyper-v: Replace uuid.h with types.h There is no user of anything in uuid.h in the hyperv.h. Replace it with more appropriate types.h. Fixes: f081bbb3fd03 ("hyper-v: Remove internal types from UAPI header") Reported-by: Greg Kroah-Hartman Signed-off-by: Andy Shevchenko Reviewed-by: Haiyang Zhang Link: https://lore.kernel.org/r/20211001135544.1823-1-andriy.shevchenko@linux.intel.com Signed-off-by: Wei Liu --- include/uapi/linux/hyperv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h index 6135d92e0d47..daf82a230c0e 100644 --- a/include/uapi/linux/hyperv.h +++ b/include/uapi/linux/hyperv.h @@ -26,7 +26,7 @@ #ifndef _UAPI_HYPERV_H #define _UAPI_HYPERV_H -#include +#include /* * Framework version for util services. -- cgit v1.2.3-71-gd317 From 361b57df62de249dc0b2acbf48823662a5001bcd Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Tue, 5 Oct 2021 13:46:32 -0700 Subject: kunit: fix kernel-doc warnings due to mismatched arg names Commit 7122debb4367 ("kunit: introduce kunit_kmalloc_array/kunit_kcalloc() helpers") added new functions but called last arg `flags`, unlike the existing code that used `gfp`. This only is an issue in test.h, test.c still used `gfp`. But the documentation was copy-pasted with the old names, leading to kernel-doc warnings. Do s/flags/gfp to make the names consistent and fix the warnings. Fixes: 7122debb4367 ("kunit: introduce kunit_kmalloc_array/kunit_kcalloc() helpers") Reported-by: Randy Dunlap Signed-off-by: Daniel Latypov Reviewed-by: Randy Dunlap Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/test.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index 24b40e5c160b..018e776a34b9 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -613,7 +613,7 @@ void kunit_remove_resource(struct kunit *test, struct kunit_resource *res); * and is automatically cleaned up after the test case concludes. See &struct * kunit_resource for more information. */ -void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t flags); +void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t gfp); /** * kunit_kmalloc() - Like kmalloc() except the allocation is *test managed*. @@ -657,9 +657,9 @@ static inline void *kunit_kzalloc(struct kunit *test, size_t size, gfp_t gfp) * * See kcalloc() and kunit_kmalloc_array() for more information. */ -static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp_t flags) +static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp_t gfp) { - return kunit_kmalloc_array(test, n, size, flags | __GFP_ZERO); + return kunit_kmalloc_array(test, n, size, gfp | __GFP_ZERO); } void kunit_cleanup(struct kunit *test); -- cgit v1.2.3-71-gd317 From c0f1886de7e173865f1a0fa7680a1c07954a987f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Oct 2021 16:19:40 +0200 Subject: ALSA: hda: intel: Allow repeatedly probing on codec configuration errors It seems that a few recent AMD systems show the codec configuration errors at the early boot, while loading the driver at a later stage works magically. Although the root cause of the error isn't clear, it's certainly not bad to allow retrying the codec probe in such a case if that helps. This patch adds the capability for retrying the probe upon codec probe errors on the certain AMD platforms. The probe_work is changed to a delayed work, and at the secondary call, it'll jump to the codec probing. Note that, not only adding the re-probing, this includes the behavior changes in the codec configuration function. Namely, snd_hda_codec_configure() won't unregister the codec at errors any longer. Instead, its caller, azx_codec_configure() unregisters the codecs with the probe failures *if* any codec has been successfully configured. If all codec probe failed, it doesn't unregister but let it re-probed -- which is the most case we're seeing and this patch tries to improve. Even if the driver doesn't re-probe or give up, it will go to the "free-all" error path, hence the leftover codecs shall be disabled / deleted in anyway. BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1190801 Link: https://lore.kernel.org/r/20211006141940.2897-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/hda_codec.h | 1 + sound/pci/hda/hda_bind.c | 20 +++++++++++--------- sound/pci/hda/hda_codec.c | 1 + sound/pci/hda/hda_controller.c | 24 ++++++++++++++++-------- sound/pci/hda/hda_controller.h | 2 +- sound/pci/hda/hda_intel.c | 29 +++++++++++++++++++++++------ sound/pci/hda/hda_intel.h | 4 +++- 7 files changed, 56 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h index 01570dbda503..0e45963bb767 100644 --- a/include/sound/hda_codec.h +++ b/include/sound/hda_codec.h @@ -224,6 +224,7 @@ struct hda_codec { #endif /* misc flags */ + unsigned int configured:1; /* codec was configured */ unsigned int in_freeing:1; /* being released */ unsigned int registered:1; /* codec was registered */ unsigned int display_power_control:1; /* needs display power */ diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c index 2523b23389e9..1c8bffc3eec6 100644 --- a/sound/pci/hda/hda_bind.c +++ b/sound/pci/hda/hda_bind.c @@ -298,29 +298,31 @@ int snd_hda_codec_configure(struct hda_codec *codec) { int err; + if (codec->configured) + return 0; + if (is_generic_config(codec)) codec->probe_id = HDA_CODEC_ID_GENERIC; else codec->probe_id = 0; - err = snd_hdac_device_register(&codec->core); - if (err < 0) - return err; + if (!device_is_registered(&codec->core.dev)) { + err = snd_hdac_device_register(&codec->core); + if (err < 0) + return err; + } if (!codec->preset) codec_bind_module(codec); if (!codec->preset) { err = codec_bind_generic(codec); if (err < 0) { - codec_err(codec, "Unable to bind the codec\n"); - goto error; + codec_dbg(codec, "Unable to bind the codec\n"); + return err; } } + codec->configured = 1; return 0; - - error: - snd_hdac_device_unregister(&codec->core); - return err; } EXPORT_SYMBOL_GPL(snd_hda_codec_configure); diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index a9ebefd60cf6..0c4a337c9fc0 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -791,6 +791,7 @@ void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec) snd_array_free(&codec->nids); remove_conn_list(codec); snd_hdac_regmap_exit(&codec->core); + codec->configured = 0; } EXPORT_SYMBOL_GPL(snd_hda_codec_cleanup_for_unbind); diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index 7cd452831fd3..930ae4002a81 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -25,6 +25,7 @@ #include #include #include "hda_controller.h" +#include "hda_local.h" #define CREATE_TRACE_POINTS #include "hda_controller_trace.h" @@ -1248,17 +1249,24 @@ EXPORT_SYMBOL_GPL(azx_probe_codecs); int azx_codec_configure(struct azx *chip) { struct hda_codec *codec, *next; + int success = 0; - /* use _safe version here since snd_hda_codec_configure() deregisters - * the device upon error and deletes itself from the bus list. - */ - list_for_each_codec_safe(codec, next, &chip->bus) { - snd_hda_codec_configure(codec); + list_for_each_codec(codec, &chip->bus) { + if (!snd_hda_codec_configure(codec)) + success++; } - if (!azx_bus(chip)->num_codecs) - return -ENODEV; - return 0; + if (success) { + /* unregister failed codecs if any codec has been probed */ + list_for_each_codec_safe(codec, next, &chip->bus) { + if (!codec->configured) { + codec_err(codec, "Unable to configure, disabling\n"); + snd_hdac_device_unregister(&codec->core); + } + } + } + + return success ? 0 : -ENODEV; } EXPORT_SYMBOL_GPL(azx_codec_configure); diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index 3062f87380b1..f5bf295eb830 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -41,7 +41,7 @@ /* 24 unused */ #define AZX_DCAPS_COUNT_LPIB_DELAY (1 << 25) /* Take LPIB as delay */ #define AZX_DCAPS_PM_RUNTIME (1 << 26) /* runtime PM support */ -/* 27 unused */ +#define AZX_DCAPS_RETRY_PROBE (1 << 27) /* retry probe if no codec is configured */ #define AZX_DCAPS_CORBRP_SELF_CLEAR (1 << 28) /* CORBRP clears itself after reset */ #define AZX_DCAPS_NO_MSI64 (1 << 29) /* Stick to 32-bit MSIs */ #define AZX_DCAPS_SEPARATE_STREAM_TAG (1 << 30) /* capture and playback use separate stream tag */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 47777439961c..4d22e7adeee8 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -307,7 +307,8 @@ enum { /* quirks for AMD SB */ #define AZX_DCAPS_PRESET_AMD_SB \ (AZX_DCAPS_NO_TCSEL | AZX_DCAPS_AMD_WORKAROUND |\ - AZX_DCAPS_SNOOP_TYPE(ATI) | AZX_DCAPS_PM_RUNTIME) + AZX_DCAPS_SNOOP_TYPE(ATI) | AZX_DCAPS_PM_RUNTIME |\ + AZX_DCAPS_RETRY_PROBE) /* quirks for Nvidia */ #define AZX_DCAPS_PRESET_NVIDIA \ @@ -1723,7 +1724,7 @@ static void azx_check_snoop_available(struct azx *chip) static void azx_probe_work(struct work_struct *work) { - struct hda_intel *hda = container_of(work, struct hda_intel, probe_work); + struct hda_intel *hda = container_of(work, struct hda_intel, probe_work.work); azx_probe_continue(&hda->chip); } @@ -1828,7 +1829,7 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci, } /* continue probing in work context as may trigger request module */ - INIT_WORK(&hda->probe_work, azx_probe_work); + INIT_DELAYED_WORK(&hda->probe_work, azx_probe_work); *rchip = chip; @@ -2142,7 +2143,7 @@ static int azx_probe(struct pci_dev *pci, #endif if (schedule_probe) - schedule_work(&hda->probe_work); + schedule_delayed_work(&hda->probe_work, 0); dev++; if (chip->disabled) @@ -2228,6 +2229,11 @@ static int azx_probe_continue(struct azx *chip) int dev = chip->dev_index; int err; + if (chip->disabled || hda->init_failed) + return -EIO; + if (hda->probe_retry) + goto probe_retry; + to_hda_bus(bus)->bus_probing = 1; hda->probe_continued = 1; @@ -2289,10 +2295,20 @@ static int azx_probe_continue(struct azx *chip) #endif } #endif + + probe_retry: if (bus->codec_mask && !(probe_only[dev] & 1)) { err = azx_codec_configure(chip); - if (err < 0) + if (err) { + if ((chip->driver_caps & AZX_DCAPS_RETRY_PROBE) && + ++hda->probe_retry < 60) { + schedule_delayed_work(&hda->probe_work, + msecs_to_jiffies(1000)); + return 0; /* keep things up */ + } + dev_err(chip->card->dev, "Cannot probe codecs, giving up\n"); goto out_free; + } } err = snd_card_register(chip->card); @@ -2322,6 +2338,7 @@ out_free: display_power(chip, false); complete_all(&hda->probe_wait); to_hda_bus(bus)->bus_probing = 0; + hda->probe_retry = 0; return 0; } @@ -2347,7 +2364,7 @@ static void azx_remove(struct pci_dev *pci) * device during cancel_work_sync() call. */ device_unlock(&pci->dev); - cancel_work_sync(&hda->probe_work); + cancel_delayed_work_sync(&hda->probe_work); device_lock(&pci->dev); snd_card_free(card); diff --git a/sound/pci/hda/hda_intel.h b/sound/pci/hda/hda_intel.h index 3fb119f09040..0f39418f9328 100644 --- a/sound/pci/hda/hda_intel.h +++ b/sound/pci/hda/hda_intel.h @@ -14,7 +14,7 @@ struct hda_intel { /* sync probing */ struct completion probe_wait; - struct work_struct probe_work; + struct delayed_work probe_work; /* card list (for power_save trigger) */ struct list_head list; @@ -30,6 +30,8 @@ struct hda_intel { unsigned int freed:1; /* resources already released */ bool need_i915_power:1; /* the hda controller needs i915 power */ + + int probe_retry; /* being probe-retry */ }; #endif -- cgit v1.2.3-71-gd317 From 424953cf3c6657f1e67e1a2c5d6e3bb518ea4e9a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Sep 2021 09:50:27 +0200 Subject: qcom_scm: hide Kconfig symbol Now that SCM can be a loadable module, we have to add another dependency to avoid link failures when ipa or adreno-gpu are built-in: aarch64-linux-ld: drivers/net/ipa/ipa_main.o: in function `ipa_probe': ipa_main.c:(.text+0xfc4): undefined reference to `qcom_scm_is_available' ld.lld: error: undefined symbol: qcom_scm_is_available >>> referenced by adreno_gpu.c >>> gpu/drm/msm/adreno/adreno_gpu.o:(adreno_zap_shader_load) in archive drivers/built-in.a This can happen when CONFIG_ARCH_QCOM is disabled and we don't select QCOM_MDT_LOADER, but some other module selects QCOM_SCM. Ideally we'd use a similar dependency here to what we have for QCOM_RPROC_COMMON, but that causes dependency loops from other things selecting QCOM_SCM. This appears to be an endless problem, so try something different this time: - CONFIG_QCOM_SCM becomes a hidden symbol that nothing 'depends on' but that is simply selected by all of its users - All the stubs in include/linux/qcom_scm.h can go away - arm-smccc.h needs to provide a stub for __arm_smccc_smc() to allow compile-testing QCOM_SCM on all architectures. - To avoid a circular dependency chain involving RESET_CONTROLLER and PINCTRL_SUNXI, drop the 'select RESET_CONTROLLER' statement. According to my testing this still builds fine, and the QCOM platform selects this symbol already. Acked-by: Kalle Valo Acked-by: Alex Elder Signed-off-by: Arnd Bergmann --- drivers/firmware/Kconfig | 5 +-- drivers/gpu/drm/msm/Kconfig | 4 +- drivers/iommu/Kconfig | 3 +- drivers/iommu/arm/arm-smmu/Makefile | 3 +- drivers/iommu/arm/arm-smmu/arm-smmu-impl.c | 3 +- drivers/media/platform/Kconfig | 2 +- drivers/mmc/host/Kconfig | 2 +- drivers/net/ipa/Kconfig | 1 + drivers/net/wireless/ath/ath10k/Kconfig | 2 +- drivers/pinctrl/qcom/Kconfig | 3 +- include/linux/arm-smccc.h | 10 +++++ include/linux/qcom_scm.h | 71 ------------------------------ 12 files changed, 24 insertions(+), 85 deletions(-) (limited to 'include') diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 220a58cf0a44..cda7d7162cbb 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -203,10 +203,7 @@ config INTEL_STRATIX10_RSU Say Y here if you want Intel RSU support. config QCOM_SCM - tristate "Qcom SCM driver" - depends on ARM || ARM64 - depends on HAVE_ARM_SMCCC - select RESET_CONTROLLER + tristate config QCOM_SCM_DOWNLOAD_MODE_DEFAULT bool "Qualcomm download mode enabled by default" diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index e9c6af78b1d7..3ddf739a6f9b 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -17,7 +17,7 @@ config DRM_MSM select DRM_SCHED select SHMEM select TMPFS - select QCOM_SCM if ARCH_QCOM + select QCOM_SCM select WANT_DEV_COREDUMP select SND_SOC_HDMI_CODEC if SND_SOC select SYNC_FILE @@ -55,7 +55,7 @@ config DRM_MSM_GPU_SUDO config DRM_MSM_HDMI_HDCP bool "Enable HDMI HDCP support in MSM DRM driver" - depends on DRM_MSM && QCOM_SCM + depends on DRM_MSM default y help Choose this option to enable HDCP state machine diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 124c41adeca1..c5c71b7ab7e8 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -308,7 +308,6 @@ config APPLE_DART config ARM_SMMU tristate "ARM Ltd. System MMU (SMMU) Support" depends on ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64) - depends on QCOM_SCM || !QCOM_SCM #if QCOM_SCM=m this can't be =y select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU if ARM @@ -438,7 +437,7 @@ config QCOM_IOMMU # Note: iommu drivers cannot (yet?) be built as modules bool "Qualcomm IOMMU Support" depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64) - depends on QCOM_SCM=y + select QCOM_SCM select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU diff --git a/drivers/iommu/arm/arm-smmu/Makefile b/drivers/iommu/arm/arm-smmu/Makefile index e240a7bcf310..b0cc01aa20c9 100644 --- a/drivers/iommu/arm/arm-smmu/Makefile +++ b/drivers/iommu/arm/arm-smmu/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o obj-$(CONFIG_ARM_SMMU) += arm_smmu.o -arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o arm-smmu-qcom.o +arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o +arm_smmu-$(CONFIG_ARM_SMMU_QCOM) += arm-smmu-qcom.o diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c index 9f465e146799..2c25cce38060 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c @@ -215,7 +215,8 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) of_device_is_compatible(np, "nvidia,tegra186-smmu")) return nvidia_smmu_impl_init(smmu); - smmu = qcom_smmu_impl_init(smmu); + if (IS_ENABLED(CONFIG_ARM_SMMU_QCOM)) + smmu = qcom_smmu_impl_init(smmu); if (of_device_is_compatible(np, "marvell,ap806-smmu-500")) smmu->impl = &mrvl_mmu500_impl; diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index 157c924686e4..80321e03809a 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -565,7 +565,7 @@ config VIDEO_QCOM_VENUS depends on VIDEO_DEV && VIDEO_V4L2 && QCOM_SMEM depends on (ARCH_QCOM && IOMMU_DMA) || COMPILE_TEST select QCOM_MDT_LOADER if ARCH_QCOM - select QCOM_SCM if ARCH_QCOM + select QCOM_SCM select VIDEOBUF2_DMA_CONTIG select V4L2_MEM2MEM_DEV help diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 71313961cc54..95b3511b0560 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -547,7 +547,7 @@ config MMC_SDHCI_MSM depends on MMC_SDHCI_PLTFM select MMC_SDHCI_IO_ACCESSORS select MMC_CQHCI - select QCOM_SCM if MMC_CRYPTO && ARCH_QCOM + select QCOM_SCM if MMC_CRYPTO help This selects the Secure Digital Host Controller Interface (SDHCI) support present in Qualcomm SOCs. The controller supports diff --git a/drivers/net/ipa/Kconfig b/drivers/net/ipa/Kconfig index 8f99cfa14680..d037682fb7ad 100644 --- a/drivers/net/ipa/Kconfig +++ b/drivers/net/ipa/Kconfig @@ -4,6 +4,7 @@ config QCOM_IPA depends on ARCH_QCOM || COMPILE_TEST depends on QCOM_RPROC_COMMON || (QCOM_RPROC_COMMON=n && COMPILE_TEST) select QCOM_MDT_LOADER if ARCH_QCOM + select QCOM_SCM select QCOM_QMI_HELPERS help Choose Y or M here to include support for the Qualcomm diff --git a/drivers/net/wireless/ath/ath10k/Kconfig b/drivers/net/wireless/ath/ath10k/Kconfig index 741289e385d5..ca007b800f75 100644 --- a/drivers/net/wireless/ath/ath10k/Kconfig +++ b/drivers/net/wireless/ath/ath10k/Kconfig @@ -44,7 +44,7 @@ config ATH10K_SNOC tristate "Qualcomm ath10k SNOC support" depends on ATH10K depends on ARCH_QCOM || COMPILE_TEST - depends on QCOM_SCM || !QCOM_SCM #if QCOM_SCM=m this can't be =y + select QCOM_SCM select QCOM_QMI_HELPERS help This module adds support for integrated WCN3990 chip connected diff --git a/drivers/pinctrl/qcom/Kconfig b/drivers/pinctrl/qcom/Kconfig index 32ea2a8ec02b..5ff4207df66e 100644 --- a/drivers/pinctrl/qcom/Kconfig +++ b/drivers/pinctrl/qcom/Kconfig @@ -3,7 +3,8 @@ if (ARCH_QCOM || COMPILE_TEST) config PINCTRL_MSM tristate "Qualcomm core pin controller driver" - depends on GPIOLIB && (QCOM_SCM || !QCOM_SCM) #if QCOM_SCM=m this can't be =y + depends on GPIOLIB + select QCOM_SCM select PINMUX select PINCONF select GENERIC_PINCONF diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 7d1cabe15262..63ccb5252190 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -321,10 +321,20 @@ asmlinkage unsigned long __arm_smccc_sve_check(unsigned long x0); * from register 0 to 3 on return from the SMC instruction. An optional * quirk structure provides vendor specific behavior. */ +#ifdef CONFIG_HAVE_ARM_SMCCC asmlinkage void __arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4, unsigned long a5, unsigned long a6, unsigned long a7, struct arm_smccc_res *res, struct arm_smccc_quirk *quirk); +#else +static inline void __arm_smccc_smc(unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, unsigned long a4, + unsigned long a5, unsigned long a6, unsigned long a7, + struct arm_smccc_res *res, struct arm_smccc_quirk *quirk) +{ + *res = (struct arm_smccc_res){}; +} +#endif /** * __arm_smccc_hvc() - make HVC calls diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index c0475d1c9885..81cad9e1e412 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -61,7 +61,6 @@ enum qcom_scm_ice_cipher { #define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE) #define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC) -#if IS_ENABLED(CONFIG_QCOM_SCM) extern bool qcom_scm_is_available(void); extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus); @@ -115,74 +114,4 @@ extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, extern int qcom_scm_lmh_profile_change(u32 profile_id); extern bool qcom_scm_lmh_dcvsh_available(void); -#else - -#include - -static inline bool qcom_scm_is_available(void) { return false; } - -static inline int qcom_scm_set_cold_boot_addr(void *entry, - const cpumask_t *cpus) { return -ENODEV; } -static inline int qcom_scm_set_warm_boot_addr(void *entry, - const cpumask_t *cpus) { return -ENODEV; } -static inline void qcom_scm_cpu_power_down(u32 flags) {} -static inline u32 qcom_scm_set_remote_state(u32 state,u32 id) - { return -ENODEV; } - -static inline int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, - size_t size) { return -ENODEV; } -static inline int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, - phys_addr_t size) { return -ENODEV; } -static inline int qcom_scm_pas_auth_and_reset(u32 peripheral) - { return -ENODEV; } -static inline int qcom_scm_pas_shutdown(u32 peripheral) { return -ENODEV; } -static inline bool qcom_scm_pas_supported(u32 peripheral) { return false; } - -static inline int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val) - { return -ENODEV; } -static inline int qcom_scm_io_writel(phys_addr_t addr, unsigned int val) - { return -ENODEV; } - -static inline bool qcom_scm_restore_sec_cfg_available(void) { return false; } -static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare) - { return -ENODEV; } -static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size) - { return -ENODEV; } -static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) - { return -ENODEV; } -extern inline int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, - u32 cp_nonpixel_start, - u32 cp_nonpixel_size) - { return -ENODEV; } -static inline int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, - unsigned int *src, const struct qcom_scm_vmperm *newvm, - unsigned int dest_cnt) { return -ENODEV; } - -static inline bool qcom_scm_ocmem_lock_available(void) { return false; } -static inline int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size, u32 mode) { return -ENODEV; } -static inline int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, - u32 offset, u32 size) { return -ENODEV; } - -static inline bool qcom_scm_ice_available(void) { return false; } -static inline int qcom_scm_ice_invalidate_key(u32 index) { return -ENODEV; } -static inline int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, - enum qcom_scm_ice_cipher cipher, - u32 data_unit_size) { return -ENODEV; } - -static inline bool qcom_scm_hdcp_available(void) { return false; } -static inline int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, - u32 *resp) { return -ENODEV; } - -static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en) - { return -ENODEV; } - -static inline int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, - u64 limit_node, u32 node_id, u64 version) - { return -ENODEV; } - -static inline int qcom_scm_lmh_profile_change(u32 profile_id) { return -ENODEV; } - -static inline bool qcom_scm_lmh_dcvsh_available(void) { return -ENODEV; } -#endif #endif -- cgit v1.2.3-71-gd317 From 2fbc349911e45d4ea5187b608c8d58db66496260 Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Sun, 12 Sep 2021 23:26:06 +0200 Subject: asm-generic/io.h: give stub iounmap() on !MMU same prototype as elsewhere It made -Werror sad. Signed-off-by: Adam Borowski Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index e93375c710b9..dea1d36a6402 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -957,7 +957,7 @@ static inline void __iomem *ioremap(phys_addr_t offset, size_t size) #ifndef iounmap #define iounmap iounmap -static inline void iounmap(void __iomem *addr) +static inline void iounmap(volatile void __iomem *addr) { } #endif -- cgit v1.2.3-71-gd317 From 5bded8259ee3815a91791462dfb3312480779c3d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 7 Oct 2021 19:47:11 +0300 Subject: net: dsa: mv88e6xxx: isolate the ATU databases of standalone and bridged ports Similar to commit 6087175b7991 ("net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges"), software forwarding between an unoffloaded LAG port (a bonding interface with an unsupported policy) and a mv88e6xxx user port directly under a bridge is broken. We adopt the same strategy, which is to make the standalone ports not find any ATU entry learned on a bridge port. Theory: the mv88e6xxx ATU is looked up by FID and MAC address. There are as many FIDs as VIDs (4096). The FID is derived from the VID when possible (the VTU maps a VID to a FID), with a fallback to the port based default FID value when not (802.1Q Mode is disabled on the port, or the classified VID isn't present in the VTU). The mv88e6xxx driver makes the following use of FIDs and VIDs: - the port's DefaultVID (to which untagged & pvid-tagged packets get classified) is 0 and is absent from the VTU, so this kind of packets is processed in FID 0, the default FID assigned by mv88e6xxx_setup_port. - every time a bridge VLAN is created, mv88e6xxx_port_vlan_join() -> mv88e6xxx_atu_new() associates a FID with that VID which increases linearly starting from 1. Like this: bridge vlan add dev lan0 vid 100 # FID 1 bridge vlan add dev lan1 vid 100 # still FID 1 bridge vlan add dev lan2 vid 1024 # FID 2 The FID allocation made by the driver is sub-optimal for the following reasons: (a) A standalone port has a DefaultPVID of 0 and a default FID of 0 too. A VLAN-unaware bridged port has a DefaultPVID of 0 and a default FID of 0 too. The difference is that the bridged ports may learn ATU entries, while the standalone port has the requirement that it must not, and must not find them either. Standalone ports must not use the same FID as ports belonging to a bridge. All standalone ports can use the same FID, since the ATU will never have an entry in that FID. (b) Multiple VLAN-unaware bridges will all use a DefaultPVID of 0 and a default FID of 0 on all their ports. The FDBs will not be isolated between these bridges. Every VLAN-unaware bridge must use the same FID on all its ports, different from the FID of other bridge ports. (c) Each bridge VLAN uses a unique FID which is useful for Independent VLAN Learning, but the same VLAN ID on multiple VLAN-aware bridges will result in the same FID being used by mv88e6xxx_atu_new(). The correct behavior is for VLAN 1 in br0 to have a different FID compared to VLAN 1 in br1. This patch cannot fix all the above. Traditionally the DSA framework did not care about this, and the reality is that DSA core involvement is needed for the aforementioned issues to be solved. The only thing we can solve here is an issue which does not require API changes, and that is issue (a), aka use a different FID for standalone ports vs ports under VLAN-unaware bridges. The first step is deciding what VID and FID to use for standalone ports, and what VID and FID for bridged ports. The 0/0 pair for standalone ports is what they used up till now, let's keep using that. For bridged ports, there are 2 cases: - VLAN-aware ports will never end up using the port default FID, because packets will always be classified to a VID in the VTU or dropped otherwise. The FID is the one associated with the VID in the VTU. - On VLAN-unaware ports, we _could_ leave their DefaultVID (pvid) at zero (just as in the case of standalone ports), and just change the port's default FID from 0 to a different number (say 1). However, Tobias points out that there is one more requirement to cater to: cross-chip bridging. The Marvell DSA header does not carry the FID in it, only the VID. So once a packet crosses a DSA link, if it has a VID of zero it will get classified to the default FID of that cascade port. Relying on a port default FID for upstream cascade ports results in contradictions: a default FID of 0 breaks ATU isolation of bridged ports on the downstream switch, a default FID of 1 breaks standalone ports on the downstream switch. So not only must standalone ports have different FIDs compared to bridged ports, they must also have different DefaultVID values. IEEE 802.1Q defines two reserved VID values: 0 and 4095. So we simply choose 4095 as the DefaultVID of ports belonging to VLAN-unaware bridges, and VID 4095 maps to FID 1. For the xmit operation to look up the same ATU database, we need to put VID 4095 in DSA tags sent to ports belonging to VLAN-unaware bridges too. All shared ports are configured to map this VID to the bridging FID, because they are members of that VLAN in the VTU. Shared ports don't need to have 802.1QMode enabled in any way, they always parse the VID from the DSA header, they don't need to look at the 802.1Q header. We install VID 4095 to the VTU in mv88e6xxx_setup_port(), with the mention that mv88e6xxx_vtu_setup() which was located right below that call was flushing the VTU so those entries wouldn't be preserved. So we need to relocate the VTU flushing prior to the port initialization during ->setup(). Also note that this is why it is safe to assume that VID 4095 will get associated with FID 1: the user ports haven't been created, so there is no avenue for the user to create a bridge VLAN which could otherwise race with the creation of another FID which would otherwise use up the non-reserved FID value of 1. [ Currently mv88e6xxx_port_vlan_join() doesn't have the option of specifying a preferred FID, it always calls mv88e6xxx_atu_new(). ] mv88e6xxx_port_db_load_purge() is the function to access the ATU for FDB/MDB entries, and it used to determine the FID to use for VLAN-unaware FDB entries (VID=0) using mv88e6xxx_port_get_fid(). But the driver only called mv88e6xxx_port_set_fid() once, during probe, so no surprises, the port FID was always 0, the call to get_fid() was redundant. As much as I would have wanted to not touch that code, the logic is broken when we add a new FID which is not the port-based default. Now the port-based default FID only corresponds to standalone ports, and FDB/MDB entries belong to the bridging service. So while in the future, when the DSA API will support FDB isolation, we will have to figure out the FID based on the bridge number, for now there's a single bridging FID, so hardcode that. Lastly, the tagger needs to check, when it is transmitting a VLAN untagged skb, whether it is sending it towards a bridged or a standalone port. When we see it is bridged we assume the bridge is VLAN-unaware. Not because it cannot be VLAN-aware but: - if we are transmitting from a VLAN-aware bridge we are likely doing so using TX forwarding offload. That code path guarantees that skbs have a vlan hwaccel tag in them, so we would not enter the "else" branch of the "if (skb->protocol == htons(ETH_P_8021Q))" condition. - if we are transmitting on behalf of a VLAN-aware bridge but with no TX forwarding offload (no PVT support, out of space in the PVT, whatever), we would indeed be transmitting with VLAN 4095 instead of the bridge device's pvid. However we would be injecting a "From CPU" frame, and the switch won't learn from that - it only learns from "Forward" frames. So it is inconsequential for address learning. And VLAN 4095 is absolutely enough for the frame to exit the switch, since we never remove that VLAN from any port. Fixes: 57e661aae6a8 ("net: dsa: mv88e6xxx: Link aggregation support") Reported-by: Tobias Waldekranz Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + drivers/net/dsa/mv88e6xxx/chip.c | 67 ++++++++++++++++++++++++++++++++-------- drivers/net/dsa/mv88e6xxx/chip.h | 3 ++ include/linux/dsa/mv88e6xxx.h | 13 ++++++++ net/dsa/tag_dsa.c | 12 +++++-- 5 files changed, 80 insertions(+), 16 deletions(-) create mode 100644 include/linux/dsa/mv88e6xxx.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index a4a0c2baaf27..17f652b2f653 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11153,6 +11153,7 @@ S: Maintained F: Documentation/devicetree/bindings/net/dsa/marvell.txt F: Documentation/networking/devlink/mv88e6xxx.rst F: drivers/net/dsa/mv88e6xxx/ +F: include/linux/dsa/mv88e6xxx.h F: include/linux/platform_data/mv88e6xxx.h MARVELL ARMADA 3700 PHY DRIVERS diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d672112afffd..d7b29792732b 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -1681,13 +1682,17 @@ static int mv88e6xxx_port_commit_pvid(struct mv88e6xxx_chip *chip, int port) { struct dsa_port *dp = dsa_to_port(chip->ds, port); struct mv88e6xxx_port *p = &chip->ports[port]; + u16 pvid = MV88E6XXX_VID_STANDALONE; bool drop_untagged = false; - u16 pvid = 0; int err; - if (dp->bridge_dev && br_vlan_enabled(dp->bridge_dev)) { - pvid = p->bridge_pvid.vid; - drop_untagged = !p->bridge_pvid.valid; + if (dp->bridge_dev) { + if (br_vlan_enabled(dp->bridge_dev)) { + pvid = p->bridge_pvid.vid; + drop_untagged = !p->bridge_pvid.valid; + } else { + pvid = MV88E6XXX_VID_BRIDGED; + } } err = mv88e6xxx_port_set_pvid(chip, port, pvid); @@ -1754,11 +1759,15 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, u16 fid; int err; - /* Null VLAN ID corresponds to the port private database */ + /* Ports have two private address databases: one for when the port is + * standalone and one for when the port is under a bridge and the + * 802.1Q mode is disabled. When the port is standalone, DSA wants its + * address database to remain 100% empty, so we never load an ATU entry + * into a standalone port's database. Therefore, translate the null + * VLAN ID into the port's database used for VLAN-unaware bridging. + */ if (vid == 0) { - err = mv88e6xxx_port_get_fid(chip, port, &fid); - if (err) - return err; + fid = MV88E6XXX_FID_BRIDGED; } else { err = mv88e6xxx_vtu_get(chip, vid, &vlan); if (err) @@ -2434,7 +2443,16 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, int err; mv88e6xxx_reg_lock(chip); + err = mv88e6xxx_bridge_map(chip, br); + if (err) + goto unlock; + + err = mv88e6xxx_port_commit_pvid(chip, port); + if (err) + goto unlock; + +unlock: mv88e6xxx_reg_unlock(chip); return err; @@ -2444,11 +2462,20 @@ static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port, struct net_device *br) { struct mv88e6xxx_chip *chip = ds->priv; + int err; mv88e6xxx_reg_lock(chip); + if (mv88e6xxx_bridge_map(chip, br) || mv88e6xxx_port_vlan_map(chip, port)) dev_err(ds->dev, "failed to remap in-chip Port VLAN\n"); + + err = mv88e6xxx_port_commit_pvid(chip, port); + if (err) + dev_err(ds->dev, + "port %d failed to restore standalone pvid: %pe\n", + port, ERR_PTR(err)); + mv88e6xxx_reg_unlock(chip); } @@ -2894,6 +2921,20 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) if (err) return err; + /* Associate MV88E6XXX_VID_BRIDGED with MV88E6XXX_FID_BRIDGED in the + * ATU by virtue of the fact that mv88e6xxx_atu_new() will pick it as + * the first free FID after MV88E6XXX_FID_STANDALONE. This will be used + * as the private PVID on ports under a VLAN-unaware bridge. + * Shared (DSA and CPU) ports must also be members of it, to translate + * the VID from the DSA tag into MV88E6XXX_FID_BRIDGED, instead of + * relying on their port default FID. + */ + err = mv88e6xxx_port_vlan_join(chip, port, MV88E6XXX_VID_BRIDGED, + MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_UNTAGGED, + false); + if (err) + return err; + if (chip->info->ops->port_set_jumbo_size) { err = chip->info->ops->port_set_jumbo_size(chip, port, 10218); if (err) @@ -2966,7 +3007,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * database, and allow bidirectional communication between the * CPU and DSA port(s), and the other ports. */ - err = mv88e6xxx_port_set_fid(chip, port, 0); + err = mv88e6xxx_port_set_fid(chip, port, MV88E6XXX_FID_STANDALONE); if (err) return err; @@ -3156,6 +3197,10 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) } } + err = mv88e6xxx_vtu_setup(chip); + if (err) + goto unlock; + /* Setup Switch Port Registers */ for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { if (dsa_is_unused_port(ds, i)) @@ -3185,10 +3230,6 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) if (err) goto unlock; - err = mv88e6xxx_vtu_setup(chip); - if (err) - goto unlock; - err = mv88e6xxx_pvt_setup(chip); if (err) goto unlock; diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 33d067e8396d..8271b8aa7b71 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -21,6 +21,9 @@ #define EDSA_HLEN 8 #define MV88E6XXX_N_FID 4096 +#define MV88E6XXX_FID_STANDALONE 0 +#define MV88E6XXX_FID_BRIDGED 1 + /* PVT limits for 4-bit port and 5-bit switch */ #define MV88E6XXX_MAX_PVT_SWITCHES 32 #define MV88E6XXX_MAX_PVT_PORTS 16 diff --git a/include/linux/dsa/mv88e6xxx.h b/include/linux/dsa/mv88e6xxx.h new file mode 100644 index 000000000000..8c3d45eca46b --- /dev/null +++ b/include/linux/dsa/mv88e6xxx.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright 2021 NXP + */ + +#ifndef _NET_DSA_TAG_MV88E6XXX_H +#define _NET_DSA_TAG_MV88E6XXX_H + +#include + +#define MV88E6XXX_VID_STANDALONE 0 +#define MV88E6XXX_VID_BRIDGED (VLAN_N_VID - 1) + +#endif diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 68d5ddc3ef35..b3da4b2ea11c 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -45,6 +45,7 @@ * 6 6 2 2 4 2 N */ +#include #include #include #include @@ -164,16 +165,21 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, dsa_header[2] &= ~0x10; } } else { + struct net_device *br = dp->bridge_dev; + u16 vid; + + vid = br ? MV88E6XXX_VID_BRIDGED : MV88E6XXX_VID_STANDALONE; + skb_push(skb, DSA_HLEN + extra); dsa_alloc_etype_header(skb, DSA_HLEN + extra); - /* Construct untagged DSA tag. */ + /* Construct DSA header from untagged frame. */ dsa_header = dsa_etype_header_pos_tx(skb) + extra; dsa_header[0] = (cmd << 6) | tag_dev; dsa_header[1] = tag_port << 3; - dsa_header[2] = 0; - dsa_header[3] = 0; + dsa_header[2] = vid >> 8; + dsa_header[3] = vid & 0xff; } return skb; -- cgit v1.2.3-71-gd317 From 0bc73ad46a76ed6ece4dcacb28858e7b38561e1c Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 26 Sep 2021 17:55:41 +0300 Subject: net/mlx5e: Mutually exclude RX-FCS and RX-port-timestamp Due to current HW arch limitations, RX-FCS (scattering FCS frame field to software) and RX-port-timestamp (improved timestamp accuracy on the receive side) can't work together. RX-port-timestamp is not controlled by the user and it is enabled by default when supported by the HW/FW. This patch sets RX-port-timestamp opposite to RX-FCS configuration. Fixes: 102722fc6832 ("net/mlx5e: Add support for RXFCS feature flag") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 57 +++++++++++++++++++++-- include/linux/mlx5/mlx5_ifc.h | 10 +++- 2 files changed, 60 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 336aa07313da..09c8b71b186c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3325,20 +3325,67 @@ static int set_feature_rx_all(struct net_device *netdev, bool enable) return mlx5_set_port_fcs(mdev, !enable); } +static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {}; + bool supported, curr_state; + int err; + + if (!MLX5_CAP_GEN(mdev, ports_check)) + return 0; + + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; + + supported = MLX5_GET(pcmr_reg, in, rx_ts_over_crc_cap); + curr_state = MLX5_GET(pcmr_reg, in, rx_ts_over_crc); + + if (!supported || enable == curr_state) + return 0; + + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, rx_ts_over_crc, enable); + + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + static int set_feature_rx_fcs(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_channels *chs = &priv->channels; + struct mlx5_core_dev *mdev = priv->mdev; int err; mutex_lock(&priv->state_lock); - priv->channels.params.scatter_fcs_en = enable; - err = mlx5e_modify_channels_scatter_fcs(&priv->channels, enable); - if (err) - priv->channels.params.scatter_fcs_en = !enable; + if (enable) { + err = mlx5e_set_rx_port_ts(mdev, false); + if (err) + goto out; - mutex_unlock(&priv->state_lock); + chs->params.scatter_fcs_en = true; + err = mlx5e_modify_channels_scatter_fcs(chs, true); + if (err) { + chs->params.scatter_fcs_en = false; + mlx5e_set_rx_port_ts(mdev, true); + } + } else { + chs->params.scatter_fcs_en = false; + err = mlx5e_modify_channels_scatter_fcs(chs, false); + if (err) { + chs->params.scatter_fcs_en = true; + goto out; + } + err = mlx5e_set_rx_port_ts(mdev, true); + if (err) { + mlx5_core_warn(mdev, "Failed to set RX port timestamp %d\n", err); + err = 0; + } + } +out: + mutex_unlock(&priv->state_lock); return err; } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f3638d09ba77..993204a6c1a1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9475,16 +9475,22 @@ struct mlx5_ifc_pcmr_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; u8 reserved_at_10[0x10]; + u8 entropy_force_cap[0x1]; u8 entropy_calc_cap[0x1]; u8 entropy_gre_calc_cap[0x1]; - u8 reserved_at_23[0x1b]; + u8 reserved_at_23[0xf]; + u8 rx_ts_over_crc_cap[0x1]; + u8 reserved_at_33[0xb]; u8 fcs_cap[0x1]; u8 reserved_at_3f[0x1]; + u8 entropy_force[0x1]; u8 entropy_calc[0x1]; u8 entropy_gre_calc[0x1]; - u8 reserved_at_43[0x1b]; + u8 reserved_at_43[0xf]; + u8 rx_ts_over_crc[0x1]; + u8 reserved_at_53[0xb]; u8 fcs_chk[0x1]; u8 reserved_at_5f[0x1]; }; -- cgit v1.2.3-71-gd317 From 28da0555c3b542d605e4ca26eea6a740cf2c9174 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 22 Sep 2021 17:37:25 +0300 Subject: net: dsa: move sja1110_process_meta_tstamp inside the tagging protocol driver The problem is that DSA tagging protocols really must not depend on the switch driver, because this creates a circular dependency at insmod time, and the switch driver will effectively not load when the tagging protocol driver is missing. The code was structured in the way it was for a reason, though. The DSA driver-facing API for PTP timestamping relies on the assumption that two-step TX timestamps are provided by the hardware in an out-of-band manner, typically by raising an interrupt and making that timestamp available inside some sort of FIFO which is to be accessed over SPI/MDIO/etc. So the API puts .port_txtstamp into dsa_switch_ops, because it is expected that the switch driver needs to save some state (like put the skb into a queue until its TX timestamp arrives). On SJA1110, TX timestamps are provided by the switch as Ethernet packets, so this makes them be received and processed by the tagging protocol driver. This in itself is great, because the timestamps are full 64-bit and do not require reconstruction, and since Ethernet is the fastest I/O method available to/from the switch, PTP timestamps arrive very quickly, no matter how bottlenecked the SPI connection is, because SPI interaction is not needed at all. DSA's code structure and strict isolation between the tagging protocol driver and the switch driver break the natural code organization. When the tagging protocol driver receives a packet which is classified as a metadata packet containing timestamps, it passes those timestamps one by one to the switch driver, which then proceeds to compare them based on the recorded timestamp ID that was generated in .port_txtstamp. The communication between the tagging protocol and the switch driver is done through a method exported by the switch driver, sja1110_process_meta_tstamp. To satisfy build requirements, we force a dependency to build the tagging protocol driver as a module when the switch driver is a module. However, as explained in the first paragraph, that causes the circular dependency. To solve this, move the skb queue from struct sja1105_private :: struct sja1105_ptp_data to struct sja1105_private :: struct sja1105_tagger_data. The latter is a data structure for which hacks have already been put into place to be able to create persistent storage per switch that is accessible from the tagging protocol driver (see sja1105_setup_ports). With the skb queue directly accessible from the tagging protocol driver, we can now move sja1110_process_meta_tstamp into the tagging driver itself, and avoid exporting a symbol. Fixes: 566b18c8b752 ("net: dsa: sja1105: implement TX timestamping for SJA1110") Link: https://lore.kernel.org/netdev/20210908220834.d7gmtnwrorhharna@skbuf/ Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/sja1105/sja1105_ptp.c | 45 +++++------------------------------ drivers/net/dsa/sja1105/sja1105_ptp.h | 19 --------------- include/linux/dsa/sja1105.h | 29 +++++++++++----------- net/dsa/tag_sja1105.c | 43 +++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c index 691f6dd7e669..54396992a919 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.c +++ b/drivers/net/dsa/sja1105/sja1105_ptp.c @@ -64,6 +64,7 @@ enum sja1105_ptp_clk_mode { static int sja1105_change_rxtstamping(struct sja1105_private *priv, bool on) { + struct sja1105_tagger_data *tagger_data = &priv->tagger_data; struct sja1105_ptp_data *ptp_data = &priv->ptp_data; struct sja1105_general_params_entry *general_params; struct sja1105_table *table; @@ -79,7 +80,7 @@ static int sja1105_change_rxtstamping(struct sja1105_private *priv, priv->tagger_data.stampable_skb = NULL; } ptp_cancel_worker_sync(ptp_data->clock); - skb_queue_purge(&ptp_data->skb_txtstamp_queue); + skb_queue_purge(&tagger_data->skb_txtstamp_queue); skb_queue_purge(&ptp_data->skb_rxtstamp_queue); return sja1105_static_config_reload(priv, SJA1105_RX_HWTSTAMPING); @@ -452,40 +453,6 @@ bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port, return priv->info->rxtstamp(ds, port, skb); } -void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, u8 ts_id, - enum sja1110_meta_tstamp dir, u64 tstamp) -{ - struct sja1105_private *priv = ds->priv; - struct sja1105_ptp_data *ptp_data = &priv->ptp_data; - struct sk_buff *skb, *skb_tmp, *skb_match = NULL; - struct skb_shared_hwtstamps shwt = {0}; - - /* We don't care about RX timestamps on the CPU port */ - if (dir == SJA1110_META_TSTAMP_RX) - return; - - spin_lock(&ptp_data->skb_txtstamp_queue.lock); - - skb_queue_walk_safe(&ptp_data->skb_txtstamp_queue, skb, skb_tmp) { - if (SJA1105_SKB_CB(skb)->ts_id != ts_id) - continue; - - __skb_unlink(skb, &ptp_data->skb_txtstamp_queue); - skb_match = skb; - - break; - } - - spin_unlock(&ptp_data->skb_txtstamp_queue.lock); - - if (WARN_ON(!skb_match)) - return; - - shwt.hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(tstamp)); - skb_complete_tx_timestamp(skb_match, &shwt); -} -EXPORT_SYMBOL_GPL(sja1110_process_meta_tstamp); - /* In addition to cloning the skb which is done by the common * sja1105_port_txtstamp, we need to generate a timestamp ID and save the * packet to the TX timestamping queue. @@ -494,7 +461,6 @@ void sja1110_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb) { struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone; struct sja1105_private *priv = ds->priv; - struct sja1105_ptp_data *ptp_data = &priv->ptp_data; struct sja1105_port *sp = &priv->ports[port]; u8 ts_id; @@ -510,7 +476,7 @@ void sja1110_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb) spin_unlock(&sp->data->meta_lock); - skb_queue_tail(&ptp_data->skb_txtstamp_queue, clone); + skb_queue_tail(&sp->data->skb_txtstamp_queue, clone); } /* Called from dsa_skb_tx_timestamp. This callback is just to clone @@ -953,7 +919,7 @@ int sja1105_ptp_clock_register(struct dsa_switch *ds) /* Only used on SJA1105 */ skb_queue_head_init(&ptp_data->skb_rxtstamp_queue); /* Only used on SJA1110 */ - skb_queue_head_init(&ptp_data->skb_txtstamp_queue); + skb_queue_head_init(&tagger_data->skb_txtstamp_queue); spin_lock_init(&tagger_data->meta_lock); ptp_data->clock = ptp_clock_register(&ptp_data->caps, ds->dev); @@ -971,6 +937,7 @@ int sja1105_ptp_clock_register(struct dsa_switch *ds) void sja1105_ptp_clock_unregister(struct dsa_switch *ds) { struct sja1105_private *priv = ds->priv; + struct sja1105_tagger_data *tagger_data = &priv->tagger_data; struct sja1105_ptp_data *ptp_data = &priv->ptp_data; if (IS_ERR_OR_NULL(ptp_data->clock)) @@ -978,7 +945,7 @@ void sja1105_ptp_clock_unregister(struct dsa_switch *ds) del_timer_sync(&ptp_data->extts_timer); ptp_cancel_worker_sync(ptp_data->clock); - skb_queue_purge(&ptp_data->skb_txtstamp_queue); + skb_queue_purge(&tagger_data->skb_txtstamp_queue); skb_queue_purge(&ptp_data->skb_rxtstamp_queue); ptp_clock_unregister(ptp_data->clock); ptp_data->clock = NULL; diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h index 3c874bb4c17b..3ae6b9fdd492 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.h +++ b/drivers/net/dsa/sja1105/sja1105_ptp.h @@ -8,21 +8,6 @@ #if IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) -/* Timestamps are in units of 8 ns clock ticks (equivalent to - * a fixed 125 MHz clock). - */ -#define SJA1105_TICK_NS 8 - -static inline s64 ns_to_sja1105_ticks(s64 ns) -{ - return ns / SJA1105_TICK_NS; -} - -static inline s64 sja1105_ticks_to_ns(s64 ticks) -{ - return ticks * SJA1105_TICK_NS; -} - /* Calculate the first base_time in the future that satisfies this * relationship: * @@ -77,10 +62,6 @@ struct sja1105_ptp_data { struct timer_list extts_timer; /* Used only on SJA1105 to reconstruct partial timestamps */ struct sk_buff_head skb_rxtstamp_queue; - /* Used on SJA1110 where meta frames are generated only for - * 2-step TX timestamps - */ - struct sk_buff_head skb_txtstamp_queue; struct ptp_clock_info caps; struct ptp_clock *clock; struct sja1105_ptp_cmd cmd; diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 171106202fe5..0485ab2fcc46 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -48,6 +48,10 @@ struct sja1105_tagger_data { spinlock_t meta_lock; unsigned long state; u8 ts_id; + /* Used on SJA1110 where meta frames are generated only for + * 2-step TX timestamps + */ + struct sk_buff_head skb_txtstamp_queue; }; struct sja1105_skb_cb { @@ -69,25 +73,20 @@ struct sja1105_port { bool hwts_tx_en; }; -enum sja1110_meta_tstamp { - SJA1110_META_TSTAMP_TX = 0, - SJA1110_META_TSTAMP_RX = 1, -}; - -#if IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) - -void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, u8 ts_id, - enum sja1110_meta_tstamp dir, u64 tstamp); - -#else +/* Timestamps are in units of 8 ns clock ticks (equivalent to + * a fixed 125 MHz clock). + */ +#define SJA1105_TICK_NS 8 -static inline void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, - u8 ts_id, enum sja1110_meta_tstamp dir, - u64 tstamp) +static inline s64 ns_to_sja1105_ticks(s64 ns) { + return ns / SJA1105_TICK_NS; } -#endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) */ +static inline s64 sja1105_ticks_to_ns(s64 ticks) +{ + return ticks * SJA1105_TICK_NS; +} #if IS_ENABLED(CONFIG_NET_DSA_SJA1105) diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index c054f48541c8..2edede9ddac9 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "dsa_priv.h" @@ -53,6 +54,11 @@ #define SJA1110_TX_TRAILER_LEN 4 #define SJA1110_MAX_PADDING_LEN 15 +enum sja1110_meta_tstamp { + SJA1110_META_TSTAMP_TX = 0, + SJA1110_META_TSTAMP_RX = 1, +}; + /* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */ static inline bool sja1105_is_link_local(const struct sk_buff *skb) { @@ -520,6 +526,43 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, is_meta); } +static void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, + u8 ts_id, enum sja1110_meta_tstamp dir, + u64 tstamp) +{ + struct sk_buff *skb, *skb_tmp, *skb_match = NULL; + struct dsa_port *dp = dsa_to_port(ds, port); + struct skb_shared_hwtstamps shwt = {0}; + struct sja1105_port *sp = dp->priv; + + if (!dsa_port_is_sja1105(dp)) + return; + + /* We don't care about RX timestamps on the CPU port */ + if (dir == SJA1110_META_TSTAMP_RX) + return; + + spin_lock(&sp->data->skb_txtstamp_queue.lock); + + skb_queue_walk_safe(&sp->data->skb_txtstamp_queue, skb, skb_tmp) { + if (SJA1105_SKB_CB(skb)->ts_id != ts_id) + continue; + + __skb_unlink(skb, &sp->data->skb_txtstamp_queue); + skb_match = skb; + + break; + } + + spin_unlock(&sp->data->skb_txtstamp_queue.lock); + + if (WARN_ON(!skb_match)) + return; + + shwt.hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(tstamp)); + skb_complete_tx_timestamp(skb_match, &shwt); +} + static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header) { u8 *buf = dsa_etype_header_pos_rx(skb) + SJA1110_HEADER_LEN; -- cgit v1.2.3-71-gd317 From 4ac0567e40b334b54988e3c28a2425ff9c8bdd35 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 22 Sep 2021 17:37:26 +0300 Subject: net: dsa: sja1105: break dependency between dsa_port_is_sja1105 and switch driver It's nice to be able to test a tagging protocol with dsa_loop, but not at the cost of losing the ability of building the tagging protocol and switch driver as modules, because as things stand, there is a circular dependency between the two. Tagging protocol drivers cannot depend on switch drivers, that is a hard fact. The reasoning behind the blamed patch was that accessing dp->priv should first make sure that the structure behind that pointer is what we really think it is. Currently the "sja1105" and "sja1110" tagging protocols only operate with the sja1105 switch driver, just like any other tagging protocol and switch combination. The only way to mix and match them is by modifying the code, and this applies to dsa_loop as well (by default that uses DSA_TAG_PROTO_NONE). So while in principle there is an issue, in practice there isn't one. Until we extend dsa_loop to allow user space configuration, treat the problem as a non-issue and just say that DSA ports found by tag_sja1105 are always sja1105 ports, which is in fact true. But keep the dsa_port_is_sja1105 function so that it's easy to patch it during testing, and rely on dead code elimination. Fixes: 994d2cbb08ca ("net: dsa: tag_sja1105: be dsa_loop-safe") Link: https://lore.kernel.org/netdev/20210908220834.d7gmtnwrorhharna@skbuf/ Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/sja1105/sja1105_main.c | 3 +-- include/linux/dsa/sja1105.h | 15 +-------------- net/dsa/Kconfig | 1 - 3 files changed, 2 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 7c0db80eff00..924c3f129992 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -3117,7 +3117,7 @@ static void sja1105_teardown(struct dsa_switch *ds) sja1105_static_config_free(&priv->static_config); } -const struct dsa_switch_ops sja1105_switch_ops = { +static const struct dsa_switch_ops sja1105_switch_ops = { .get_tag_protocol = sja1105_get_tag_protocol, .setup = sja1105_setup, .teardown = sja1105_teardown, @@ -3166,7 +3166,6 @@ const struct dsa_switch_ops sja1105_switch_ops = { .port_bridge_tx_fwd_offload = dsa_tag_8021q_bridge_tx_fwd_offload, .port_bridge_tx_fwd_unoffload = dsa_tag_8021q_bridge_tx_fwd_unoffload, }; -EXPORT_SYMBOL_GPL(sja1105_switch_ops); static const struct of_device_id sja1105_dt_ids[]; diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 0485ab2fcc46..9e07079528a5 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -88,22 +88,9 @@ static inline s64 sja1105_ticks_to_ns(s64 ticks) return ticks * SJA1105_TICK_NS; } -#if IS_ENABLED(CONFIG_NET_DSA_SJA1105) - -extern const struct dsa_switch_ops sja1105_switch_ops; - -static inline bool dsa_port_is_sja1105(struct dsa_port *dp) -{ - return dp->ds->ops == &sja1105_switch_ops; -} - -#else - static inline bool dsa_port_is_sja1105(struct dsa_port *dp) { - return false; + return true; } -#endif - #endif /* _NET_DSA_SJA1105_H */ diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 548285539752..bca1b5d66df2 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -138,7 +138,6 @@ config NET_DSA_TAG_LAN9303 config NET_DSA_TAG_SJA1105 tristate "Tag driver for NXP SJA1105 switches" - depends on NET_DSA_SJA1105 || !NET_DSA_SJA1105 select PACKING help Say Y or M if you want to enable support for tagging frames with the -- cgit v1.2.3-71-gd317 From c57fe0037a4e3863d9b740f8c14df9c51ac31aa1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Oct 2021 14:40:35 +0300 Subject: net: mscc: ocelot: make use of all 63 PTP timestamp identifiers At present, there is a problem when user space bombards a port with PTP event frames which have TX timestamping requests (or when a tc-taprio offload is installed on a port, which delays the TX timestamps by a significant amount of time). The driver will happily roll over the 2-bit timestamp ID and this will cause incorrect matches between an skb and the TX timestamp collected from the FIFO. The Ocelot switches have a 6-bit PTP timestamp identifier, and the value 63 is reserved, so that leaves identifiers 0-62 to be used. The timestamp identifiers are selected by the REW_OP packet field, and are actually shared between CPU-injected frames and frames which match a VCAP IS2 rule that modifies the REW_OP. The hardware supports partitioning between the two uses of the REW_OP field through the PTP_ID_LOW and PTP_ID_HIGH registers, and by default reserves the PTP IDs 0-3 for CPU-injected traffic and the rest for VCAP IS2. The driver does not use VCAP IS2 to set REW_OP for 2-step timestamping, and it also writes 0xffffffff to both PTP_ID_HIGH and PTP_ID_LOW in ocelot_init_timestamp() which makes all timestamp identifiers available to CPU injection. Therefore, we can make use of all 63 timestamp identifiers, which should allow more timestampable packets to be in flight on each port. This is only part of the solution, more issues will be addressed in future changes. Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 4 +++- include/soc/mscc/ocelot_ptp.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 4de58321907c..c43c8f53faaf 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -579,7 +579,9 @@ static void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS; /* Store timestamp ID in OCELOT_SKB_CB(clone)->ts_id */ OCELOT_SKB_CB(clone)->ts_id = ocelot_port->ts_id; - ocelot_port->ts_id = (ocelot_port->ts_id + 1) % 4; + ocelot_port->ts_id++; + if (ocelot_port->ts_id == OCELOT_MAX_PTP_ID) + ocelot_port->ts_id = 0; skb_queue_tail(&ocelot_port->tx_skbs, clone); spin_unlock(&ocelot_port->ts_id_lock); diff --git a/include/soc/mscc/ocelot_ptp.h b/include/soc/mscc/ocelot_ptp.h index ded497d72bdb..6e54442b49ad 100644 --- a/include/soc/mscc/ocelot_ptp.h +++ b/include/soc/mscc/ocelot_ptp.h @@ -13,6 +13,8 @@ #include #include +#define OCELOT_MAX_PTP_ID 63 + #define PTP_PIN_CFG_RSZ 0x20 #define PTP_PIN_TOD_SEC_MSB_RSZ PTP_PIN_CFG_RSZ #define PTP_PIN_TOD_SEC_LSB_RSZ PTP_PIN_CFG_RSZ -- cgit v1.2.3-71-gd317 From 52849bcf0029ccc553be304e4f804938a39112e2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Oct 2021 14:40:36 +0300 Subject: net: mscc: ocelot: avoid overflowing the PTP timestamp FIFO PTP packets with 2-step TX timestamp requests are matched to packets based on the egress port number and a 6-bit timestamp identifier. All PTP timestamps are held in a common FIFO that is 128 entry deep. This patch ensures that back-to-back timestamping requests cannot exceed the hardware FIFO capacity. If that happens, simply send the packets without requesting a TX timestamp to be taken (in the case of felix, since the DSA API has a void return code in ds->ops->port_txtstamp) or drop them (in the case of ocelot). I've moved the ts_id_lock from a per-port basis to a per-switch basis, because we need separate accounting for both numbers of PTP frames in flight. And since we need locking to inc/dec the per-switch counter, that also offers protection for the per-port counter and hence there is no reason to have a per-port counter anymore. Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 6 +++++- drivers/net/ethernet/mscc/ocelot.c | 37 ++++++++++++++++++++++++++++++------- include/soc/mscc/ocelot.h | 5 ++++- include/soc/mscc/ocelot_ptp.h | 1 + 4 files changed, 40 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index a3a9636430d6..50ef20724958 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1291,8 +1291,12 @@ static void felix_txtstamp(struct dsa_switch *ds, int port, if (!ocelot->ptp) return; - if (ocelot_port_txtstamp_request(ocelot, port, skb, &clone)) + if (ocelot_port_txtstamp_request(ocelot, port, skb, &clone)) { + dev_err_ratelimited(ds->dev, + "port %d delivering skb without TX timestamp\n", + port); return; + } if (clone) OCELOT_SKB_CB(skb)->clone = clone; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index c43c8f53faaf..9c62f1d13adc 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -569,22 +569,36 @@ void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port, } EXPORT_SYMBOL_GPL(ocelot_phylink_mac_link_up); -static void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, - struct sk_buff *clone) +static int ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, + struct sk_buff *clone) { struct ocelot_port *ocelot_port = ocelot->ports[port]; + unsigned long flags; + + spin_lock_irqsave(&ocelot->ts_id_lock, flags); - spin_lock(&ocelot_port->ts_id_lock); + if (ocelot_port->ptp_skbs_in_flight == OCELOT_MAX_PTP_ID || + ocelot->ptp_skbs_in_flight == OCELOT_PTP_FIFO_SIZE) { + spin_unlock_irqrestore(&ocelot->ts_id_lock, flags); + return -EBUSY; + } skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS; /* Store timestamp ID in OCELOT_SKB_CB(clone)->ts_id */ OCELOT_SKB_CB(clone)->ts_id = ocelot_port->ts_id; + ocelot_port->ts_id++; if (ocelot_port->ts_id == OCELOT_MAX_PTP_ID) ocelot_port->ts_id = 0; + + ocelot_port->ptp_skbs_in_flight++; + ocelot->ptp_skbs_in_flight++; + skb_queue_tail(&ocelot_port->tx_skbs, clone); - spin_unlock(&ocelot_port->ts_id_lock); + spin_unlock_irqrestore(&ocelot->ts_id_lock, flags); + + return 0; } u32 ocelot_ptp_rew_op(struct sk_buff *skb) @@ -633,6 +647,7 @@ int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port, { struct ocelot_port *ocelot_port = ocelot->ports[port]; u8 ptp_cmd = ocelot_port->ptp_cmd; + int err; /* Store ptp_cmd in OCELOT_SKB_CB(skb)->ptp_cmd */ if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) { @@ -650,7 +665,10 @@ int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port, if (!(*clone)) return -ENOMEM; - ocelot_port_add_txtstamp_skb(ocelot, port, *clone); + err = ocelot_port_add_txtstamp_skb(ocelot, port, *clone); + if (err) + return err; + OCELOT_SKB_CB(skb)->ptp_cmd = ptp_cmd; } @@ -709,9 +727,14 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) id = SYS_PTP_STATUS_PTP_MESS_ID_X(val); txport = SYS_PTP_STATUS_PTP_MESS_TXPORT_X(val); - /* Retrieve its associated skb */ port = ocelot->ports[txport]; + spin_lock(&ocelot->ts_id_lock); + port->ptp_skbs_in_flight--; + ocelot->ptp_skbs_in_flight--; + spin_unlock(&ocelot->ts_id_lock); + + /* Retrieve its associated skb */ spin_lock_irqsave(&port->tx_skbs.lock, flags); skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { @@ -1950,7 +1973,6 @@ void ocelot_init_port(struct ocelot *ocelot, int port) struct ocelot_port *ocelot_port = ocelot->ports[port]; skb_queue_head_init(&ocelot_port->tx_skbs); - spin_lock_init(&ocelot_port->ts_id_lock); /* Basic L2 initialization */ @@ -2083,6 +2105,7 @@ int ocelot_init(struct ocelot *ocelot) mutex_init(&ocelot->stats_lock); mutex_init(&ocelot->ptp_lock); spin_lock_init(&ocelot->ptp_clock_lock); + spin_lock_init(&ocelot->ts_id_lock); snprintf(queue_name, sizeof(queue_name), "%s-stats", dev_name(ocelot->dev)); ocelot->stats_queue = create_singlethread_workqueue(queue_name); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 06706a9fd5b1..b0ece85d9a76 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -603,10 +603,10 @@ struct ocelot_port { /* The VLAN ID that will be transmitted as untagged, on egress */ struct ocelot_vlan native_vlan; + unsigned int ptp_skbs_in_flight; u8 ptp_cmd; struct sk_buff_head tx_skbs; u8 ts_id; - spinlock_t ts_id_lock; phy_interface_t phy_mode; @@ -680,6 +680,9 @@ struct ocelot { struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_info; struct hwtstamp_config hwtstamp_config; + unsigned int ptp_skbs_in_flight; + /* Protects the 2-step TX timestamp ID logic */ + spinlock_t ts_id_lock; /* Protects the PTP interface state */ struct mutex ptp_lock; /* Protects the PTP clock */ diff --git a/include/soc/mscc/ocelot_ptp.h b/include/soc/mscc/ocelot_ptp.h index 6e54442b49ad..f085884b1fa2 100644 --- a/include/soc/mscc/ocelot_ptp.h +++ b/include/soc/mscc/ocelot_ptp.h @@ -14,6 +14,7 @@ #include #define OCELOT_MAX_PTP_ID 63 +#define OCELOT_PTP_FIFO_SIZE 128 #define PTP_PIN_CFG_RSZ 0x20 #define PTP_PIN_TOD_SEC_MSB_RSZ PTP_PIN_CFG_RSZ -- cgit v1.2.3-71-gd317 From ebb4c6a990f786d7e0e4618a0d3766cd660125d8 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Oct 2021 14:40:39 +0300 Subject: net: mscc: ocelot: cross-check the sequence id from the timestamp FIFO with the skb PTP header The sad reality is that when a PTP frame with a TX timestamping request is transmitted, it isn't guaranteed that it will make it all the way to the wire (due to congestion inside the switch), and that a timestamp will be taken by the hardware and placed in the timestamp FIFO where an IRQ will be raised for it. The implication is that if enough PTP frames are silently dropped by the hardware such that the timestamp ID has rolled over, it is possible to match a timestamp to an old skb. Furthermore, nobody will match on the real skb corresponding to this timestamp, since we stupidly matched on a previous one that was stale in the queue, and stopped there. So PTP timestamping will be broken and there will be no way to recover. It looks like the hardware parses the sequenceID from the PTP header, and also provides that metadata for each timestamp. The driver currently ignores this, but it shouldn't. As an extra resiliency measure, do the following: - check whether the PTP sequenceID also matches between the skb and the timestamp, treat the skb as stale otherwise and free it - if we see a stale skb, don't stop there and try to match an skb one more time, chances are there's one more skb in the queue with the same timestamp ID, otherwise we wouldn't have ever found the stale one (it is by timestamp ID that we matched it). While this does not prevent PTP packet drops, it at least prevents the catastrophic consequences of incorrect timestamp matching. Since we already call ptp_classify_raw in the TX path, save the result in the skb->cb of the clone, and just use that result in the interrupt code path. Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 24 +++++++++++++++++++++++- include/soc/mscc/ocelot.h | 1 + 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 3b1f0bb6a414..f0044329e3d7 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -675,6 +675,7 @@ int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port, return err; OCELOT_SKB_CB(skb)->ptp_cmd = ptp_cmd; + OCELOT_SKB_CB(*clone)->ptp_class = ptp_class; } return 0; @@ -708,6 +709,17 @@ static void ocelot_get_hwtimestamp(struct ocelot *ocelot, spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags); } +static bool ocelot_validate_ptp_skb(struct sk_buff *clone, u16 seqid) +{ + struct ptp_header *hdr; + + hdr = ptp_parse_header(clone, OCELOT_SKB_CB(clone)->ptp_class); + if (WARN_ON(!hdr)) + return false; + + return seqid == ntohs(hdr->sequence_id); +} + void ocelot_get_txtstamp(struct ocelot *ocelot) { int budget = OCELOT_PTP_QUEUE_SZ; @@ -715,10 +727,10 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) while (budget--) { struct sk_buff *skb, *skb_tmp, *skb_match = NULL; struct skb_shared_hwtstamps shhwtstamps; + u32 val, id, seqid, txport; struct ocelot_port *port; struct timespec64 ts; unsigned long flags; - u32 val, id, txport; val = ocelot_read(ocelot, SYS_PTP_STATUS); @@ -731,6 +743,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) /* Retrieve the ts ID and Tx port */ id = SYS_PTP_STATUS_PTP_MESS_ID_X(val); txport = SYS_PTP_STATUS_PTP_MESS_TXPORT_X(val); + seqid = SYS_PTP_STATUS_PTP_MESS_SEQ_ID(val); port = ocelot->ports[txport]; @@ -740,6 +753,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) spin_unlock(&ocelot->ts_id_lock); /* Retrieve its associated skb */ +try_again: spin_lock_irqsave(&port->tx_skbs.lock, flags); skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { @@ -755,6 +769,14 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) if (WARN_ON(!skb_match)) continue; + if (!ocelot_validate_ptp_skb(skb_match, seqid)) { + dev_err_ratelimited(ocelot->dev, + "port %d received stale TX timestamp for seqid %d, discarding\n", + txport, seqid); + dev_kfree_skb_any(skb); + goto try_again; + } + /* Get the h/w timestamp */ ocelot_get_hwtimestamp(ocelot, &ts); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index b0ece85d9a76..cabacef8731c 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -697,6 +697,7 @@ struct ocelot_policer { struct ocelot_skb_cb { struct sk_buff *clone; + unsigned int ptp_class; /* valid only for clones */ u8 ptp_cmd; u8 ts_id; }; -- cgit v1.2.3-71-gd317 From deab6b1cd9789bb9bd466d5e76aecb8b336259b4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Oct 2021 14:40:40 +0300 Subject: net: dsa: tag_ocelot: break circular dependency with ocelot switch lib driver As explained here: https://lore.kernel.org/netdev/20210908220834.d7gmtnwrorhharna@skbuf/ DSA tagging protocol drivers cannot depend on symbols exported by switch drivers, because this creates a circular dependency that breaks module autoloading. The tag_ocelot.c file depends on the ocelot_ptp_rew_op() function exported by the common ocelot switch lib. This function looks at OCELOT_SKB_CB(skb) and computes how to populate the REW_OP field of the DSA tag, for PTP timestamping (the command: one-step/two-step, and the TX timestamp identifier). None of that requires deep insight into the driver, it is quite stateless, as it only depends upon the skb->cb. So let's make it a static inline function and put it in include/linux/dsa/ocelot.h, a file that despite its name is used by the ocelot switch driver for populating the injection header too - since commit 40d3f295b5fe ("net: mscc: ocelot: use common tag parsing code with DSA"). With that function declared as static inline, its body is expanded inside each call site, so the dependency is broken and the DSA tagger can be built without the switch library, upon which the felix driver depends. Fixes: 39e5308b3250 ("net: mscc: ocelot: support PTP Sync one-step timestamping") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 17 ---------------- drivers/net/ethernet/mscc/ocelot_net.c | 1 + include/linux/dsa/ocelot.h | 37 ++++++++++++++++++++++++++++++++++ include/soc/mscc/ocelot.h | 24 ---------------------- net/dsa/Kconfig | 2 -- net/dsa/tag_ocelot.c | 1 - net/dsa/tag_ocelot_8021q.c | 1 + 7 files changed, 39 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index f0044329e3d7..a08e4f530c1c 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -601,23 +601,6 @@ static int ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, return 0; } -u32 ocelot_ptp_rew_op(struct sk_buff *skb) -{ - struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone; - u8 ptp_cmd = OCELOT_SKB_CB(skb)->ptp_cmd; - u32 rew_op = 0; - - if (ptp_cmd == IFH_REW_OP_TWO_STEP_PTP && clone) { - rew_op = ptp_cmd; - rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3; - } else if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) { - rew_op = ptp_cmd; - } - - return rew_op; -} -EXPORT_SYMBOL(ocelot_ptp_rew_op); - static bool ocelot_ptp_is_onestep_sync(struct sk_buff *skb, unsigned int ptp_class) { diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 2a85bcb5d0c2..2545727fd5b2 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -8,6 +8,7 @@ * Copyright 2020-2021 NXP */ +#include #include #include #include diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index 435777a0073c..50641a7529ad 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -6,6 +6,26 @@ #define _NET_DSA_TAG_OCELOT_H #include +#include + +struct ocelot_skb_cb { + struct sk_buff *clone; + unsigned int ptp_class; /* valid only for clones */ + u8 ptp_cmd; + u8 ts_id; +}; + +#define OCELOT_SKB_CB(skb) \ + ((struct ocelot_skb_cb *)((skb)->cb)) + +#define IFH_TAG_TYPE_C 0 +#define IFH_TAG_TYPE_S 1 + +#define IFH_REW_OP_NOOP 0x0 +#define IFH_REW_OP_DSCP 0x1 +#define IFH_REW_OP_ONE_STEP_PTP 0x2 +#define IFH_REW_OP_TWO_STEP_PTP 0x3 +#define IFH_REW_OP_ORIGIN_PTP 0x5 #define OCELOT_TAG_LEN 16 #define OCELOT_SHORT_PREFIX_LEN 4 @@ -215,4 +235,21 @@ static inline void ocelot_ifh_set_vid(void *injection, u64 vid) packing(injection, &vid, 11, 0, OCELOT_TAG_LEN, PACK, 0); } +/* Determine the PTP REW_OP to use for injecting the given skb */ +static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb) +{ + struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone; + u8 ptp_cmd = OCELOT_SKB_CB(skb)->ptp_cmd; + u32 rew_op = 0; + + if (ptp_cmd == IFH_REW_OP_TWO_STEP_PTP && clone) { + rew_op = ptp_cmd; + rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3; + } else if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) { + rew_op = ptp_cmd; + } + + return rew_op; +} + #endif diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index cabacef8731c..66b2e65c1179 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -89,15 +89,6 @@ /* Source PGIDs, one per physical port */ #define PGID_SRC 80 -#define IFH_TAG_TYPE_C 0 -#define IFH_TAG_TYPE_S 1 - -#define IFH_REW_OP_NOOP 0x0 -#define IFH_REW_OP_DSCP 0x1 -#define IFH_REW_OP_ONE_STEP_PTP 0x2 -#define IFH_REW_OP_TWO_STEP_PTP 0x3 -#define IFH_REW_OP_ORIGIN_PTP 0x5 - #define OCELOT_NUM_TC 8 #define OCELOT_SPEED_2500 0 @@ -695,16 +686,6 @@ struct ocelot_policer { u32 burst; /* bytes */ }; -struct ocelot_skb_cb { - struct sk_buff *clone; - unsigned int ptp_class; /* valid only for clones */ - u8 ptp_cmd; - u8 ts_id; -}; - -#define OCELOT_SKB_CB(skb) \ - ((struct ocelot_skb_cb *)((skb)->cb)) - #define ocelot_read_ix(ocelot, reg, gi, ri) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri)) #define ocelot_read_gix(ocelot, reg, gi) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi)) #define ocelot_read_rix(ocelot, reg, ri) __ocelot_read_ix(ocelot, reg, reg##_RSZ * (ri)) @@ -765,7 +746,6 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp); -u32 ocelot_ptp_rew_op(struct sk_buff *skb); #else static inline bool ocelot_can_inject(struct ocelot *ocelot, int grp) @@ -789,10 +769,6 @@ static inline void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp) { } -static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb) -{ - return 0; -} #endif /* Hardware initialization */ diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index bca1b5d66df2..d166377d7085 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -101,8 +101,6 @@ config NET_DSA_TAG_RTL4_A config NET_DSA_TAG_OCELOT tristate "Tag driver for Ocelot family of switches, using NPI port" - depends on MSCC_OCELOT_SWITCH_LIB || \ - (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST) select PACKING help Say Y or M if you want to enable NPI tagging for the Ocelot switches diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 8025ed778d33..605b51ca6921 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -2,7 +2,6 @@ /* Copyright 2019 NXP */ #include -#include #include "dsa_priv.h" static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index 59072930cb02..1e4e66ea6796 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -9,6 +9,7 @@ * that on egress */ #include +#include #include #include #include "dsa_priv.h" -- cgit v1.2.3-71-gd317 From 49f885b2d97093451410e7279aa29d81e094e108 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Oct 2021 14:40:41 +0300 Subject: net: dsa: tag_ocelot_8021q: break circular dependency with ocelot switch lib Michael reported that when using the "ocelot-8021q" tagging protocol, the switch driver module must be manually loaded before the tagging protocol can be loaded/is available. This appears to be the same problem described here: https://lore.kernel.org/netdev/20210908220834.d7gmtnwrorhharna@skbuf/ where due to the fact that DSA tagging protocols make use of symbols exported by the switch drivers, circular dependencies appear and this breaks module autoloading. The ocelot_8021q driver needs the ocelot_can_inject() and ocelot_port_inject_frame() functions from the switch library. Previously the wrong approach was taken to solve that dependency: shims were provided for the case where the ocelot switch library was compiled out, but that turns out to be insufficient, because the dependency when the switch lib _is_ compiled is problematic too. We cannot declare ocelot_can_inject() and ocelot_port_inject_frame() as static inline functions, because these access I/O functions like __ocelot_write_ix() which is called by ocelot_write_rix(). Making those static inline basically means exposing the whole guts of the ocelot switch library, not ideal... We already have one tagging protocol driver which calls into the switch driver during xmit but not using any exported symbol: sja1105_defer_xmit. We can do the same thing here: create a kthread worker and one work item per skb, and let the switch driver itself do the register accesses to send the skb, and then consume it. Fixes: 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping") Reported-by: Michael Walle Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 96 ++++++++++++++++++++++++++++++++++++++++-- drivers/net/dsa/ocelot/felix.h | 1 + include/linux/dsa/ocelot.h | 12 ++++++ include/soc/mscc/ocelot.h | 27 ------------ net/dsa/Kconfig | 2 - net/dsa/tag_ocelot_8021q.c | 38 +++++++++++------ 6 files changed, 130 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 50ef20724958..f8603e068e7c 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1074,6 +1074,73 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports) return 0; } +#define work_to_xmit_work(w) \ + container_of((w), struct felix_deferred_xmit_work, work) + +static void felix_port_deferred_xmit(struct kthread_work *work) +{ + struct felix_deferred_xmit_work *xmit_work = work_to_xmit_work(work); + struct dsa_switch *ds = xmit_work->dp->ds; + struct sk_buff *skb = xmit_work->skb; + u32 rew_op = ocelot_ptp_rew_op(skb); + struct ocelot *ocelot = ds->priv; + int port = xmit_work->dp->index; + int retries = 10; + + do { + if (ocelot_can_inject(ocelot, 0)) + break; + + cpu_relax(); + } while (--retries); + + if (!retries) { + dev_err(ocelot->dev, "port %d failed to inject skb\n", + port); + kfree_skb(skb); + return; + } + + ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb); + + consume_skb(skb); + kfree(xmit_work); +} + +static int felix_port_setup_tagger_data(struct dsa_switch *ds, int port) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + struct ocelot *ocelot = ds->priv; + struct felix *felix = ocelot_to_felix(ocelot); + struct felix_port *felix_port; + + if (!dsa_port_is_user(dp)) + return 0; + + felix_port = kzalloc(sizeof(*felix_port), GFP_KERNEL); + if (!felix_port) + return -ENOMEM; + + felix_port->xmit_worker = felix->xmit_worker; + felix_port->xmit_work_fn = felix_port_deferred_xmit; + + dp->priv = felix_port; + + return 0; +} + +static void felix_port_teardown_tagger_data(struct dsa_switch *ds, int port) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + struct felix_port *felix_port = dp->priv; + + if (!felix_port) + return; + + dp->priv = NULL; + kfree(felix_port); +} + /* Hardware initialization done here so that we can allocate structures with * devm without fear of dsa_register_switch returning -EPROBE_DEFER and causing * us to allocate structures twice (leak memory) and map PCI memory twice @@ -1102,6 +1169,12 @@ static int felix_setup(struct dsa_switch *ds) } } + felix->xmit_worker = kthread_create_worker(0, "felix_xmit"); + if (IS_ERR(felix->xmit_worker)) { + err = PTR_ERR(felix->xmit_worker); + goto out_deinit_timestamp; + } + for (port = 0; port < ds->num_ports; port++) { if (dsa_is_unused_port(ds, port)) continue; @@ -1112,6 +1185,14 @@ static int felix_setup(struct dsa_switch *ds) * bits of vlan tag. */ felix_port_qos_map_init(ocelot, port); + + err = felix_port_setup_tagger_data(ds, port); + if (err) { + dev_err(ds->dev, + "port %d failed to set up tagger data: %pe\n", + port, ERR_PTR(err)); + goto out_deinit_ports; + } } err = ocelot_devlink_sb_register(ocelot); @@ -1138,9 +1219,13 @@ out_deinit_ports: if (dsa_is_unused_port(ds, port)) continue; + felix_port_teardown_tagger_data(ds, port); ocelot_deinit_port(ocelot, port); } + kthread_destroy_worker(felix->xmit_worker); + +out_deinit_timestamp: ocelot_deinit_timestamp(ocelot); ocelot_deinit(ocelot); @@ -1164,17 +1249,20 @@ static void felix_teardown(struct dsa_switch *ds) felix_del_tag_protocol(ds, port, felix->tag_proto); } - ocelot_devlink_sb_unregister(ocelot); - ocelot_deinit_timestamp(ocelot); - ocelot_deinit(ocelot); - for (port = 0; port < ocelot->num_phys_ports; port++) { if (dsa_is_unused_port(ds, port)) continue; + felix_port_teardown_tagger_data(ds, port); ocelot_deinit_port(ocelot, port); } + kthread_destroy_worker(felix->xmit_worker); + + ocelot_devlink_sb_unregister(ocelot); + ocelot_deinit_timestamp(ocelot); + ocelot_deinit(ocelot); + if (felix->info->mdio_bus_free) felix->info->mdio_bus_free(ocelot); } diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 54024b6f9498..be3e42e135c0 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -62,6 +62,7 @@ struct felix { resource_size_t switch_base; resource_size_t imdio_base; enum dsa_tag_protocol tag_proto; + struct kthread_worker *xmit_worker; }; struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port); diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index 50641a7529ad..8ae999f587c4 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -5,6 +5,7 @@ #ifndef _NET_DSA_TAG_OCELOT_H #define _NET_DSA_TAG_OCELOT_H +#include #include #include @@ -160,6 +161,17 @@ struct ocelot_skb_cb { * +------+------+------+------+------+------+------+------+ */ +struct felix_deferred_xmit_work { + struct dsa_port *dp; + struct sk_buff *skb; + struct kthread_work work; +}; + +struct felix_port { + void (*xmit_work_fn)(struct kthread_work *work); + struct kthread_worker *xmit_worker; +}; + static inline void ocelot_xfh_get_rew_val(void *extraction, u64 *rew_val) { packing(extraction, rew_val, 116, 85, OCELOT_TAG_LEN, UNPACK, 0); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 66b2e65c1179..d7055b41982d 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -737,8 +737,6 @@ u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target, void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, u32 val, u32 reg, u32 offset); -#if IS_ENABLED(CONFIG_MSCC_OCELOT_SWITCH_LIB) - /* Packet I/O */ bool ocelot_can_inject(struct ocelot *ocelot, int grp); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, @@ -746,31 +744,6 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp); -#else - -static inline bool ocelot_can_inject(struct ocelot *ocelot, int grp) -{ - return false; -} - -static inline void ocelot_port_inject_frame(struct ocelot *ocelot, int port, - int grp, u32 rew_op, - struct sk_buff *skb) -{ -} - -static inline int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, - struct sk_buff **skb) -{ - return -EIO; -} - -static inline void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp) -{ -} - -#endif - /* Hardware initialization */ int ocelot_regfields_init(struct ocelot *ocelot, const struct reg_field *const regfields); diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index d166377d7085..d8ee15f1c7a9 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -112,8 +112,6 @@ config NET_DSA_TAG_OCELOT config NET_DSA_TAG_OCELOT_8021Q tristate "Tag driver for Ocelot family of switches, using VLAN" - depends on MSCC_OCELOT_SWITCH_LIB || \ - (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST) help Say Y or M if you want to enable support for tagging frames with a custom VLAN-based header. Frames that require timestamping, such as diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index 1e4e66ea6796..d05c352f96e5 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -10,10 +10,31 @@ */ #include #include -#include -#include #include "dsa_priv.h" +static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp, + struct sk_buff *skb) +{ + struct felix_deferred_xmit_work *xmit_work; + struct felix_port *felix_port = dp->priv; + + xmit_work = kzalloc(sizeof(*xmit_work), GFP_ATOMIC); + if (!xmit_work) + return NULL; + + /* Calls felix_port_deferred_xmit in felix.c */ + kthread_init_work(&xmit_work->work, felix_port->xmit_work_fn); + /* Increase refcount so the kfree_skb in dsa_slave_xmit + * won't really free the packet. + */ + xmit_work->dp = dp; + xmit_work->skb = skb_get(skb); + + kthread_queue_work(felix_port->xmit_worker, &xmit_work->work); + + return NULL; +} + static struct sk_buff *ocelot_xmit(struct sk_buff *skb, struct net_device *netdev) { @@ -21,18 +42,9 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); - struct ocelot *ocelot = dp->ds->priv; - int port = dp->index; - u32 rew_op = 0; - rew_op = ocelot_ptp_rew_op(skb); - if (rew_op) { - if (!ocelot_can_inject(ocelot, 0)) - return NULL; - - ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb); - return NULL; - } + if (ocelot_ptp_rew_op(skb)) + return ocelot_defer_xmit(dp, skb); return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q, ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); -- cgit v1.2.3-71-gd317 From 6098475d4cb48d821bdf453c61118c56e26294f0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 8 Oct 2021 14:31:57 +0100 Subject: spi: Fix deadlock when adding SPI controllers on SPI buses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we have a global spi_add_lock which we take when adding new devices so that we can check that we're not trying to reuse a chip select that's already controlled. This means that if the SPI device is itself a SPI controller and triggers the instantiation of further SPI devices we trigger a deadlock as we try to register and instantiate those devices while in the process of doing so for the parent controller and hence already holding the global spi_add_lock. Since we only care about concurrency within a single SPI bus move the lock to be per controller, avoiding the deadlock. This can be easily triggered in the case of spi-mux. Reported-by: Uwe Kleine-König Signed-off-by: Mark Brown --- drivers/spi/spi.c | 17 ++++++----------- include/linux/spi/spi.h | 3 +++ 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index aea037c65985..412a10586233 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -478,12 +478,6 @@ static LIST_HEAD(spi_controller_list); */ static DEFINE_MUTEX(board_lock); -/* - * Prevents addition of devices with same chip select and - * addition of devices below an unregistering controller. - */ -static DEFINE_MUTEX(spi_add_lock); - /** * spi_alloc_device - Allocate a new SPI device * @ctlr: Controller to which device is connected @@ -636,9 +630,9 @@ int spi_add_device(struct spi_device *spi) * chipselect **BEFORE** we call setup(), else we'll trash * its configuration. Lock against concurrent add() calls. */ - mutex_lock(&spi_add_lock); + mutex_lock(&ctlr->add_lock); status = __spi_add_device(spi); - mutex_unlock(&spi_add_lock); + mutex_unlock(&ctlr->add_lock); return status; } EXPORT_SYMBOL_GPL(spi_add_device); @@ -658,7 +652,7 @@ static int spi_add_device_locked(struct spi_device *spi) /* Set the bus ID string */ spi_dev_set_name(spi); - WARN_ON(!mutex_is_locked(&spi_add_lock)); + WARN_ON(!mutex_is_locked(&ctlr->add_lock)); return __spi_add_device(spi); } @@ -2830,6 +2824,7 @@ int spi_register_controller(struct spi_controller *ctlr) spin_lock_init(&ctlr->bus_lock_spinlock); mutex_init(&ctlr->bus_lock_mutex); mutex_init(&ctlr->io_mutex); + mutex_init(&ctlr->add_lock); ctlr->bus_lock_flag = 0; init_completion(&ctlr->xfer_completion); if (!ctlr->max_dma_len) @@ -2966,7 +2961,7 @@ void spi_unregister_controller(struct spi_controller *ctlr) /* Prevent addition of new devices, unregister existing ones */ if (IS_ENABLED(CONFIG_SPI_DYNAMIC)) - mutex_lock(&spi_add_lock); + mutex_lock(&ctlr->add_lock); device_for_each_child(&ctlr->dev, NULL, __unregister); @@ -2997,7 +2992,7 @@ void spi_unregister_controller(struct spi_controller *ctlr) mutex_unlock(&board_lock); if (IS_ENABLED(CONFIG_SPI_DYNAMIC)) - mutex_unlock(&spi_add_lock); + mutex_unlock(&ctlr->add_lock); } EXPORT_SYMBOL_GPL(spi_unregister_controller); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 8371bca13729..6b0b686f6f90 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -531,6 +531,9 @@ struct spi_controller { /* I/O mutex */ struct mutex io_mutex; + /* Used to avoid adding the same CS twice */ + struct mutex add_lock; + /* lock and mutex for SPI bus locking */ spinlock_t bus_lock_spinlock; struct mutex bus_lock_mutex; -- cgit v1.2.3-71-gd317 From 8e141f9eb803e209714a80aa6ec073893f94c526 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 29 Sep 2021 09:12:40 +0200 Subject: block: drain file system I/O on del_gendisk Instead of delaying draining of file system I/O related items like the blk-qos queues, the integrity read workqueue and timeouts only when the request_queue is removed, do that when del_gendisk is called. This is important for SCSI where the upper level drivers that control the gendisk are separate entities, and the disk can be freed much earlier than the request_queue, or can even be unbound without tearing down the queue. Fixes: edb0872f44ec ("block: move the bdi from the request_queue to the gendisk") Reported-by: Ming Lei Signed-off-by: Christoph Hellwig Tested-by: Darrick J. Wong Link: https://lore.kernel.org/r/20210929071241.934472-5-hch@lst.de Tested-by: Yi Zhang Signed-off-by: Jens Axboe --- block/blk-core.c | 27 ++++++++++++--------------- block/blk.h | 1 + block/genhd.c | 21 +++++++++++++++++++++ include/linux/genhd.h | 1 + 4 files changed, 35 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 43f5da707d8e..4d8f5fe91588 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -49,7 +49,6 @@ #include "blk-mq.h" #include "blk-mq-sched.h" #include "blk-pm.h" -#include "blk-rq-qos.h" struct dentry *blk_debugfs_root; @@ -337,23 +336,25 @@ void blk_put_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_put_queue); -void blk_set_queue_dying(struct request_queue *q) +void blk_queue_start_drain(struct request_queue *q) { - blk_queue_flag_set(QUEUE_FLAG_DYING, q); - /* * When queue DYING flag is set, we need to block new req * entering queue, so we call blk_freeze_queue_start() to * prevent I/O from crossing blk_queue_enter(). */ blk_freeze_queue_start(q); - if (queue_is_mq(q)) blk_mq_wake_waiters(q); - /* Make blk_queue_enter() reexamine the DYING flag. */ wake_up_all(&q->mq_freeze_wq); } + +void blk_set_queue_dying(struct request_queue *q) +{ + blk_queue_flag_set(QUEUE_FLAG_DYING, q); + blk_queue_start_drain(q); +} EXPORT_SYMBOL_GPL(blk_set_queue_dying); /** @@ -385,13 +386,8 @@ void blk_cleanup_queue(struct request_queue *q) */ blk_freeze_queue(q); - rq_qos_exit(q); - blk_queue_flag_set(QUEUE_FLAG_DEAD, q); - /* for synchronous bio-based driver finish in-flight integrity i/o */ - blk_flush_integrity(); - blk_sync_queue(q); if (queue_is_mq(q)) blk_mq_exit_queue(q); @@ -474,11 +470,12 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) static inline int bio_queue_enter(struct bio *bio) { - struct request_queue *q = bio->bi_bdev->bd_disk->queue; + struct gendisk *disk = bio->bi_bdev->bd_disk; + struct request_queue *q = disk->queue; while (!blk_try_enter_queue(q, false)) { if (bio->bi_opf & REQ_NOWAIT) { - if (blk_queue_dying(q)) + if (test_bit(GD_DEAD, &disk->state)) goto dead; bio_wouldblock_error(bio); return -EBUSY; @@ -495,8 +492,8 @@ static inline int bio_queue_enter(struct bio *bio) wait_event(q->mq_freeze_wq, (!q->mq_freeze_depth && blk_pm_resume_queue(false, q)) || - blk_queue_dying(q)); - if (blk_queue_dying(q)) + test_bit(GD_DEAD, &disk->state)); + if (test_bit(GD_DEAD, &disk->state)) goto dead; } diff --git a/block/blk.h b/block/blk.h index 7d2a0ba7ed21..e2ed2257709a 100644 --- a/block/blk.h +++ b/block/blk.h @@ -51,6 +51,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, void blk_free_flush_queue(struct blk_flush_queue *q); void blk_freeze_queue(struct request_queue *q); +void blk_queue_start_drain(struct request_queue *q); #define BIO_INLINE_VECS 4 struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, diff --git a/block/genhd.c b/block/genhd.c index 496e8458c357..1fe816be9bcd 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -26,6 +26,7 @@ #include #include "blk.h" +#include "blk-rq-qos.h" static struct kobject *block_depr; @@ -559,6 +560,8 @@ EXPORT_SYMBOL(device_add_disk); */ void del_gendisk(struct gendisk *disk) { + struct request_queue *q = disk->queue; + might_sleep(); if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN))) @@ -575,8 +578,26 @@ void del_gendisk(struct gendisk *disk) fsync_bdev(disk->part0); __invalidate_device(disk->part0, true); + /* + * Fail any new I/O. + */ + set_bit(GD_DEAD, &disk->state); set_capacity(disk, 0); + /* + * Prevent new I/O from crossing bio_queue_enter(). + */ + blk_queue_start_drain(q); + blk_mq_freeze_queue_wait(q); + + rq_qos_exit(q); + blk_sync_queue(q); + blk_flush_integrity(); + /* + * Allow using passthrough request again after the queue is torn down. + */ + blk_mq_unfreeze_queue(q); + if (!(disk->flags & GENHD_FL_HIDDEN)) { sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c68d83c87f83..0f5315c2b5a3 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -149,6 +149,7 @@ struct gendisk { unsigned long state; #define GD_NEED_PART_SCAN 0 #define GD_READ_ONLY 1 +#define GD_DEAD 2 struct mutex open_mutex; /* open/close mutex */ unsigned open_partitions; /* number of open partitions */ -- cgit v1.2.3-71-gd317 From c41108049d143304faf06852c7181262b7b0d242 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Oct 2021 11:33:01 +0200 Subject: kyber: avoid q->disk dereferences in trace points q->disk becomes invalid after the gendisk is removed. Work around this by caching the dev_t for the tracepoints. The real fix would be to properly tear down the I/O schedulers with the gendisk, but that is a much more invasive change. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211012093301.GA27795@lst.de Tested-by: Yi Zhang Signed-off-by: Jens Axboe --- block/kyber-iosched.c | 10 ++++++---- include/trace/events/kyber.h | 19 +++++++++---------- 2 files changed, 15 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 15a8be57203d..a0ffbabfac2c 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -151,6 +151,7 @@ struct kyber_ctx_queue { struct kyber_queue_data { struct request_queue *q; + dev_t dev; /* * Each scheduling domain has a limited number of in-flight requests @@ -257,7 +258,7 @@ static int calculate_percentile(struct kyber_queue_data *kqd, } memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type])); - trace_kyber_latency(kqd->q, kyber_domain_names[sched_domain], + trace_kyber_latency(kqd->dev, kyber_domain_names[sched_domain], kyber_latency_type_names[type], percentile, bucket + 1, 1 << KYBER_LATENCY_SHIFT, samples); @@ -270,7 +271,7 @@ static void kyber_resize_domain(struct kyber_queue_data *kqd, depth = clamp(depth, 1U, kyber_depth[sched_domain]); if (depth != kqd->domain_tokens[sched_domain].sb.depth) { sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth); - trace_kyber_adjust(kqd->q, kyber_domain_names[sched_domain], + trace_kyber_adjust(kqd->dev, kyber_domain_names[sched_domain], depth); } } @@ -366,6 +367,7 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) goto err; kqd->q = q; + kqd->dev = disk_devt(q->disk); kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency, GFP_KERNEL | __GFP_ZERO); @@ -774,7 +776,7 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd, list_del_init(&rq->queuelist); return rq; } else { - trace_kyber_throttled(kqd->q, + trace_kyber_throttled(kqd->dev, kyber_domain_names[khd->cur_domain]); } } else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) { @@ -787,7 +789,7 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd, list_del_init(&rq->queuelist); return rq; } else { - trace_kyber_throttled(kqd->q, + trace_kyber_throttled(kqd->dev, kyber_domain_names[khd->cur_domain]); } } diff --git a/include/trace/events/kyber.h b/include/trace/events/kyber.h index 491098a0d8ed..bf7533f171ff 100644 --- a/include/trace/events/kyber.h +++ b/include/trace/events/kyber.h @@ -13,11 +13,11 @@ TRACE_EVENT(kyber_latency, - TP_PROTO(struct request_queue *q, const char *domain, const char *type, + TP_PROTO(dev_t dev, const char *domain, const char *type, unsigned int percentile, unsigned int numerator, unsigned int denominator, unsigned int samples), - TP_ARGS(q, domain, type, percentile, numerator, denominator, samples), + TP_ARGS(dev, domain, type, percentile, numerator, denominator, samples), TP_STRUCT__entry( __field( dev_t, dev ) @@ -30,7 +30,7 @@ TRACE_EVENT(kyber_latency, ), TP_fast_assign( - __entry->dev = disk_devt(q->disk); + __entry->dev = dev; strlcpy(__entry->domain, domain, sizeof(__entry->domain)); strlcpy(__entry->type, type, sizeof(__entry->type)); __entry->percentile = percentile; @@ -47,10 +47,9 @@ TRACE_EVENT(kyber_latency, TRACE_EVENT(kyber_adjust, - TP_PROTO(struct request_queue *q, const char *domain, - unsigned int depth), + TP_PROTO(dev_t dev, const char *domain, unsigned int depth), - TP_ARGS(q, domain, depth), + TP_ARGS(dev, domain, depth), TP_STRUCT__entry( __field( dev_t, dev ) @@ -59,7 +58,7 @@ TRACE_EVENT(kyber_adjust, ), TP_fast_assign( - __entry->dev = disk_devt(q->disk); + __entry->dev = dev; strlcpy(__entry->domain, domain, sizeof(__entry->domain)); __entry->depth = depth; ), @@ -71,9 +70,9 @@ TRACE_EVENT(kyber_adjust, TRACE_EVENT(kyber_throttled, - TP_PROTO(struct request_queue *q, const char *domain), + TP_PROTO(dev_t dev, const char *domain), - TP_ARGS(q, domain), + TP_ARGS(dev, domain), TP_STRUCT__entry( __field( dev_t, dev ) @@ -81,7 +80,7 @@ TRACE_EVENT(kyber_throttled, ), TP_fast_assign( - __entry->dev = disk_devt(q->disk); + __entry->dev = dev; strlcpy(__entry->domain, domain, sizeof(__entry->domain)); ), -- cgit v1.2.3-71-gd317 From cf6a8b1b24d675afc35a01cccd081160014a0125 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 12 Oct 2021 13:26:30 +0300 Subject: RDMA/mlx5: Remove iova from struct mlx5_core_mkey iova is already stored in ibmr->iova, no need to store it here. Signed-off-by: Aharon Landau Reviewed-by: Shay Drory Acked-by: Michael S. Tsirkin Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/devx.c | 1 - drivers/infiniband/hw/mlx5/mr.c | 16 ++++++++-------- drivers/infiniband/hw/mlx5/odp.c | 8 ++++---- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 1 - drivers/vdpa/mlx5/core/resources.c | 1 - include/linux/mlx5/driver.h | 1 - 6 files changed, 12 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index e95967aefe78..00ad24044c3a 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1303,7 +1303,6 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, mkey->key = mlx5_idx_to_mkey( MLX5_GET(create_mkey_out, out, mkey_index)) | key; mkey->type = MLX5_MKEY_INDIRECT_DEVX; - mkey->iova = MLX5_GET64(mkc, mkc, start_addr); mkey->size = MLX5_GET64(mkc, mkc, len); mkey->pd = MLX5_GET(mkc, mkc, pd); devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3be36ebbf67a..6815500cfdaa 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -911,12 +911,13 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, } static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - u64 length, int access_flags) + u64 length, int access_flags, u64 iova) { mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; mr->ibmr.length = length; mr->ibmr.device = &dev->ib_dev; + mr->ibmr.iova = iova; mr->access_flags = access_flags; } @@ -974,11 +975,10 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, mr->ibmr.pd = pd; mr->umem = umem; - mr->mmkey.iova = iova; mr->mmkey.size = umem->length; mr->mmkey.pd = to_mpd(pd)->pdn; mr->page_shift = order_base_2(page_size); - set_mr_fields(dev, mr, umem->length, access_flags); + set_mr_fields(dev, mr, umem->length, access_flags, iova); return mr; } @@ -1088,7 +1088,7 @@ static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, wr->pd = mr->ibmr.pd; wr->mkey = mr->mmkey.key; wr->length = mr->mmkey.size; - wr->virt_addr = mr->mmkey.iova; + wr->virt_addr = mr->ibmr.iova; wr->access_flags = mr->access_flags; wr->page_shift = mr->page_shift; wr->xlt_size = sg->length; @@ -1341,7 +1341,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, mr->mmkey.type = MLX5_MKEY_MR; mr->desc_size = sizeof(struct mlx5_mtt); mr->umem = umem; - set_mr_fields(dev, mr, umem->length, access_flags); + set_mr_fields(dev, mr, umem->length, access_flags, iova); kvfree(in); mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); @@ -1388,7 +1388,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, kfree(in); - set_mr_fields(dev, mr, length, acc); + set_mr_fields(dev, mr, length, acc, start_addr); return &mr->ibmr; @@ -1763,7 +1763,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, } mr->ibmr.length = new_umem->length; - mr->mmkey.iova = iova; + mr->ibmr.iova = iova; mr->mmkey.size = new_umem->length; mr->page_shift = order_base_2(page_size); mr->umem = new_umem; @@ -1834,7 +1834,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, mr->umem = NULL; atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); - return create_real_mr(new_pd, umem, mr->mmkey.iova, + return create_real_mr(new_pd, umem, mr->ibmr.iova, new_access_flags); } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index d0d98e584ebc..d119ba3101a3 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -430,7 +430,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, mr->umem = &odp->umem; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; - mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; + mr->ibmr.iova = idx * MLX5_IMR_MTT_SIZE; mr->parent = imr; odp->private = mr; @@ -500,7 +500,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, } imr->ibmr.pd = &pd->ibpd; - imr->mmkey.iova = 0; + imr->ibmr.iova = 0; imr->umem = &umem_odp->umem; imr->ibmr.lkey = imr->mmkey.key; imr->ibmr.rkey = imr->mmkey.key; @@ -738,7 +738,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - if (unlikely(io_virt < mr->mmkey.iova)) + if (unlikely(io_virt < mr->ibmr.iova)) return -EFAULT; if (mr->umem->is_dmabuf) @@ -747,7 +747,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, if (!odp->is_implicit_odp) { u64 user_va; - if (check_add_overflow(io_virt - mr->mmkey.iova, + if (check_add_overflow(io_virt - mr->ibmr.iova, (u64)odp->umem.address, &user_va)) return -EFAULT; if (unlikely(user_va >= ib_umem_end(odp) || diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 174f71ed5280..d239d559994f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -52,7 +52,6 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); - mkey->iova = MLX5_GET64(mkc, mkc, start_addr); mkey->size = MLX5_GET64(mkc, mkc, len); mkey->key = (u32)mlx5_mkey_variant(mkey->key) | mlx5_idx_to_mkey(mkey_index); mkey->pd = MLX5_GET(mkc, mkc, pd); diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c index 15e266d0e27a..14d4314cdc29 100644 --- a/drivers/vdpa/mlx5/core/resources.c +++ b/drivers/vdpa/mlx5/core/resources.c @@ -215,7 +215,6 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mk mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); - mkey->iova = MLX5_GET64(mkc, mkc, start_addr); mkey->size = MLX5_GET64(mkc, mkc, len); mkey->key |= mlx5_idx_to_mkey(mkey_index); mkey->pd = MLX5_GET(mkc, mkc, pd); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e23417424373..669904f9986e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -364,7 +364,6 @@ enum { }; struct mlx5_core_mkey { - u64 iova; u64 size; u32 key; u32 pd; -- cgit v1.2.3-71-gd317 From 062fd731e51ee29ba745b2fd1c7ac87dd460d4ca Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 12 Oct 2021 13:26:31 +0300 Subject: RDMA/mlx5: Remove size from struct mlx5_core_mkey mkey->size is already stored in ibmr->length, no need to store it here. Signed-off-by: Aharon Landau Reviewed-by: Shay Drory Acked-by: Michael S. Tsirkin Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/devx.c | 1 - drivers/infiniband/hw/mlx5/mr.c | 5 ++--- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 1 - drivers/vdpa/mlx5/core/resources.c | 1 - include/linux/mlx5/driver.h | 1 - 5 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 00ad24044c3a..8f6e1350cd37 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1303,7 +1303,6 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, mkey->key = mlx5_idx_to_mkey( MLX5_GET(create_mkey_out, out, mkey_index)) | key; mkey->type = MLX5_MKEY_INDIRECT_DEVX; - mkey->size = MLX5_GET64(mkc, mkc, len); mkey->pd = MLX5_GET(mkc, mkc, pd); devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); init_waitqueue_head(&mkey->wait); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 6815500cfdaa..1b1367c87a6b 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -975,7 +975,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, mr->ibmr.pd = pd; mr->umem = umem; - mr->mmkey.size = umem->length; mr->mmkey.pd = to_mpd(pd)->pdn; mr->page_shift = order_base_2(page_size); set_mr_fields(dev, mr, umem->length, access_flags, iova); @@ -1087,7 +1086,7 @@ static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, wr->wr.opcode = MLX5_IB_WR_UMR; wr->pd = mr->ibmr.pd; wr->mkey = mr->mmkey.key; - wr->length = mr->mmkey.size; + wr->length = mr->ibmr.length; wr->virt_addr = mr->ibmr.iova; wr->access_flags = mr->access_flags; wr->page_shift = mr->page_shift; @@ -1764,7 +1763,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, mr->ibmr.length = new_umem->length; mr->ibmr.iova = iova; - mr->mmkey.size = new_umem->length; + mr->ibmr.length = new_umem->length; mr->page_shift = order_base_2(page_size); mr->umem = new_umem; err = mlx5_ib_update_mr_pas(mr, upd_flags); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index d239d559994f..b5dd44944265 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -52,7 +52,6 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); - mkey->size = MLX5_GET64(mkc, mkc, len); mkey->key = (u32)mlx5_mkey_variant(mkey->key) | mlx5_idx_to_mkey(mkey_index); mkey->pd = MLX5_GET(mkc, mkc, pd); init_waitqueue_head(&mkey->wait); diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c index 14d4314cdc29..d3d8b8b4e377 100644 --- a/drivers/vdpa/mlx5/core/resources.c +++ b/drivers/vdpa/mlx5/core/resources.c @@ -215,7 +215,6 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mk mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); - mkey->size = MLX5_GET64(mkc, mkc, len); mkey->key |= mlx5_idx_to_mkey(mkey_index); mkey->pd = MLX5_GET(mkc, mkc, pd); return 0; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 669904f9986e..ff1e991314e2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -364,7 +364,6 @@ enum { }; struct mlx5_core_mkey { - u64 size; u32 key; u32 pd; u32 type; -- cgit v1.2.3-71-gd317 From c64674168b6a2f293e92caf33c917ccf10886801 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 12 Oct 2021 13:26:32 +0300 Subject: RDMA/mlx5: Remove pd from struct mlx5_core_mkey There is no read of mkey->pd, only write. Remove it. Signed-off-by: Aharon Landau Reviewed-by: Shay Drory Acked-by: Michael S. Tsirkin Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/devx.c | 1 - drivers/infiniband/hw/mlx5/mr.c | 3 --- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 3 --- drivers/vdpa/mlx5/core/resources.c | 3 --- include/linux/mlx5/driver.h | 1 - 5 files changed, 11 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 8f6e1350cd37..5322d787c094 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1303,7 +1303,6 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, mkey->key = mlx5_idx_to_mkey( MLX5_GET(create_mkey_out, out, mkey_index)) | key; mkey->type = MLX5_MKEY_INDIRECT_DEVX; - mkey->pd = MLX5_GET(mkc, mkc, pd); devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); init_waitqueue_head(&mkey->wait); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1b1367c87a6b..9d7f1cadaa76 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -975,7 +975,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, mr->ibmr.pd = pd; mr->umem = umem; - mr->mmkey.pd = to_mpd(pd)->pdn; mr->page_shift = order_base_2(page_size); set_mr_fields(dev, mr, umem->length, access_flags, iova); @@ -1708,7 +1707,6 @@ static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, return err; mr->access_flags = access_flags; - mr->mmkey.pd = to_mpd(pd)->pdn; return 0; } @@ -1753,7 +1751,6 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, if (flags & IB_MR_REREG_PD) { mr->ibmr.pd = pd; - mr->mmkey.pd = to_mpd(pd)->pdn; upd_flags |= MLX5_IB_UPD_XLT_PD; } if (flags & IB_MR_REREG_ACCESS) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index b5dd44944265..6e99fd166f98 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -41,7 +41,6 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, { u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {}; u32 mkey_index; - void *mkc; int err; MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); @@ -50,10 +49,8 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, if (err) return err; - mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); mkey->key = (u32)mlx5_mkey_variant(mkey->key) | mlx5_idx_to_mkey(mkey_index); - mkey->pd = MLX5_GET(mkc, mkc, pd); init_waitqueue_head(&mkey->wait); mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, mkey->key); diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c index d3d8b8b4e377..72b2d80e75b0 100644 --- a/drivers/vdpa/mlx5/core/resources.c +++ b/drivers/vdpa/mlx5/core/resources.c @@ -203,7 +203,6 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mk { u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {}; u32 mkey_index; - void *mkc; int err; MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); @@ -213,10 +212,8 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mk if (err) return err; - mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); mkey->key |= mlx5_idx_to_mkey(mkey_index); - mkey->pd = MLX5_GET(mkc, mkc, pd); return 0; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ff1e991314e2..f0ce7d4dc4ff 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -365,7 +365,6 @@ enum { struct mlx5_core_mkey { u32 key; - u32 pd; u32 type; struct wait_queue_head wait; refcount_t usecount; -- cgit v1.2.3-71-gd317 From 83fec3f12a5904b62330fd1a89af6d892afc387e Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 12 Oct 2021 13:26:33 +0300 Subject: RDMA/mlx5: Replace struct mlx5_core_mkey by u32 key In mlx5_core and vdpa there is no use of mlx5_core_mkey members except for the key itself. As preparation for moving mlx5_core_mkey to mlx5_ib, the occurrences of struct mlx5_core_mkey in all modules except for mlx5_ib are replaced by a u32 key. Signed-off-by: Aharon Landau Reviewed-by: Shay Drory Acked-by: Michael S. Tsirkin Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 21 ++++++++++++++------- drivers/infiniband/hw/mlx5/odp.c | 2 +- .../ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 6 +++--- .../ethernet/mellanox/mlx5/core/diag/fw_tracer.h | 2 +- .../ethernet/mellanox/mlx5/core/diag/rsc_dump.c | 10 +++++----- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_common.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 13 ++++++------- .../net/ethernet/mellanox/mlx5/core/fpga/conn.c | 10 +++++----- .../net/ethernet/mellanox/mlx5/core/fpga/core.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 22 ++++++++++------------ .../mellanox/mlx5/core/steering/dr_icm_pool.c | 10 +++++----- .../ethernet/mellanox/mlx5/core/steering/dr_send.c | 11 +++++------ .../mellanox/mlx5/core/steering/dr_types.h | 2 +- drivers/vdpa/mlx5/core/mlx5_vdpa.h | 8 ++++---- drivers/vdpa/mlx5/core/mr.c | 8 ++++---- drivers/vdpa/mlx5/core/resources.c | 8 ++++---- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- include/linux/mlx5/driver.h | 14 ++++++-------- 21 files changed, 82 insertions(+), 81 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9d7f1cadaa76..e2f020472ae2 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -104,8 +104,14 @@ static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen) { + int ret; + assign_mkey_variant(dev, mkey, in); - return mlx5_core_create_mkey(dev->mdev, mkey, in, inlen); + ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen); + if (!ret) + init_waitqueue_head(&mkey->wait); + + return ret; } static int @@ -133,7 +139,7 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))); - return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); + return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key); } static void create_mkey_callback(int status, struct mlx5_async_work *context) @@ -260,10 +266,11 @@ static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent) goto free_in; } - err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen); + err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey.key, in, inlen); if (err) goto free_mr; + init_waitqueue_head(&mr->mmkey.wait); mr->mmkey.type = MLX5_MKEY_MR; WRITE_ONCE(ent->dev->cache.last_add, jiffies); spin_lock_irq(&ent->lock); @@ -290,7 +297,7 @@ static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) ent->available_mrs--; ent->total_mrs--; spin_unlock_irq(&ent->lock); - mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey); + mlx5_core_destroy_mkey(ent->dev->mdev, mr->mmkey.key); kfree(mr); spin_lock_irq(&ent->lock); } @@ -658,7 +665,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) ent->available_mrs--; ent->total_mrs--; spin_unlock_irq(&ent->lock); - mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); + mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key); } list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { @@ -2326,7 +2333,7 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) return 0; free_mkey: - mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); + mlx5_core_destroy_mkey(dev->mdev, mw->mmkey.key); free: kfree(in); return err; @@ -2345,7 +2352,7 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw) */ mlx5r_deref_wait_odp_mkey(&mmw->mmkey); - return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); + return mlx5_core_destroy_mkey(dev->mdev, mmw->mmkey.key); } int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index d119ba3101a3..a654367af056 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -909,7 +909,7 @@ next_mr: pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out, bsf0_klm0_pas_mtt0_1); - ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen); + ret = mlx5_core_query_mkey(dev->mdev, mmkey->key, out, outlen); if (ret) goto end; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index f9cf9fb31547..02db3148240a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -745,7 +745,7 @@ static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer) MLX5_SET(mtrc_conf, in, trace_mode, TRACE_TO_MEMORY); MLX5_SET(mtrc_conf, in, log_trace_buffer_size, ilog2(TRACER_BUFFER_PAGE_NUM)); - MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey.key); + MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey); err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MTRC_CONF, 0, 1); @@ -1028,7 +1028,7 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) err_notifier_unregister: mlx5_eq_notifier_unregister(dev, &tracer->nb); - mlx5_core_destroy_mkey(dev, &tracer->buff.mkey); + mlx5_core_destroy_mkey(dev, tracer->buff.mkey); err_dealloc_pd: mlx5_core_dealloc_pd(dev, tracer->buff.pdn); err_cancel_work: @@ -1051,7 +1051,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer) if (tracer->owner) mlx5_fw_tracer_ownership_release(tracer); - mlx5_core_destroy_mkey(tracer->dev, &tracer->buff.mkey); + mlx5_core_destroy_mkey(tracer->dev, tracer->buff.mkey); mlx5_core_dealloc_pd(tracer->dev, tracer->buff.pdn); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h index 97252a85d65e..4762b55b0b0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -89,7 +89,7 @@ struct mlx5_fw_tracer { void *log_buf; dma_addr_t dma; u32 size; - struct mlx5_core_mkey mkey; + u32 mkey; u32 consumer_index; } buff; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c index ed4fb79b4db7..538adab6878b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c @@ -30,7 +30,7 @@ static const char *const mlx5_rsc_sgmt_name[] = { struct mlx5_rsc_dump { u32 pdn; - struct mlx5_core_mkey mkey; + u32 mkey; u16 fw_segment_type[MLX5_SGMT_TYPE_NUM]; }; @@ -89,7 +89,7 @@ static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump return -ENOMEM; in_seq_num = MLX5_GET(resource_dump, cmd->cmd, seq_num); - MLX5_SET(resource_dump, cmd->cmd, mkey, rsc_dump->mkey.key); + MLX5_SET(resource_dump, cmd->cmd, mkey, rsc_dump->mkey); MLX5_SET64(resource_dump, cmd->cmd, address, dma); err = mlx5_core_access_reg(dev, cmd->cmd, sizeof(cmd->cmd), cmd->cmd, @@ -202,7 +202,7 @@ free_page: } static int mlx5_rsc_dump_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, - struct mlx5_core_mkey *mkey) + u32 *mkey) { int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); void *mkc; @@ -276,7 +276,7 @@ int mlx5_rsc_dump_init(struct mlx5_core_dev *dev) return err; destroy_mkey: - mlx5_core_destroy_mkey(dev, &rsc_dump->mkey); + mlx5_core_destroy_mkey(dev, rsc_dump->mkey); free_pd: mlx5_core_dealloc_pd(dev, rsc_dump->pdn); return err; @@ -287,6 +287,6 @@ void mlx5_rsc_dump_cleanup(struct mlx5_core_dev *dev) if (IS_ERR_OR_NULL(dev->rsc_dump)) return; - mlx5_core_destroy_mkey(dev, &dev->rsc_dump->mkey); + mlx5_core_destroy_mkey(dev, dev->rsc_dump->mkey); mlx5_core_dealloc_pd(dev, dev->rsc_dump->pdn); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 03a7a4ce5cd5..7761daa25f63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -665,7 +665,7 @@ struct mlx5e_rq { u8 wq_type; u32 rqn; struct mlx5_core_dev *mdev; - struct mlx5_core_mkey umr_mkey; + u32 umr_mkey; struct mlx5e_dma_info wqe_overflow; /* XDP read-mostly */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 3a86f66d1295..18d542b1c5cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -682,7 +682,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, c->tstamp = &priv->tstamp; c->pdev = mlx5_core_dma_dev(priv->mdev); c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key); + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); c->num_tc = mlx5e_get_dcb_num_tc(params); c->stats = &priv->ptp_stats.ch; c->lag_port = lag_port; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index d54607a42740..a55b066746cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -137,7 +137,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) t->tstamp = &priv->tstamp; t->pdev = mlx5_core_dma_dev(priv->mdev); t->netdev = priv->netdev; - t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key); + t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); t->stats = &priv->trap_stats.ch; netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll, 64); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 84eb7201c142..c0f409c195bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -47,7 +47,7 @@ void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc) } static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, - struct mlx5_core_mkey *mkey) + u32 *mkey) { int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); void *mkc; @@ -108,7 +108,7 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) return 0; err_destroy_mkey: - mlx5_core_destroy_mkey(mdev, &res->mkey); + mlx5_core_destroy_mkey(mdev, res->mkey); err_dealloc_transport_domain: mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); err_dealloc_pd: @@ -121,7 +121,7 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; mlx5_free_bfreg(mdev, &res->bfreg); - mlx5_core_destroy_mkey(mdev, &res->mkey); + mlx5_core_destroy_mkey(mdev, res->mkey); mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); mlx5_core_dealloc_pd(mdev, res->pdn); memset(res, 0, sizeof(*res)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 09c8b71b186c..63076d0e8b63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -234,8 +234,7 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) } static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, - u64 npages, u8 page_shift, - struct mlx5_core_mkey *umr_mkey, + u64 npages, u8 page_shift, u32 *umr_mkey, dma_addr_t filler_addr) { struct mlx5_mtt *mtt; @@ -455,7 +454,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, err = mlx5e_create_rq_umr_mkey(mdev, rq); if (err) goto err_rq_drop_page; - rq->mkey_be = cpu_to_be32(rq->umr_mkey.key); + rq->mkey_be = cpu_to_be32(rq->umr_mkey); err = mlx5e_rq_alloc_mpwqe_info(rq, node); if (err) @@ -487,7 +486,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, if (err) goto err_rq_frags; - rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey.key); + rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey); } if (xsk) { @@ -574,7 +573,7 @@ err_free_by_rq_type: case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: kvfree(rq->mpwqe.info); err_rq_mkey: - mlx5_core_destroy_mkey(mdev, &rq->umr_mkey); + mlx5_core_destroy_mkey(mdev, rq->umr_mkey); err_rq_drop_page: mlx5e_free_mpwqe_rq_drop_page(rq); break; @@ -607,7 +606,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: kvfree(rq->mpwqe.info); - mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey); + mlx5_core_destroy_mkey(rq->mdev, rq->umr_mkey); mlx5e_free_mpwqe_rq_drop_page(rq); break; default: /* MLX5_WQ_TYPE_CYCLIC */ @@ -1991,7 +1990,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->cpu = cpu; c->pdev = mlx5_core_dma_dev(priv->mdev); c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key); + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); c->num_tc = mlx5e_get_dcb_num_tc(params); c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix].ch; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index 306279b7f9e7..12abe991583a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -115,7 +115,7 @@ static int mlx5_fpga_conn_post_recv(struct mlx5_fpga_conn *conn, ix = conn->qp.rq.pc & (conn->qp.rq.size - 1); data = mlx5_wq_cyc_get_wqe(&conn->qp.wq.rq, ix); data->byte_count = cpu_to_be32(buf->sg[0].size); - data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key); + data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey); data->addr = cpu_to_be64(buf->sg[0].dma_addr); conn->qp.rq.pc++; @@ -155,7 +155,7 @@ static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn, if (!buf->sg[sgi].data) break; data->byte_count = cpu_to_be32(buf->sg[sgi].size); - data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key); + data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey); data->addr = cpu_to_be64(buf->sg[sgi].dma_addr); data++; size++; @@ -221,7 +221,7 @@ static int mlx5_fpga_conn_post_recv_buf(struct mlx5_fpga_conn *conn) } static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, - struct mlx5_core_mkey *mkey) + u32 *mkey) { int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); void *mkc; @@ -978,7 +978,7 @@ int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev) mlx5_fpga_err(fdev, "create mkey failed, %d\n", err); goto err_dealloc_pd; } - mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey.key); + mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey); return 0; @@ -994,7 +994,7 @@ out: void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev) { - mlx5_core_destroy_mkey(fdev->mdev, &fdev->conn_res.mkey); + mlx5_core_destroy_mkey(fdev->mdev, fdev->conn_res.mkey); mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn); mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar); mlx5_nic_vport_disable_roce(fdev->mdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h index 52c9dee91ea4..2a984e82ae16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h @@ -54,7 +54,7 @@ struct mlx5_fpga_device { /* QP Connection resources */ struct { u32 pdn; - struct mlx5_core_mkey mkey; + u32 mkey; struct mlx5_uars_page *uar; } conn_res; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 6e99fd166f98..f099a087400e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -35,9 +35,8 @@ #include #include "mlx5_core.h" -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - u32 *in, int inlen) +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in, + int inlen) { u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {}; u32 mkey_index; @@ -50,33 +49,32 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, return err; mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); - mkey->key = (u32)mlx5_mkey_variant(mkey->key) | mlx5_idx_to_mkey(mkey_index); - init_waitqueue_head(&mkey->wait); + *mkey = MLX5_GET(create_mkey_in, in, memory_key_mkey_entry.mkey_7_0) | + mlx5_idx_to_mkey(mkey_index); - mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, mkey->key); + mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, *mkey); return 0; } EXPORT_SYMBOL(mlx5_core_create_mkey); -int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey) +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey) { u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {}; MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); - MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); + MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey)); return mlx5_cmd_exec_in(dev, destroy_mkey, in); } EXPORT_SYMBOL(mlx5_core_destroy_mkey); -int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *out, int outlen) +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, u32 mkey, u32 *out, + int outlen) { u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {}; memset(out, 0, outlen); MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY); - MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); + MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey)); return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } EXPORT_SYMBOL(mlx5_core_query_mkey); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c index 66c24767e3b0..7f6fd9c5e371 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -24,7 +24,7 @@ struct mlx5dr_icm_dm { }; struct mlx5dr_icm_mr { - struct mlx5_core_mkey mkey; + u32 mkey; struct mlx5dr_icm_dm dm; struct mlx5dr_domain *dmn; size_t length; @@ -33,7 +33,7 @@ struct mlx5dr_icm_mr { static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev, u32 pd, u64 length, u64 start_addr, int mode, - struct mlx5_core_mkey *mkey) + u32 *mkey) { u32 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {}; @@ -116,7 +116,7 @@ dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool) return icm_mr; free_mkey: - mlx5_core_destroy_mkey(mdev, &icm_mr->mkey); + mlx5_core_destroy_mkey(mdev, icm_mr->mkey); free_dm: mlx5_dm_sw_icm_dealloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0, icm_mr->dm.addr, icm_mr->dm.obj_id); @@ -130,7 +130,7 @@ static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr) struct mlx5_core_dev *mdev = icm_mr->dmn->mdev; struct mlx5dr_icm_dm *dm = &icm_mr->dm; - mlx5_core_destroy_mkey(mdev, &icm_mr->mkey); + mlx5_core_destroy_mkey(mdev, icm_mr->mkey); mlx5_dm_sw_icm_dealloc(mdev, dm->type, dm->length, 0, dm->addr, dm->obj_id); kvfree(icm_mr); @@ -252,7 +252,7 @@ dr_icm_chunk_create(struct mlx5dr_icm_pool *pool, offset = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type) * seg; - chunk->rkey = buddy_mem_pool->icm_mr->mkey.key; + chunk->rkey = buddy_mem_pool->icm_mr->mkey; chunk->mr_addr = offset; chunk->icm_addr = (uintptr_t)buddy_mem_pool->icm_mr->icm_start_addr + offset; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index bfb14b4b1906..00aef47d7682 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -350,7 +350,7 @@ static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring, send_info->read.length = send_info->write.length; /* Read into the same write area */ send_info->read.addr = (uintptr_t)send_info->write.addr; - send_info->read.lkey = send_ring->mr->mkey.key; + send_info->read.lkey = send_ring->mr->mkey; if (send_ring->pending_wqe % send_ring->signal_th == 0) send_info->read.send_flags = IB_SEND_SIGNALED; @@ -388,7 +388,7 @@ static int dr_postsend_icm_data(struct mlx5dr_domain *dmn, (void *)(uintptr_t)send_info->write.addr, send_info->write.length); send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset; - send_info->write.lkey = send_ring->mr->mkey.key; + send_info->write.lkey = send_ring->mr->mkey; } send_ring->tx_head++; @@ -848,8 +848,7 @@ static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq) kfree(cq); } -static int -dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey) +static int dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey) { u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {}; void *mkc; @@ -908,7 +907,7 @@ static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev, static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr) { - mlx5_core_destroy_mkey(mdev, &mr->mkey); + mlx5_core_destroy_mkey(mdev, mr->mkey); dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size, DMA_BIDIRECTIONAL); kfree(mr); @@ -1039,7 +1038,7 @@ int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn) send_info.write.lkey = 0; /* Using the sync_mr in order to write/read */ send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr; - send_info.rkey = send_ring->sync_mr->mkey.key; + send_info.rkey = send_ring->sync_mr->mkey; for (i = 0; i < num_of_sends_req; i++) { ret = dr_postsend_icm_data(dmn, &send_info); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index b20e8aabb861..6c67185c05c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -1275,7 +1275,7 @@ struct mlx5dr_cq { struct mlx5dr_mr { struct mlx5_core_dev *mdev; - struct mlx5_core_mkey mkey; + u32 mkey; dma_addr_t dma_addr; void *addr; size_t size; diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index 01a848adf590..3163b313a470 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -15,7 +15,7 @@ struct mlx5_vdpa_direct_mr { u64 start; u64 end; u32 perm; - struct mlx5_core_mkey mr; + u32 mr; struct sg_table sg_head; int log_size; int nsg; @@ -25,7 +25,7 @@ struct mlx5_vdpa_direct_mr { }; struct mlx5_vdpa_mr { - struct mlx5_core_mkey mkey; + u32 mkey; /* list of direct MRs descendants of this indirect mr */ struct list_head head; @@ -99,9 +99,9 @@ int mlx5_vdpa_alloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 *tdn); void mlx5_vdpa_dealloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 tdn); int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev); void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev); -int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey, u32 *in, +int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in, int inlen); -int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey); +int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey); int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, bool *change_map); int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb); diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index ff010c6d0cd3..a639b9208d41 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -88,7 +88,7 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) { - mlx5_vdpa_destroy_mkey(mvdev, &mr->mr); + mlx5_vdpa_destroy_mkey(mvdev, mr->mr); } static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr) @@ -162,7 +162,7 @@ again: } if (preve == dmr->start) { - klm->key = cpu_to_be32(dmr->mr.key); + klm->key = cpu_to_be32(dmr->mr); klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start)); preve = dmr->end; } else { @@ -217,7 +217,7 @@ static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey) { - mlx5_vdpa_destroy_mkey(mvdev, &mkey->mkey); + mlx5_vdpa_destroy_mkey(mvdev, mkey->mkey); } static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr, @@ -449,7 +449,7 @@ static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) { - mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey); + mlx5_vdpa_destroy_mkey(mvdev, mr->mkey); } static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src) diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c index 72b2d80e75b0..9800f9bec225 100644 --- a/drivers/vdpa/mlx5/core/resources.c +++ b/drivers/vdpa/mlx5/core/resources.c @@ -198,7 +198,7 @@ void mlx5_vdpa_dealloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 tdn) mlx5_cmd_exec_in(mvdev->mdev, dealloc_transport_domain, in); } -int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey, u32 *in, +int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in, int inlen) { u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {}; @@ -213,17 +213,17 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mk return err; mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); - mkey->key |= mlx5_idx_to_mkey(mkey_index); + *mkey |= mlx5_idx_to_mkey(mkey_index); return 0; } -int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey) +int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey) { u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {}; MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid); MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); - MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); + MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey)); return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in); } diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index bd56de7484dc..5c7d2a953dbd 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -865,7 +865,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); - MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key); + MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey); MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id); MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size); MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f0ce7d4dc4ff..bd99f713720b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -650,7 +650,7 @@ struct mlx5e_resources { struct mlx5e_hw_objs { u32 pdn; struct mlx5_td td; - struct mlx5_core_mkey mkey; + u32 mkey; struct mlx5_sq_bfreg bfreg; } hw_objs; struct devlink_port dl_port; @@ -1021,13 +1021,11 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, gfp_t flags, int npages); void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, struct mlx5_cmd_mailbox *head); -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - u32 *in, int inlen); -int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey); -int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *out, int outlen); +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in, + int inlen); +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey); +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, u32 mkey, u32 *out, + int outlen); int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); int mlx5_pagealloc_init(struct mlx5_core_dev *dev); -- cgit v1.2.3-71-gd317 From 4123bfb0b28b77b944360be8c758b1a0974e96ad Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 12 Oct 2021 13:26:34 +0300 Subject: RDMA/mlx5: Move struct mlx5_core_mkey to mlx5_ib Move mlx5_core_mkey struct to mlx5_ib, as the mlx5_core doesn't use it at this point. Signed-off-by: Aharon Landau Reviewed-by: Shay Drory Acked-by: Michael S. Tsirkin Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/devx.c | 2 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 25 +++++++++++++++++++------ drivers/infiniband/hw/mlx5/mr.c | 12 +++++------- drivers/infiniband/hw/mlx5/odp.c | 8 ++++---- include/linux/mlx5/driver.h | 13 ------------- 5 files changed, 29 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 5322d787c094..d07a21a13ac0 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1293,7 +1293,7 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, void *in, void *out) { struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr; - struct mlx5_core_mkey *mkey; + struct mlx5_ib_mkey *mkey; void *mkc; u8 key; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index bf20a388eabe..8f754c52d469 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -619,6 +619,19 @@ struct mlx5_user_mmap_entry { u32 page_idx; }; +enum mlx5_mkey_type { + MLX5_MKEY_MR = 1, + MLX5_MKEY_MW, + MLX5_MKEY_INDIRECT_DEVX, +}; + +struct mlx5_ib_mkey { + u32 key; + enum mlx5_mkey_type type; + struct wait_queue_head wait; + refcount_t usecount; +}; + #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) #define MLX5_IB_DM_MEMIC_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\ @@ -637,7 +650,7 @@ struct mlx5_user_mmap_entry { struct mlx5_ib_mr { struct ib_mr ibmr; - struct mlx5_core_mkey mmkey; + struct mlx5_ib_mkey mmkey; /* User MR data */ struct mlx5_cache_ent *cache_ent; @@ -713,12 +726,12 @@ static inline bool is_dmabuf_mr(struct mlx5_ib_mr *mr) struct mlx5_ib_mw { struct ib_mw ibmw; - struct mlx5_core_mkey mmkey; + struct mlx5_ib_mkey mmkey; int ndescs; }; struct mlx5_ib_devx_mr { - struct mlx5_core_mkey mmkey; + struct mlx5_ib_mkey mmkey; int ndescs; }; @@ -1579,7 +1592,7 @@ static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev, } static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev, - struct mlx5_core_mkey *mmkey) + struct mlx5_ib_mkey *mmkey) { refcount_set(&mmkey->usecount, 1); @@ -1588,14 +1601,14 @@ static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev, } /* deref an mkey that can participate in ODP flow */ -static inline void mlx5r_deref_odp_mkey(struct mlx5_core_mkey *mmkey) +static inline void mlx5r_deref_odp_mkey(struct mlx5_ib_mkey *mmkey) { if (refcount_dec_and_test(&mmkey->usecount)) wake_up(&mmkey->wait); } /* deref an mkey that can participate in ODP flow and wait for relese */ -static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_core_mkey *mmkey) +static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_ib_mkey *mmkey) { mlx5r_deref_odp_mkey(mmkey); wait_event(mmkey->wait, refcount_read(&mmkey->usecount) == 0); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index e2f020472ae2..5afc3e965c28 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -88,9 +88,8 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, MLX5_SET64(mkc, mkc, start_addr, start_addr); } -static void -assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in) +static void assign_mkey_variant(struct mlx5_ib_dev *dev, + struct mlx5_ib_mkey *mkey, u32 *in) { u8 key = atomic_inc_return(&dev->mkey_var); void *mkc; @@ -100,9 +99,8 @@ assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, mkey->key = key; } -static int -mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in, int inlen) +static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, + struct mlx5_ib_mkey *mkey, u32 *in, int inlen) { int ret; @@ -116,7 +114,7 @@ mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, static int mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, - struct mlx5_core_mkey *mkey, + struct mlx5_ib_mkey *mkey, struct mlx5_async_ctx *async_ctx, u32 *in, int inlen, u32 *out, int outlen, struct mlx5_async_work *context) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index a654367af056..7ab0a9b752f6 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -788,7 +788,7 @@ struct pf_frame { int depth; }; -static bool mkey_is_eq(struct mlx5_core_mkey *mmkey, u32 key) +static bool mkey_is_eq(struct mlx5_ib_mkey *mmkey, u32 key) { if (!mmkey) return false; @@ -797,7 +797,7 @@ static bool mkey_is_eq(struct mlx5_core_mkey *mmkey, u32 key) return mmkey->key == key; } -static int get_indirect_num_descs(struct mlx5_core_mkey *mmkey) +static int get_indirect_num_descs(struct mlx5_ib_mkey *mmkey) { struct mlx5_ib_mw *mw; struct mlx5_ib_devx_mr *devx_mr; @@ -831,7 +831,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, { int npages = 0, ret, i, outlen, cur_outlen = 0, depth = 0; struct pf_frame *head = NULL, *frame; - struct mlx5_core_mkey *mmkey; + struct mlx5_ib_mkey *mmkey; struct mlx5_ib_mr *mr; struct mlx5_klm *pklm; u32 *out = NULL; @@ -1703,8 +1703,8 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 lkey) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr = NULL; + struct mlx5_ib_mkey *mmkey; xa_lock(&dev->odp_mkeys); mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey)); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index bd99f713720b..70b6aff4940a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -357,19 +357,6 @@ struct mlx5_core_sig_ctx { u32 sigerr_count; }; -enum { - MLX5_MKEY_MR = 1, - MLX5_MKEY_MW, - MLX5_MKEY_INDIRECT_DEVX, -}; - -struct mlx5_core_mkey { - u32 key; - u32 type; - struct wait_queue_head wait; - refcount_t usecount; -}; - #define MLX5_24BIT_MASK ((1 << 24) - 1) enum mlx5_res_type { -- cgit v1.2.3-71-gd317