From d80976d9ffd9d7f89a26134a299b236910477f3b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 30 Nov 2021 16:27:55 +0100 Subject: dma-resv: some doc polish for iterators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hammer it a bit more in that iterators can be restarted and when that matters, plus suggest to prefer the locked version whenver. Also delete the two leftover kerneldoc for static functions plus sprinkle some more links while at it. v2: Keep some comments (Christian) Reviewed-by: Christian König Signed-off-by: Daniel Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Link: https://patchwork.freedesktop.org/patch/msgid/20211130152756.1388106-1-daniel.vetter@ffwll.ch --- include/linux/dma-resv.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index a715df97b31a..afdfdfac729f 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -153,6 +153,13 @@ struct dma_resv { * struct dma_resv_iter - current position into the dma_resv fences * * Don't touch this directly in the driver, use the accessor function instead. + * + * IMPORTANT + * + * When using the lockless iterators like dma_resv_iter_next_unlocked() or + * dma_resv_for_each_fence_unlocked() beware that the iterator can be restarted. + * Code which accumulates statistics or similar needs to check for this with + * dma_resv_iter_is_restarted(). */ struct dma_resv_iter { /** @obj: The dma_resv object we iterate over */ @@ -243,7 +250,11 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) * &dma_resv.lock and using RCU instead. The cursor needs to be initialized * with dma_resv_iter_begin() and cleaned up with dma_resv_iter_end(). Inside * the iterator a reference to the dma_fence is held and the RCU lock dropped. - * When the dma_resv is modified the iteration starts over again. + * + * Beware that the iterator can be restarted when the struct dma_resv for + * @cursor is modified. Code which accumulates statistics or similar needs to + * check for this with dma_resv_iter_is_restarted(). For this reason prefer the + * lock iterator dma_resv_for_each_fence() whenever possible. */ #define dma_resv_for_each_fence_unlocked(cursor, fence) \ for (fence = dma_resv_iter_first_unlocked(cursor); \ -- cgit v1.2.3-71-gd317 From ea4692c75e1c63926e4fb0728f5775ef0d733888 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 26 Jan 2022 01:39:41 -0800 Subject: lib/string_helpers: Consolidate string helpers implementation There are a few implementations of string helpers in the tree like yesno() that just returns "yes" or "no" depending on a boolean argument. Those are helpful to output strings to the user or log. In order to consolidate them, prefix all of them str_ prefix to make it clear what they are about and avoid symbol clashes. Taking the commoon `val ? "yes" : "no"` implementation, quite a few users of open coded yesno() could later be converted to the new function: $ git grep '?\s*"yes"\s*' | wc -l 286 $ git grep '?\s*"no"\s*' | wc -l 20 The inlined function should keep the const strings local to each compilation unit, the same way it's now, thus not changing the current behavior. 
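As a hedged illustration of the intended use (the caller, device and message below are made up for this sketch; only str_yes_no() itself comes from the patch), a driver-side debug print changes from the open-coded ternary to the helper:

  #include <linux/device.h>
  #include <linux/string_helpers.h>

  static void report_link_state(struct device *dev, bool up)
  {
          /* open-coded pattern this series consolidates */
          dev_dbg(dev, "link up: %s\n", up ? "yes" : "no");

          /* equivalent output using the new helper */
          dev_dbg(dev, "link up: %s\n", str_yes_no(up));
  }

The same pattern applies to str_on_off(), str_enable_disable() and str_enabled_disabled() added in the hunk below.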
Signed-off-by: Lucas De Marchi Reviewed-by: Andy Shevchenko Acked-by: Jani Nikula Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220126093951.1470898-2-lucas.demarchi@intel.com --- include/linux/string_helpers.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 7a22921c9db7..4d72258d42fd 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -106,4 +106,24 @@ void kfree_strarray(char **array, size_t n); char **devm_kasprintf_strarray(struct device *dev, const char *prefix, size_t n); +static inline const char *str_yes_no(bool v) +{ + return v ? "yes" : "no"; +} + +static inline const char *str_on_off(bool v) +{ + return v ? "on" : "off"; +} + +static inline const char *str_enable_disable(bool v) +{ + return v ? "enable" : "disable"; +} + +static inline const char *str_enabled_disabled(bool v) +{ + return v ? "enabled" : "disabled"; +} + #endif -- cgit v1.2.3-71-gd317 From 976b6d97c62347df3e686f60a5f455bb8ed6ea23 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 19 Jan 2022 11:17:32 +0100 Subject: dma-buf: consolidate dma_fence subclass checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidate the wrapper functions to check for dma_fence subclasses in the dma_fence header. This makes it easier to document and also check the different requirements for fence containers in the subclasses. Signed-off-by: Christian König Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220204100429.2049-2-christian.koenig@amd.com --- include/linux/dma-fence-array.h | 15 +-------------- include/linux/dma-fence-chain.h | 3 +-- include/linux/dma-fence.h | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 303dd712220f..fec374f69e12 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -45,19 +45,6 @@ struct dma_fence_array { struct irq_work work; }; -extern const struct dma_fence_ops dma_fence_array_ops; - -/** - * dma_fence_is_array - check if a fence is from the array subsclass - * @fence: fence to test - * - * Return true if it is a dma_fence_array and false otherwise. 
- */ -static inline bool dma_fence_is_array(struct dma_fence *fence) -{ - return fence->ops == &dma_fence_array_ops; -} - /** * to_dma_fence_array - cast a fence to a dma_fence_array * @fence: fence to cast to a dma_fence_array @@ -68,7 +55,7 @@ static inline bool dma_fence_is_array(struct dma_fence *fence) static inline struct dma_fence_array * to_dma_fence_array(struct dma_fence *fence) { - if (fence->ops != &dma_fence_array_ops) + if (!fence || !dma_fence_is_array(fence)) return NULL; return container_of(fence, struct dma_fence_array, base); diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index 54fe3443fd2c..ee906b659694 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -49,7 +49,6 @@ struct dma_fence_chain { spinlock_t lock; }; -extern const struct dma_fence_ops dma_fence_chain_ops; /** * to_dma_fence_chain - cast a fence to a dma_fence_chain @@ -61,7 +60,7 @@ extern const struct dma_fence_ops dma_fence_chain_ops; static inline struct dma_fence_chain * to_dma_fence_chain(struct dma_fence *fence) { - if (!fence || fence->ops != &dma_fence_chain_ops) + if (!fence || !dma_fence_is_chain(fence)) return NULL; return container_of(fence, struct dma_fence_chain, base); diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 1ea691753bd3..775cdc0b4f24 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -587,4 +587,42 @@ struct dma_fence *dma_fence_get_stub(void); struct dma_fence *dma_fence_allocate_private_stub(void); u64 dma_fence_context_alloc(unsigned num); +extern const struct dma_fence_ops dma_fence_array_ops; +extern const struct dma_fence_ops dma_fence_chain_ops; + +/** + * dma_fence_is_array - check if a fence is from the array subclass + * @fence: the fence to test + * + * Return true if it is a dma_fence_array and false otherwise. + */ +static inline bool dma_fence_is_array(struct dma_fence *fence) +{ + return fence->ops == &dma_fence_array_ops; +} + +/** + * dma_fence_is_chain - check if a fence is from the chain subclass + * @fence: the fence to test + * + * Return true if it is a dma_fence_chain and false otherwise. + */ +static inline bool dma_fence_is_chain(struct dma_fence *fence) +{ + return fence->ops == &dma_fence_chain_ops; +} + +/** + * dma_fence_is_container - check if a fence is a container for other fences + * @fence: the fence to test + * + * Return true if this fence is a container for other fences, false otherwise. + * This is important since we can't build up large fence structure or otherwise + * we run into recursion during operation on those fences. + */ +static inline bool dma_fence_is_container(struct dma_fence *fence) +{ + return dma_fence_is_array(fence) || dma_fence_is_chain(fence); +} + #endif /* __LINUX_DMA_FENCE_H */ -- cgit v1.2.3-71-gd317 From 18f5fad275efef015226ee4f90eae34d8f44aa5e Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 20 Jan 2022 11:42:40 +0100 Subject: dma-buf: add dma_fence_chain_contained helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's a reoccurring pattern that we need to extract the fence from a dma_fence_chain object. Add a helper for this. 
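As a hedged usage sketch (the function name is illustrative; the body mirrors the dma_fence_chain_signaled() conversion in the hunk below), a walker over a chain no longer needs the to_dma_fence_chain()-and-NULL-check dance for every link:

  #include <linux/dma-fence-chain.h>

  static bool example_chain_all_signaled(struct dma_fence *fence)
  {
          dma_fence_chain_for_each(fence, fence) {
                  struct dma_fence *f = dma_fence_chain_contained(fence);

                  if (!dma_fence_is_signaled(f)) {
                          dma_fence_put(fence);
                          return false;
                  }
          }

          return true;
  }

dma_fence_chain_for_each() holds a reference to the current link, hence the dma_fence_put() on the early exit, exactly as in the converted driver code below.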
Signed-off-by: Christian König Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220204100429.2049-6-christian.koenig@amd.com --- drivers/dma-buf/dma-fence-chain.c | 6 ++---- include/linux/dma-fence-chain.h | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index 084c6927b735..06f8ef97c6e8 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -148,8 +148,7 @@ static bool dma_fence_chain_enable_signaling(struct dma_fence *fence) dma_fence_get(&head->base); dma_fence_chain_for_each(fence, &head->base) { - struct dma_fence_chain *chain = to_dma_fence_chain(fence); - struct dma_fence *f = chain ? chain->fence : fence; + struct dma_fence *f = dma_fence_chain_contained(fence); dma_fence_get(f); if (!dma_fence_add_callback(f, &head->cb, dma_fence_chain_cb)) { @@ -165,8 +164,7 @@ static bool dma_fence_chain_enable_signaling(struct dma_fence *fence) static bool dma_fence_chain_signaled(struct dma_fence *fence) { dma_fence_chain_for_each(fence, fence) { - struct dma_fence_chain *chain = to_dma_fence_chain(fence); - struct dma_fence *f = chain ? chain->fence : fence; + struct dma_fence *f = dma_fence_chain_contained(fence); if (!dma_fence_is_signaled(f)) { dma_fence_put(fence); diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index ee906b659694..10d51bcdf7b7 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -66,6 +66,21 @@ to_dma_fence_chain(struct dma_fence *fence) return container_of(fence, struct dma_fence_chain, base); } +/** + * dma_fence_chain_contained - return the contained fence + * @fence: the fence to test + * + * If the fence is a dma_fence_chain the function returns the fence contained + * inside the chain object, otherwise it returns the fence itself. + */ +static inline struct dma_fence * +dma_fence_chain_contained(struct dma_fence *fence) +{ + struct dma_fence_chain *chain = to_dma_fence_chain(fence); + + return chain ? chain->fence : fence; +} + /** * dma_fence_chain_alloc * -- cgit v1.2.3-71-gd317 From 8c30e2d81bfddc5ab9f6b04db1c0f7d6ca7bdf46 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 11 Feb 2022 10:46:40 +0100 Subject: fbdev: Don't sort deferred-I/O pages by default Fbdev's deferred I/O sorts all dirty pages by default, which incurs a significant overhead. Make the sorting step optional and update the few drivers that require it. Use a FIFO list by default. Most fbdev drivers with deferred I/O build a bounding rectangle around the dirty pages or simply flush the whole screen. The only two affected DRM drivers, generic fbdev and vmwgfx, both use a bounding rectangle. In those cases, the exact order of the pages doesn't matter. The other drivers look at the page index or handle pages one-by-one. The patch sets the sort_pagelist flag for those, even though some of them would probably work correctly without sorting. Driver maintainers should update their driver accordingly. Sorting pages by memory offset for deferred I/O performs an implicit bubble-sort step on the list of dirty pages. The algorithm goes through the list of dirty pages and inserts each new page according to its index field. Even worse, list traversal always starts at the first entry. As video memory is most likely updated scanline by scanline, the algorithm traverses through the complete list for each updated page. 
For example, with 1024x768x32bpp each page covers exactly one scanline. Writing a single screen update from top to bottom requires updating 768 pages. With an average list length of 384 entries, a screen update creates (768 * 384 =) 294912 compare operation. Fix this by making the sorting step opt-in and update the few drivers that require it. All other drivers work with unsorted page lists. Pages are appended to the list. Therefore, in the common case of writing the framebuffer top to bottom, pages are still sorted by offset, which may have a positive effect on performance. Playing a video [1] in mplayer's benchmark mode shows the difference (i7-4790, FullHD, simpledrm, kernel with debugging). mplayer -benchmark -nosound -vo fbdev ./big_buck_bunny_720p_stereo.ogg With sorted page lists: BENCHMARKs: VC: 32.960s VO: 73.068s A: 0.000s Sys: 2.413s = 108.441s BENCHMARK%: VC: 30.3947% VO: 67.3802% A: 0.0000% Sys: 2.2251% = 100.0000% With unsorted page lists: BENCHMARKs: VC: 31.005s VO: 42.889s A: 0.000s Sys: 2.256s = 76.150s BENCHMARK%: VC: 40.7156% VO: 56.3219% A: 0.0000% Sys: 2.9625% = 100.0000% VC shows the overhead of video decoding, VO shows the overhead of the video output. Using unsorted page lists reduces the benchmark's run time by ~32s/~25%. v2: * Make sorted pagelists the special case (Sam) * Comment on drivers' use of pagelist (Sam) * Warn about the overhead in comment Signed-off-by: Thomas Zimmermann Acked-by: Sam Ravnborg Acked-by: Andy Shevchenko Link: https://download.blender.org/peach/bigbuckbunny_movies/big_buck_bunny_720p_stereo.ogg # [1] Link: https://patchwork.freedesktop.org/patch/msgid/20220211094640.21632-3-tzimmermann@suse.de --- drivers/staging/fbtft/fbtft-core.c | 1 + drivers/video/fbdev/broadsheetfb.c | 1 + drivers/video/fbdev/core/fb_defio.c | 24 +++++++++++++++++------- drivers/video/fbdev/metronomefb.c | 1 + drivers/video/fbdev/udlfb.c | 1 + include/linux/fb.h | 1 + 6 files changed, 22 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c index f2684d2d6851..4a35347b3020 100644 --- a/drivers/staging/fbtft/fbtft-core.c +++ b/drivers/staging/fbtft/fbtft-core.c @@ -654,6 +654,7 @@ struct fb_info *fbtft_framebuffer_alloc(struct fbtft_display *display, fbops->fb_blank = fbtft_fb_blank; fbdefio->delay = HZ / fps; + fbdefio->sort_pagelist = true; fbdefio->deferred_io = fbtft_deferred_io; fb_deferred_io_init(info); diff --git a/drivers/video/fbdev/broadsheetfb.c b/drivers/video/fbdev/broadsheetfb.c index fd66f4d4a621..b9054f658838 100644 --- a/drivers/video/fbdev/broadsheetfb.c +++ b/drivers/video/fbdev/broadsheetfb.c @@ -1059,6 +1059,7 @@ static const struct fb_ops broadsheetfb_ops = { static struct fb_deferred_io broadsheetfb_defio = { .delay = HZ/4, + .sort_pagelist = true, .deferred_io = broadsheetfb_dpy_deferred_io, }; diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index 169a81c8172b..98b0f23bf5e2 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -96,7 +96,7 @@ static vm_fault_t fb_deferred_io_mkwrite(struct vm_fault *vmf) struct page *page = vmf->page; struct fb_info *info = vmf->vma->vm_private_data; struct fb_deferred_io *fbdefio = info->fbdefio; - struct page *cur; + struct list_head *pos = &fbdefio->pagelist; /* this is a callback we get when userspace first tries to write to the page. we schedule a workqueue. 
that workqueue @@ -137,14 +137,24 @@ static vm_fault_t fb_deferred_io_mkwrite(struct vm_fault *vmf) if (!list_empty(&page->lru)) goto page_already_added; - /* we loop through the pagelist before adding in order - to keep the pagelist sorted */ - list_for_each_entry(cur, &fbdefio->pagelist, lru) { - if (cur->index > page->index) - break; + if (unlikely(fbdefio->sort_pagelist)) { + /* + * We loop through the pagelist before adding in order to + * keep the pagelist sorted. This has significant overhead + * of O(n^2) with n being the number of written pages. If + * possible, drivers should try to work with unsorted page + * lists instead. + */ + struct page *cur; + + list_for_each_entry(cur, &fbdefio->pagelist, lru) { + if (cur->index > page->index) + break; + } + pos = &cur->lru; } - list_add_tail(&page->lru, &cur->lru); + list_add_tail(&page->lru, pos); page_already_added: mutex_unlock(&fbdefio->lock); diff --git a/drivers/video/fbdev/metronomefb.c b/drivers/video/fbdev/metronomefb.c index 952826557a0c..af858dd23ea6 100644 --- a/drivers/video/fbdev/metronomefb.c +++ b/drivers/video/fbdev/metronomefb.c @@ -568,6 +568,7 @@ static const struct fb_ops metronomefb_ops = { static struct fb_deferred_io metronomefb_defio = { .delay = HZ, + .sort_pagelist = true, .deferred_io = metronomefb_dpy_deferred_io, }; diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c index b9cdd02c1000..184bb8433b78 100644 --- a/drivers/video/fbdev/udlfb.c +++ b/drivers/video/fbdev/udlfb.c @@ -980,6 +980,7 @@ static int dlfb_ops_open(struct fb_info *info, int user) if (fbdefio) { fbdefio->delay = DL_DEFIO_WRITE_DELAY; + fbdefio->sort_pagelist = true; fbdefio->deferred_io = dlfb_dpy_deferred_io; } diff --git a/include/linux/fb.h b/include/linux/fb.h index 9a14f3f8a329..39baa9a70779 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -204,6 +204,7 @@ struct fb_pixmap { struct fb_deferred_io { /* delay between mkwrite and deferred handler */ unsigned long delay; + bool sort_pagelist; /* sort pagelist by offset */ struct mutex lock; /* mutex that protects the page list */ struct list_head pagelist; /* list of touched pages */ /* callback */ -- cgit v1.2.3-71-gd317
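A hedged sketch of a driver's deferred-I/O setup after this change (the examplefb_* names and the delay value are invented for illustration; the fb_deferred_io fields come from the <linux/fb.h> hunk above). A driver that only builds a bounding rectangle leaves sort_pagelist at its default of false and gets the cheap FIFO list; a driver that walks pages by page->index sets it to true, as the fbtft, broadsheetfb, metronomefb and udlfb hunks above do:

  #include <linux/fb.h>
  #include <linux/jiffies.h>

  static void examplefb_deferred_io(struct fb_info *info,
                                    struct list_head *pagelist)
  {
          /*
           * Flush a bounding rectangle around the dirty pages; the order of
           * the pages on @pagelist does not matter for this kind of handler.
           */
  }

  static struct fb_deferred_io examplefb_defio = {
          .delay          = HZ / 20,
          .sort_pagelist  = false,        /* default: unsorted FIFO page list */
          .deferred_io    = examplefb_deferred_io,
  };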