memset-sh4.S (1668B)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * "memset" implementation for SH4
 *
 * Copyright (C) 1999 Niibe Yutaka
 * Copyright (c) 2009 STMicroelectronics Limited
 * Author: Stuart Menefy <stuart.menefy:st.com>
 */

/*
 *            void *memset(void *s, int c, size_t n);
 *
 * In:   r4 = s, r5 = c, r6 = n
 * Out:  r0 = s
 * Uses: r0-r6 and the T bit; no stack accesses, no calls.
 *
 * The buffer is filled from its end towards its start: r4 is first moved
 * to s + n and every store lowers it, so r4 is back at s once the last
 * byte has been written.  r6 always holds the number of bytes still to
 * be written below r4.
 *
 * Stages:
 *   1. n < 12: skip straight to the byte loop.
 *   2. Store single bytes until r4 is 4-byte aligned (0..3 bytes).
 *   3. Replicate the low byte of c into all four bytes of r5.
 *   4. With 64 or more bytes left: store words until r4 is 32-byte
 *      aligned, then fill whole 32-byte cache blocks, writing the first
 *      word of each block with movca.l.
 *   5. Fill the remaining 8-byte pairs.
 *   6. Fill the remaining 0..7 bytes.
 *
 * An instruction indented by one extra space sits in the delay slot of
 * the bt/s, bf/s or rts above it and executes whether or not that branch
 * is taken.
 */

#include <linux/linkage.h>

ENTRY(memset)
	mov	#12,r0
	add	r6,r4			! r4 = s + n, one past the last byte
	cmp/gt	r6,r0			! T = (12 > n), signed compare
	bt/s	.Lbyte_fill		! too small: set one byte at a time
	 mov	r4,r0			! r0 = end pointer

	! Stage 2: bring r4 down to a 4-byte boundary.
	and	#3,r0			! r0 = bytes above that boundary
	cmp/eq	#0,r0
	bt/s	.Lword_aligned		! already aligned
	 sub	r0,r6			! those bytes come off the count
.Lalign_word:
	dt	r0
	bf/s	.Lalign_word
	 mov.b	r5,@-r4

	! Stage 3: build VVVV in r5, V being the low byte of c.
.Lword_aligned:
	extu.b	r5,r5			! 000V
	swap.b	r5,r0			! 00V0
	or	r0,r5			! 00VV
	swap.w	r5,r0			! VV00
	or	r0,r5			! VVVV

	! Stage 4 is only worth it when enough bytes remain.
	mov	#0x40, r0		! (MT)
	cmp/gt	r6,r0			! (MT) T = (64 > remaining)
	bt/s	.Lpair_fill		! short: go to the 8-byte loop
	 mov	r6,r0			! r0 = remaining, shifted at .Lpair_fill

	! Align r4 to the 32-byte cache block size if necessary.
	mov	r4, r3
	mov	#~(0x1f), r1
	and	r3, r1			! r1 = r4 rounded down to 32 bytes
	cmp/eq	r3, r1
	bt/s	.Lblock_aligned		! nothing to do
	 sub	r1, r3			! r3 = r4 - r1, bytes above the boundary
	shlr2	r3			! same amount counted in words

.Lalign_block:
	mov.l	r5,@-r4
	dt	r3
	bf/s	.Lalign_block
	 add	#-4, r6

	! r4 is 32-byte aligned.  At least 36 bytes remain (64 minus at most
	! 28 alignment bytes), so the block count in r2 is never zero.
.Lblock_aligned:
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r2			! r2 = remaining >> 5, whole blocks
	add	#-32, r4		! r4 = base of the topmost block
	mov	r5, r0			! movca.l takes its data from r0
.Lblock_loop:
	movca.l	r0,@r4			! first word, allocates the cache block
	mov.l	r5,@(4, r4)
	mov.l	r5,@(8, r4)
	mov.l	r5,@(12,r4)
	mov.l	r5,@(16,r4)
	mov.l	r5,@(20,r4)
	add	#-0x20, r6		! one block less to go
	mov.l	r5,@(24,r4)
	dt	r2
	mov.l	r5,@(28,r4)
	bf/s	.Lblock_loop
	 add	#-32, r4		! step down to the next block

	add	#32, r4			! undo the step taken in the last delay slot
	mov	#8, r0
	cmp/ge	r0, r6			! T = (remaining >= 8)
	bf	.Lbyte_fill		! fewer than 8 left: bytes only
	mov	r6,r0

	! Stage 5: two word stores per pass.  Both ways in arrive with at
	! least 8 bytes remaining (the short path had n >= 12 and lost at
	! most 3 bytes to alignment), so the pass count is never zero.
.Lpair_fill:
	shlr2	r0
	shlr	r0			! r0 = r6 >> 3
.Lpair_loop:
	dt	r0
	mov.l	r5,@-r4			! set 8 bytes per pass
	bf/s	.Lpair_loop
	 mov.l	r5,@-r4
	mov	#7,r0
	and	r0,r6			! 0..7 bytes left

	! Stage 6: fill bytes (length may be zero).
.Lbyte_fill:
	tst	r6,r6
	bt	.Ldone
.Lbyte_loop:
	dt	r6
	bf/s	.Lbyte_loop
	 mov.b	r5,@-r4
.Ldone:
	rts
	 mov	r4,r0			! r4 has walked back down to s