md5-asm.S
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Fast MD5 implementation for PPC
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/asm-compat.h>

#define rHP	r3
#define rWP	r4

#define rH0	r0
#define rH1	r6
#define rH2	r7
#define rH3	r5

#define rW00	r8
#define rW01	r9
#define rW02	r10
#define rW03	r11
#define rW04	r12
#define rW05	r14
#define rW06	r15
#define rW07	r16
#define rW08	r17
#define rW09	r18
#define rW10	r19
#define rW11	r20
#define rW12	r21
#define rW13	r22
#define rW14	r23
#define rW15	r24

#define rT0	r25
#define rT1	r26

#define INITIALIZE \
	PPC_STLU r1,-INT_FRAME_SIZE(r1); \
	SAVE_GPRS(14, 26, r1)		/* push registers onto stack	*/

#define FINALIZE \
	REST_GPRS(14, 26, r1);		/* pop registers from stack	*/ \
	addi	r1,r1,INT_FRAME_SIZE

#ifdef __BIG_ENDIAN__
#define LOAD_DATA(reg, off) \
	lwbrx	reg,0,rWP;		/* load data			*/
#define INC_PTR \
	addi	rWP,rWP,4;		/* increment per word		*/
#define NEXT_BLOCK			/* nothing to do		*/
#else
#define LOAD_DATA(reg, off) \
	lwz	reg,off(rWP);		/* load data			*/
#define INC_PTR				/* nothing to do		*/
#define NEXT_BLOCK \
	addi	rWP,rWP,64;		/* increment per block		*/
#endif

#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \
	LOAD_DATA(w0, off)		/* W				*/ \
	and	rT0,b,c;		/* 1: f = b and c		*/ \
	INC_PTR				/* ptr++			*/ \
	andc	rT1,d,b;		/* 1: f' = ~b and d		*/ \
	LOAD_DATA(w1, off+4)		/* W				*/ \
	or	rT0,rT0,rT1;		/* 1: f = f or f'		*/ \
	addi	w0,w0,k0l;		/* 1: wk = w + k		*/ \
	add	a,a,rT0;		/* 1: a = a + f			*/ \
	addis	w0,w0,k0h;		/* 1: wk = w + k'		*/ \
	addis	w1,w1,k1h;		/* 2: wk = w + k		*/ \
	add	a,a,w0;			/* 1: a = a + wk		*/ \
	addi	w1,w1,k1l;		/* 2: wk = w + k'		*/ \
	rotrwi	a,a,p;			/* 1: a = a rotl x		*/ \
	add	d,d,w1;			/* 2: a = a + wk		*/ \
	add	a,a,b;			/* 1: a = a + b			*/ \
	and	rT0,a,b;		/* 2: f = b and c		*/ \
	andc	rT1,c,a;		/* 2: f' = ~b and d		*/ \
	or	rT0,rT0,rT1;		/* 2: f = f or f'		*/ \
	add	d,d,rT0;		/* 2: a = a + f			*/ \
	INC_PTR				/* ptr++			*/ \
	rotrwi	d,d,q;			/* 2: a = a rotl x		*/ \
	add	d,d,a;			/* 2: a = a + b			*/

#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
	andc	rT0,c,d;		/* 1: f = c and ~d		*/ \
	and	rT1,b,d;		/* 1: f' = b and d		*/ \
	addi	w0,w0,k0l;		/* 1: wk = w + k		*/ \
	or	rT0,rT0,rT1;		/* 1: f = f or f'		*/ \
	addis	w0,w0,k0h;		/* 1: wk = w + k'		*/ \
	add	a,a,rT0;		/* 1: a = a + f			*/ \
	addi	w1,w1,k1l;		/* 2: wk = w + k		*/ \
	add	a,a,w0;			/* 1: a = a + wk		*/ \
	addis	w1,w1,k1h;		/* 2: wk = w + k'		*/ \
	andc	rT0,b,c;		/* 2: f = c and ~d		*/ \
	rotrwi	a,a,p;			/* 1: a = a rotl x		*/ \
	add	a,a,b;			/* 1: a = a + b			*/ \
	add	d,d,w1;			/* 2: a = a + wk		*/ \
	and	rT1,a,c;		/* 2: f' = b and d		*/ \
	or	rT0,rT0,rT1;		/* 2: f = f or f'		*/ \
	add	d,d,rT0;		/* 2: a = a + f			*/ \
	rotrwi	d,d,q;			/* 2: a = a rotl x		*/ \
	add	d,d,a;			/* 2: a = a + b			*/

#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
	xor	rT0,b,c;		/* 1: f' = b xor c		*/ \
	addi	w0,w0,k0l;		/* 1: wk = w + k		*/ \
	xor	rT1,rT0,d;		/* 1: f = f xor f'		*/ \
	addis	w0,w0,k0h;		/* 1: wk = w + k'		*/ \
	add	a,a,rT1;		/* 1: a = a + f			*/ \
	addi	w1,w1,k1l;		/* 2: wk = w + k		*/ \
	add	a,a,w0;			/* 1: a = a + wk		*/ \
	addis	w1,w1,k1h;		/* 2: wk = w + k'		*/ \
	rotrwi	a,a,p;			/* 1: a = a rotl x		*/ \
	add	d,d,w1;			/* 2: a = a + wk		*/ \
	add	a,a,b;			/* 1: a = a + b			*/ \
	xor	rT1,rT0,a;		/* 2: f = b xor f'		*/ \
	add	d,d,rT1;		/* 2: a = a + f			*/ \
	rotrwi	d,d,q;			/* 2: a = a rotl x		*/ \
	add	d,d,a;			/* 2: a = a + b			*/

#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
	addi	w0,w0,k0l;		/* 1: w = w + k			*/ \
	orc	rT0,b,d;		/* 1: f = b or ~d		*/ \
	addis	w0,w0,k0h;		/* 1: w = w + k'		*/ \
	xor	rT0,rT0,c;		/* 1: f = f xor c		*/ \
	add	a,a,w0;			/* 1: a = a + wk		*/ \
	addi	w1,w1,k1l;		/* 2: w = w + k			*/ \
	add	a,a,rT0;		/* 1: a = a + f			*/ \
	addis	w1,w1,k1h;		/* 2: w = w + k'		*/ \
	rotrwi	a,a,p;			/* 1: a = a rotl x		*/ \
	add	a,a,b;			/* 1: a = a + b			*/ \
	orc	rT0,a,c;		/* 2: f = b or ~d		*/ \
	add	d,d,w1;			/* 2: a = a + wk		*/ \
	xor	rT0,rT0,b;		/* 2: f = f xor c		*/ \
	add	d,d,rT0;		/* 2: a = a + f			*/ \
	rotrwi	d,d,q;			/* 2: a = a rotl x		*/ \
	add	d,d,a;			/* 2: a = a + b			*/

_GLOBAL(ppc_md5_transform)
	INITIALIZE

	mtctr	r5
	lwz	rH0,0(rHP)
	lwz	rH1,4(rHP)
	lwz	rH2,8(rHP)
	lwz	rH3,12(rHP)

ppc_md5_main:
	R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
		0xd76b, -23432, 0xe8c8, -18602)
	R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
		0x2420, 0x70db, 0xc1be, -12562)
	R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
		0xf57c, 0x0faf, 0x4788, -14806)
	R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
		0xa830, 0x4613, 0xfd47, -27391)
	R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
		0x6981, -26408, 0x8b45, -2129)
	R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
		0xffff, 0x5bb1, 0x895d, -10306)
	R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
		0x6b90, 0x1122, 0xfd98, 0x7193)
	R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
		0xa679, 0x438e, 0x49b4, 0x0821)

	R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
		0x0d56, 0x6e0c, 0x1810, 0x6d2d)
	R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
		0x9d02, -32109, 0x124c, 0x2332)
	R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
		0x8ea7, 0x4a33, 0x0245, -18270)
	R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
		0x8eee, -8608, 0xf258, -5095)
	R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
		0x969d, -10697, 0x1cbe, -15288)
	R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
		0x3317, 0x3e99, 0xdbd9, 0x7c15)
	R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
		0xac4b, 0x7772, 0xd8cf, 0x331d)
	R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
		0x6a28, 0x6dd8, 0x219a, 0x3b68)

	R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
		0x29cb, 0x28e5, 0x4218, -7788)
	R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9,
		0x473f, 0x06d1, 0x3aae, 0x3036)
	R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
		0xaea1, -15134, 0x640b, -11295)
	R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9,
		0x8f4c, 0x4887, 0xbc7c, -22499)
	R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
		0x7eb8, -27199, 0x00ea, 0x6050)
	R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9,
		0xe01a, 0x22fe, 0x4447, 0x69c5)
	R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
		0xb7f3, 0x0253, 0x59b1, 0x4d5b)
	R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9,
		0x4701, -27017, 0xc7bd, -19859)

	R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
		0x0988, -1462, 0x4c70, -19401)
	R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
		0xadaf, -5221, 0xfc99, 0x66f7)
	R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
		0x7e80, -16418, 0xba1e, -25587)
	R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
		0x4130, 0x380d, 0xe0c5, 0x738d)
	lwz	rW00,0(rHP)
	R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
		0xe837, -30770, 0xde8a, 0x69e8)
	lwz	rW14,4(rHP)
	R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
		0x9e79, 0x260f, 0x256d, -27941)
	lwz	rW12,8(rHP)
	R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
		0xab75, -20775, 0x4f9e, -28397)
	lwz	rW10,12(rHP)
	R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
		0x662b, 0x7c56, 0x11b2, 0x0358)

	add	rH0,rH0,rW00
	stw	rH0,0(rHP)
	add	rH1,rH1,rW14
	stw	rH1,4(rHP)
	add	rH2,rH2,rW12
	stw	rH2,8(rHP)
	add	rH3,rH3,rW10
	stw	rH3,12(rHP)
	NEXT_BLOCK

	bdnz	ppc_md5_main

	FINALIZE
	blr
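
A note on the constants passed to the round macros: ppc_md5_transform takes the hash state pointer in r3 (rHP), the input data pointer in r4 (rWP) and the 64-byte block count in r5 (moved into CTR), and each k0h/k0l and k1h/k1l pair is a standard 32-bit MD5 round constant split into 16-bit halves for addis/addi. Because addi sign-extends its immediate, a low half with bit 15 set is written as a negative decimal and the high half is pre-incremented by one to compensate; for example 0xd76aa478 becomes 0xd76b, -23432. The user-space C sketch below is not part of the kernel file, and the helper name split_md5_constant() is invented here purely to demonstrate that split and its reassembly.

#include <stdint.h>
#include <stdio.h>

/*
 * Split a 32-bit MD5 round constant into the (high, low) halves used as the
 * k0h/k0l macro arguments above.  addi sign-extends its 16-bit immediate, so
 * when bit 15 of the low half is set, the high half handed to addis must be
 * bumped by one to cancel the implicit subtraction of 0x10000.
 */
static void split_md5_constant(uint32_t k, uint16_t *hi, int16_t *lo)
{
	*lo = (int16_t)(k & 0xffff);
	*hi = (uint16_t)((k >> 16) + ((k >> 15) & 1));
}

int main(void)
{
	uint16_t hi;
	int16_t lo;
	uint32_t k;

	/* First R_00_15 constant, T[1] = 0xd76aa478 -> (0xd76b, -23432) */
	split_md5_constant(0xd76aa478u, &hi, &lo);
	printf("hi=0x%04x lo=%d\n", (unsigned)hi, lo);

	/* Reassemble what the addi/addis pair computes:
	 * (hi << 16) plus the sign-extended low half, modulo 2^32. */
	k = ((uint32_t)hi << 16) + (uint32_t)(int32_t)lo;
	printf("reassembled=0x%08x\n", k);

	return 0;
}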