padlock-sha.c (15046B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * Cryptographic API. 4 * 5 * Support for VIA PadLock hardware crypto engine. 6 * 7 * Copyright (c) 2006 Michal Ludvig <michal@logix.cz> 8 */ 9 10#include <crypto/internal/hash.h> 11#include <crypto/padlock.h> 12#include <crypto/sha1.h> 13#include <crypto/sha2.h> 14#include <linux/err.h> 15#include <linux/module.h> 16#include <linux/init.h> 17#include <linux/errno.h> 18#include <linux/interrupt.h> 19#include <linux/kernel.h> 20#include <linux/scatterlist.h> 21#include <asm/cpu_device_id.h> 22#include <asm/fpu/api.h> 23 24struct padlock_sha_desc { 25 struct shash_desc fallback; 26}; 27 28struct padlock_sha_ctx { 29 struct crypto_shash *fallback; 30}; 31 32static int padlock_sha_init(struct shash_desc *desc) 33{ 34 struct padlock_sha_desc *dctx = shash_desc_ctx(desc); 35 struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm); 36 37 dctx->fallback.tfm = ctx->fallback; 38 return crypto_shash_init(&dctx->fallback); 39} 40 41static int padlock_sha_update(struct shash_desc *desc, 42 const u8 *data, unsigned int length) 43{ 44 struct padlock_sha_desc *dctx = shash_desc_ctx(desc); 45 46 return crypto_shash_update(&dctx->fallback, data, length); 47} 48 49static int padlock_sha_export(struct shash_desc *desc, void *out) 50{ 51 struct padlock_sha_desc *dctx = shash_desc_ctx(desc); 52 53 return crypto_shash_export(&dctx->fallback, out); 54} 55 56static int padlock_sha_import(struct shash_desc *desc, const void *in) 57{ 58 struct padlock_sha_desc *dctx = shash_desc_ctx(desc); 59 struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm); 60 61 dctx->fallback.tfm = ctx->fallback; 62 return crypto_shash_import(&dctx->fallback, in); 63} 64 65static inline void padlock_output_block(uint32_t *src, 66 uint32_t *dst, size_t count) 67{ 68 while (count--) 69 *dst++ = swab32(*src++); 70} 71 72static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in, 73 unsigned int count, u8 *out) 74{ 75 /* We can't store directly to *out as it may be unaligned. */ 76 /* BTW Don't reduce the buffer size below 128 Bytes! 77 * PadLock microcode needs it that big. */ 78 char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__ 79 ((aligned(STACK_ALIGN))); 80 char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); 81 struct padlock_sha_desc *dctx = shash_desc_ctx(desc); 82 struct sha1_state state; 83 unsigned int space; 84 unsigned int leftover; 85 int err; 86 87 err = crypto_shash_export(&dctx->fallback, &state); 88 if (err) 89 goto out; 90 91 if (state.count + count > ULONG_MAX) 92 return crypto_shash_finup(&dctx->fallback, in, count, out); 93 94 leftover = ((state.count - 1) & (SHA1_BLOCK_SIZE - 1)) + 1; 95 space = SHA1_BLOCK_SIZE - leftover; 96 if (space) { 97 if (count > space) { 98 err = crypto_shash_update(&dctx->fallback, in, space) ?: 99 crypto_shash_export(&dctx->fallback, &state); 100 if (err) 101 goto out; 102 count -= space; 103 in += space; 104 } else { 105 memcpy(state.buffer + leftover, in, count); 106 in = state.buffer; 107 count += leftover; 108 state.count &= ~(SHA1_BLOCK_SIZE - 1); 109 } 110 } 111 112 memcpy(result, &state.state, SHA1_DIGEST_SIZE); 113 114 asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */ 115 : \ 116 : "c"((unsigned long)state.count + count), \ 117 "a"((unsigned long)state.count), \ 118 "S"(in), "D"(result)); 119 120 padlock_output_block((uint32_t *)result, (uint32_t *)out, 5); 121 122out: 123 return err; 124} 125 126static int padlock_sha1_final(struct shash_desc *desc, u8 *out) 127{ 128 u8 buf[4]; 129 130 return padlock_sha1_finup(desc, buf, 0, out); 131} 132 133static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in, 134 unsigned int count, u8 *out) 135{ 136 /* We can't store directly to *out as it may be unaligned. */ 137 /* BTW Don't reduce the buffer size below 128 Bytes! 138 * PadLock microcode needs it that big. */ 139 char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__ 140 ((aligned(STACK_ALIGN))); 141 char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); 142 struct padlock_sha_desc *dctx = shash_desc_ctx(desc); 143 struct sha256_state state; 144 unsigned int space; 145 unsigned int leftover; 146 int err; 147 148 err = crypto_shash_export(&dctx->fallback, &state); 149 if (err) 150 goto out; 151 152 if (state.count + count > ULONG_MAX) 153 return crypto_shash_finup(&dctx->fallback, in, count, out); 154 155 leftover = ((state.count - 1) & (SHA256_BLOCK_SIZE - 1)) + 1; 156 space = SHA256_BLOCK_SIZE - leftover; 157 if (space) { 158 if (count > space) { 159 err = crypto_shash_update(&dctx->fallback, in, space) ?: 160 crypto_shash_export(&dctx->fallback, &state); 161 if (err) 162 goto out; 163 count -= space; 164 in += space; 165 } else { 166 memcpy(state.buf + leftover, in, count); 167 in = state.buf; 168 count += leftover; 169 state.count &= ~(SHA1_BLOCK_SIZE - 1); 170 } 171 } 172 173 memcpy(result, &state.state, SHA256_DIGEST_SIZE); 174 175 asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */ 176 : \ 177 : "c"((unsigned long)state.count + count), \ 178 "a"((unsigned long)state.count), \ 179 "S"(in), "D"(result)); 180 181 padlock_output_block((uint32_t *)result, (uint32_t *)out, 8); 182 183out: 184 return err; 185} 186 187static int padlock_sha256_final(struct shash_desc *desc, u8 *out) 188{ 189 u8 buf[4]; 190 191 return padlock_sha256_finup(desc, buf, 0, out); 192} 193 194static int padlock_init_tfm(struct crypto_shash *hash) 195{ 196 const char *fallback_driver_name = crypto_shash_alg_name(hash); 197 struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash); 198 struct crypto_shash *fallback_tfm; 199 200 /* Allocate a fallback and abort if it failed. */ 201 fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0, 202 CRYPTO_ALG_NEED_FALLBACK); 203 if (IS_ERR(fallback_tfm)) { 204 printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n", 205 fallback_driver_name); 206 return PTR_ERR(fallback_tfm); 207 } 208 209 ctx->fallback = fallback_tfm; 210 hash->descsize += crypto_shash_descsize(fallback_tfm); 211 return 0; 212} 213 214static void padlock_exit_tfm(struct crypto_shash *hash) 215{ 216 struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash); 217 218 crypto_free_shash(ctx->fallback); 219} 220 221static struct shash_alg sha1_alg = { 222 .digestsize = SHA1_DIGEST_SIZE, 223 .init = padlock_sha_init, 224 .update = padlock_sha_update, 225 .finup = padlock_sha1_finup, 226 .final = padlock_sha1_final, 227 .export = padlock_sha_export, 228 .import = padlock_sha_import, 229 .init_tfm = padlock_init_tfm, 230 .exit_tfm = padlock_exit_tfm, 231 .descsize = sizeof(struct padlock_sha_desc), 232 .statesize = sizeof(struct sha1_state), 233 .base = { 234 .cra_name = "sha1", 235 .cra_driver_name = "sha1-padlock", 236 .cra_priority = PADLOCK_CRA_PRIORITY, 237 .cra_flags = CRYPTO_ALG_NEED_FALLBACK, 238 .cra_blocksize = SHA1_BLOCK_SIZE, 239 .cra_ctxsize = sizeof(struct padlock_sha_ctx), 240 .cra_module = THIS_MODULE, 241 } 242}; 243 244static struct shash_alg sha256_alg = { 245 .digestsize = SHA256_DIGEST_SIZE, 246 .init = padlock_sha_init, 247 .update = padlock_sha_update, 248 .finup = padlock_sha256_finup, 249 .final = padlock_sha256_final, 250 .export = padlock_sha_export, 251 .import = padlock_sha_import, 252 .init_tfm = padlock_init_tfm, 253 .exit_tfm = padlock_exit_tfm, 254 .descsize = sizeof(struct padlock_sha_desc), 255 .statesize = sizeof(struct sha256_state), 256 .base = { 257 .cra_name = "sha256", 258 .cra_driver_name = "sha256-padlock", 259 .cra_priority = PADLOCK_CRA_PRIORITY, 260 .cra_flags = CRYPTO_ALG_NEED_FALLBACK, 261 .cra_blocksize = SHA256_BLOCK_SIZE, 262 .cra_ctxsize = sizeof(struct padlock_sha_ctx), 263 .cra_module = THIS_MODULE, 264 } 265}; 266 267/* Add two shash_alg instance for hardware-implemented * 268* multiple-parts hash supported by VIA Nano Processor.*/ 269static int padlock_sha1_init_nano(struct shash_desc *desc) 270{ 271 struct sha1_state *sctx = shash_desc_ctx(desc); 272 273 *sctx = (struct sha1_state){ 274 .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, 275 }; 276 277 return 0; 278} 279 280static int padlock_sha1_update_nano(struct shash_desc *desc, 281 const u8 *data, unsigned int len) 282{ 283 struct sha1_state *sctx = shash_desc_ctx(desc); 284 unsigned int partial, done; 285 const u8 *src; 286 /*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/ 287 u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__ 288 ((aligned(STACK_ALIGN))); 289 u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); 290 291 partial = sctx->count & 0x3f; 292 sctx->count += len; 293 done = 0; 294 src = data; 295 memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE); 296 297 if ((partial + len) >= SHA1_BLOCK_SIZE) { 298 299 /* Append the bytes in state's buffer to a block to handle */ 300 if (partial) { 301 done = -partial; 302 memcpy(sctx->buffer + partial, data, 303 done + SHA1_BLOCK_SIZE); 304 src = sctx->buffer; 305 asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" 306 : "+S"(src), "+D"(dst) \ 307 : "a"((long)-1), "c"((unsigned long)1)); 308 done += SHA1_BLOCK_SIZE; 309 src = data + done; 310 } 311 312 /* Process the left bytes from the input data */ 313 if (len - done >= SHA1_BLOCK_SIZE) { 314 asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" 315 : "+S"(src), "+D"(dst) 316 : "a"((long)-1), 317 "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE))); 318 done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE); 319 src = data + done; 320 } 321 partial = 0; 322 } 323 memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE); 324 memcpy(sctx->buffer + partial, src, len - done); 325 326 return 0; 327} 328 329static int padlock_sha1_final_nano(struct shash_desc *desc, u8 *out) 330{ 331 struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc); 332 unsigned int partial, padlen; 333 __be64 bits; 334 static const u8 padding[64] = { 0x80, }; 335 336 bits = cpu_to_be64(state->count << 3); 337 338 /* Pad out to 56 mod 64 */ 339 partial = state->count & 0x3f; 340 padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial); 341 padlock_sha1_update_nano(desc, padding, padlen); 342 343 /* Append length field bytes */ 344 padlock_sha1_update_nano(desc, (const u8 *)&bits, sizeof(bits)); 345 346 /* Swap to output */ 347 padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 5); 348 349 return 0; 350} 351 352static int padlock_sha256_init_nano(struct shash_desc *desc) 353{ 354 struct sha256_state *sctx = shash_desc_ctx(desc); 355 356 *sctx = (struct sha256_state){ 357 .state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, \ 358 SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7}, 359 }; 360 361 return 0; 362} 363 364static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data, 365 unsigned int len) 366{ 367 struct sha256_state *sctx = shash_desc_ctx(desc); 368 unsigned int partial, done; 369 const u8 *src; 370 /*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/ 371 u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__ 372 ((aligned(STACK_ALIGN))); 373 u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); 374 375 partial = sctx->count & 0x3f; 376 sctx->count += len; 377 done = 0; 378 src = data; 379 memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE); 380 381 if ((partial + len) >= SHA256_BLOCK_SIZE) { 382 383 /* Append the bytes in state's buffer to a block to handle */ 384 if (partial) { 385 done = -partial; 386 memcpy(sctx->buf + partial, data, 387 done + SHA256_BLOCK_SIZE); 388 src = sctx->buf; 389 asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" 390 : "+S"(src), "+D"(dst) 391 : "a"((long)-1), "c"((unsigned long)1)); 392 done += SHA256_BLOCK_SIZE; 393 src = data + done; 394 } 395 396 /* Process the left bytes from input data*/ 397 if (len - done >= SHA256_BLOCK_SIZE) { 398 asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" 399 : "+S"(src), "+D"(dst) 400 : "a"((long)-1), 401 "c"((unsigned long)((len - done) / 64))); 402 done += ((len - done) - (len - done) % 64); 403 src = data + done; 404 } 405 partial = 0; 406 } 407 memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE); 408 memcpy(sctx->buf + partial, src, len - done); 409 410 return 0; 411} 412 413static int padlock_sha256_final_nano(struct shash_desc *desc, u8 *out) 414{ 415 struct sha256_state *state = 416 (struct sha256_state *)shash_desc_ctx(desc); 417 unsigned int partial, padlen; 418 __be64 bits; 419 static const u8 padding[64] = { 0x80, }; 420 421 bits = cpu_to_be64(state->count << 3); 422 423 /* Pad out to 56 mod 64 */ 424 partial = state->count & 0x3f; 425 padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial); 426 padlock_sha256_update_nano(desc, padding, padlen); 427 428 /* Append length field bytes */ 429 padlock_sha256_update_nano(desc, (const u8 *)&bits, sizeof(bits)); 430 431 /* Swap to output */ 432 padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 8); 433 434 return 0; 435} 436 437static int padlock_sha_export_nano(struct shash_desc *desc, 438 void *out) 439{ 440 int statesize = crypto_shash_statesize(desc->tfm); 441 void *sctx = shash_desc_ctx(desc); 442 443 memcpy(out, sctx, statesize); 444 return 0; 445} 446 447static int padlock_sha_import_nano(struct shash_desc *desc, 448 const void *in) 449{ 450 int statesize = crypto_shash_statesize(desc->tfm); 451 void *sctx = shash_desc_ctx(desc); 452 453 memcpy(sctx, in, statesize); 454 return 0; 455} 456 457static struct shash_alg sha1_alg_nano = { 458 .digestsize = SHA1_DIGEST_SIZE, 459 .init = padlock_sha1_init_nano, 460 .update = padlock_sha1_update_nano, 461 .final = padlock_sha1_final_nano, 462 .export = padlock_sha_export_nano, 463 .import = padlock_sha_import_nano, 464 .descsize = sizeof(struct sha1_state), 465 .statesize = sizeof(struct sha1_state), 466 .base = { 467 .cra_name = "sha1", 468 .cra_driver_name = "sha1-padlock-nano", 469 .cra_priority = PADLOCK_CRA_PRIORITY, 470 .cra_blocksize = SHA1_BLOCK_SIZE, 471 .cra_module = THIS_MODULE, 472 } 473}; 474 475static struct shash_alg sha256_alg_nano = { 476 .digestsize = SHA256_DIGEST_SIZE, 477 .init = padlock_sha256_init_nano, 478 .update = padlock_sha256_update_nano, 479 .final = padlock_sha256_final_nano, 480 .export = padlock_sha_export_nano, 481 .import = padlock_sha_import_nano, 482 .descsize = sizeof(struct sha256_state), 483 .statesize = sizeof(struct sha256_state), 484 .base = { 485 .cra_name = "sha256", 486 .cra_driver_name = "sha256-padlock-nano", 487 .cra_priority = PADLOCK_CRA_PRIORITY, 488 .cra_blocksize = SHA256_BLOCK_SIZE, 489 .cra_module = THIS_MODULE, 490 } 491}; 492 493static const struct x86_cpu_id padlock_sha_ids[] = { 494 X86_MATCH_FEATURE(X86_FEATURE_PHE, NULL), 495 {} 496}; 497MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids); 498 499static int __init padlock_init(void) 500{ 501 int rc = -ENODEV; 502 struct cpuinfo_x86 *c = &cpu_data(0); 503 struct shash_alg *sha1; 504 struct shash_alg *sha256; 505 506 if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN)) 507 return -ENODEV; 508 509 /* Register the newly added algorithm module if on * 510 * VIA Nano processor, or else just do as before */ 511 if (c->x86_model < 0x0f) { 512 sha1 = &sha1_alg; 513 sha256 = &sha256_alg; 514 } else { 515 sha1 = &sha1_alg_nano; 516 sha256 = &sha256_alg_nano; 517 } 518 519 rc = crypto_register_shash(sha1); 520 if (rc) 521 goto out; 522 523 rc = crypto_register_shash(sha256); 524 if (rc) 525 goto out_unreg1; 526 527 printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n"); 528 529 return 0; 530 531out_unreg1: 532 crypto_unregister_shash(sha1); 533 534out: 535 printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n"); 536 return rc; 537} 538 539static void __exit padlock_fini(void) 540{ 541 struct cpuinfo_x86 *c = &cpu_data(0); 542 543 if (c->x86_model >= 0x0f) { 544 crypto_unregister_shash(&sha1_alg_nano); 545 crypto_unregister_shash(&sha256_alg_nano); 546 } else { 547 crypto_unregister_shash(&sha1_alg); 548 crypto_unregister_shash(&sha256_alg); 549 } 550} 551 552module_init(padlock_init); 553module_exit(padlock_fini); 554 555MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support."); 556MODULE_LICENSE("GPL"); 557MODULE_AUTHOR("Michal Ludvig"); 558 559MODULE_ALIAS_CRYPTO("sha1-all"); 560MODULE_ALIAS_CRYPTO("sha256-all"); 561MODULE_ALIAS_CRYPTO("sha1-padlock"); 562MODULE_ALIAS_CRYPTO("sha256-padlock");