// SPDX-License-Identifier: GPL-2.0-or-later
/* mpihelp-div.c  -  MPI helper functions
 * Copyright (C) 1994, 1996 Free Software Foundation, Inc.
 * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
 *
 * This file is part of GnuPG.
 *
 * Note: This code is heavily based on the GNU MP Library.
 *	 Actually it's the same code with only minor changes in the
 *	 way the data is stored; this is to support the abstraction
 *	 of an optional secure memory allocation which may be used
 *	 to avoid revealing of sensitive data due to paging etc.
 *	 The GNU MP Library itself is published under the LGPL;
 *	 however I decided to publish this code under the plain GPL.
 */

#include "mpi-internal.h"
#include "longlong.h"

#ifndef UMUL_TIME
#define UMUL_TIME 1
#endif
#ifndef UDIV_TIME
#define UDIV_TIME UMUL_TIME
#endif

mpi_limb_t
mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
	      mpi_limb_t divisor_limb)
{
	mpi_size_t i;
	mpi_limb_t n1, n0, r;
	mpi_limb_t dummy __maybe_unused;

	/* Botch: Should this be handled at all?  Rely on callers?  */
	if (!dividend_size)
		return 0;

	/* If multiplication is much faster than division, and the
	 * dividend is large, pre-invert the divisor, and use
	 * only multiplications in the inner loop.
	 *
	 * This test should be read:
	 *	Does it ever help to use udiv_qrnnd_preinv?
	 *	&& Does what we save compensate for the inversion overhead?
	 */
	if (UDIV_TIME > (2 * UMUL_TIME + 6)
	    && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
		int normalization_steps;

		normalization_steps = count_leading_zeros(divisor_limb);
		if (normalization_steps) {
			mpi_limb_t divisor_limb_inverted;

			divisor_limb <<= normalization_steps;

			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
			 * most significant bit (with weight 2**N) implicit.
			 *
			 * Special case for DIVISOR_LIMB == 100...000.
			 */
			if (!(divisor_limb << 1))
				divisor_limb_inverted = ~(mpi_limb_t)0;
			else
				udiv_qrnnd(divisor_limb_inverted, dummy,
					   -divisor_limb, 0, divisor_limb);

			n1 = dividend_ptr[dividend_size - 1];
			r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);

			/* Possible optimization:
			 * if (r == 0
			 * && divisor_limb > ((n1 << normalization_steps)
			 *		      | (dividend_ptr[dividend_size - 2] >> ...)))
			 * ...one division less...
			 */
			for (i = dividend_size - 2; i >= 0; i--) {
				n0 = dividend_ptr[i];
				UDIV_QRNND_PREINV(dummy, r, r,
						  ((n1 << normalization_steps)
						   | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
						  divisor_limb, divisor_limb_inverted);
				n1 = n0;
			}
			UDIV_QRNND_PREINV(dummy, r, r,
					  n1 << normalization_steps,
					  divisor_limb, divisor_limb_inverted);
			return r >> normalization_steps;
		} else {
			mpi_limb_t divisor_limb_inverted;

			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
			 * most significant bit (with weight 2**N) implicit.
			 *
			 * Special case for DIVISOR_LIMB == 100...000.
			 */
			if (!(divisor_limb << 1))
				divisor_limb_inverted = ~(mpi_limb_t)0;
			else
				udiv_qrnnd(divisor_limb_inverted, dummy,
					   -divisor_limb, 0, divisor_limb);

			i = dividend_size - 1;
			r = dividend_ptr[i];

			if (r >= divisor_limb)
				r = 0;
			else
				i--;

			for (; i >= 0; i--) {
				n0 = dividend_ptr[i];
				UDIV_QRNND_PREINV(dummy, r, r,
						  n0, divisor_limb, divisor_limb_inverted);
			}
			return r;
		}
	} else {
		if (UDIV_NEEDS_NORMALIZATION) {
			int normalization_steps;

			normalization_steps = count_leading_zeros(divisor_limb);
			if (normalization_steps) {
				divisor_limb <<= normalization_steps;

				n1 = dividend_ptr[dividend_size - 1];
				r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);

				/* Possible optimization:
				 * if (r == 0
				 * && divisor_limb > ((n1 << normalization_steps)
				 *		      | (dividend_ptr[dividend_size - 2] >> ...)))
				 * ...one division less...
				 */
				for (i = dividend_size - 2; i >= 0; i--) {
					n0 = dividend_ptr[i];
					udiv_qrnnd(dummy, r, r,
						   ((n1 << normalization_steps)
						    | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
						   divisor_limb);
					n1 = n0;
				}
				udiv_qrnnd(dummy, r, r,
					   n1 << normalization_steps,
					   divisor_limb);
				return r >> normalization_steps;
			}
		}
		/* No normalization needed, either because udiv_qrnnd doesn't require
		 * it, or because DIVISOR_LIMB is already normalized.
		 */
		i = dividend_size - 1;
		r = dividend_ptr[i];

		if (r >= divisor_limb)
			r = 0;
		else
			i--;

		for (; i >= 0; i--) {
			n0 = dividend_ptr[i];
			udiv_qrnnd(dummy, r, r, n0, divisor_limb);
		}
		return r;
	}
}
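/*
 * Illustration (not part of the library): the pre-inversion above is
 * the usual "invert once, multiply thereafter" trick.  For a
 * normalized N-bit divisor d (top bit set), the value computed is
 *
 *	v = (2**2N - 2**N * d) / d  =  floor(2**2N / d) - 2**N,
 *
 * i.e. the low N bits of an (N+1)-bit reciprocal whose leading bit is
 * implicit.  A minimal userspace sketch with 8-bit limbs, so the
 * numbers can be checked by hand (names are hypothetical):
 */
#if 0
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t d = 0xB0;	/* normalized divisor: top bit set */

	/* 2**16 - 2**8 * d, reduced mod 2**16 just as the -divisor_limb
	 * argument above does: 65536 - 45056 = 20480, which fits in
	 * 16 bits because d >= 2**7.
	 */
	uint16_t num = (uint16_t)(0 - ((uint16_t)d << 8));
	uint8_t v = num / d;	/* 20480 / 176 = 116 = 0x74 */

	/* Cross-check: floor(2**16 / d) - 2**8 = 372 - 256 = 116. */
	printf("d = 0x%02x, inverse = 0x%02x\n", d, v);
	return 0;
}
#endif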
/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
 * the NSIZE-DSIZE least significant quotient limbs at QP
 * and the DSIZE long remainder at NP.  If QEXTRA_LIMBS is
 * non-zero, generate that many fraction bits and append them after the
 * other quotient limbs.
 * Return the most significant limb of the quotient, this is always 0 or 1.
 *
 * Preconditions:
 * 0. NSIZE >= DSIZE.
 * 1. The most significant bit of the divisor must be set.
 * 2. QP must either not overlap with the input operands at all, or
 *    QP + DSIZE >= NP must hold true.  (This means that it's
 *    possible to put the quotient in the high part of NUM, right after the
 *    remainder in NUM.)
 * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
 */

mpi_limb_t
mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
	       mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize)
{
	mpi_limb_t most_significant_q_limb = 0;

	switch (dsize) {
	case 0:
		/* We are asked to divide by zero, so go ahead and do it!  (To make
		 * the compiler not remove this statement, return the value.)
		 */
		/*
		 * existing clients of this function have been modified
		 * not to call it with dsize == 0, so this should not happen
		 */
		return 1 / dsize;

	case 1:
		{
			mpi_size_t i;
			mpi_limb_t n1;
			mpi_limb_t d;

			d = dp[0];
			n1 = np[nsize - 1];

			if (n1 >= d) {
				n1 -= d;
				most_significant_q_limb = 1;
			}

			qp += qextra_limbs;
			for (i = nsize - 2; i >= 0; i--)
				udiv_qrnnd(qp[i], n1, n1, np[i], d);
			qp -= qextra_limbs;

			for (i = qextra_limbs - 1; i >= 0; i--)
				udiv_qrnnd(qp[i], n1, n1, 0, d);

			np[0] = n1;
		}
		break;

	case 2:
		{
			mpi_size_t i;
			mpi_limb_t n1, n0, n2;
			mpi_limb_t d1, d0;

			np += nsize - 2;
			d1 = dp[1];
			d0 = dp[0];
			n1 = np[1];
			n0 = np[0];

			if (n1 >= d1 && (n1 > d1 || n0 >= d0)) {
				sub_ddmmss(n1, n0, n1, n0, d1, d0);
				most_significant_q_limb = 1;
			}

			for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) {
				mpi_limb_t q;
				mpi_limb_t r;

				if (i >= qextra_limbs)
					np--;
				else
					np[0] = 0;

				if (n1 == d1) {
					/* Q should be either 111..111 or 111..110.  Need special
					 * treatment of this rare case as normal division would
					 * give overflow.  */
					q = ~(mpi_limb_t) 0;

					r = n0 + d1;
					if (r < d1) {	/* Carry in the addition? */
						add_ssaaaa(n1, n0, r - d0,
							   np[0], 0, d0);
						qp[i] = q;
						continue;
					}
					n1 = d0 - (d0 != 0 ? 1 : 0);
					n0 = -d0;
				} else {
					udiv_qrnnd(q, r, n1, n0, d1);
					umul_ppmm(n1, n0, d0, q);
				}

				n2 = np[0];
q_test:
				if (n1 > r || (n1 == r && n0 > n2)) {
					/* The estimated Q was too large. */
					q--;
					sub_ddmmss(n1, n0, n1, n0, 0, d0);
					r += d1;
					if (r >= d1)	/* If not carry, test Q again. */
						goto q_test;
				}

				qp[i] = q;
				sub_ddmmss(n1, n0, r, n2, n1, n0);
			}
			np[1] = n1;
			np[0] = n0;
		}
		break;

	default:
		{
			mpi_size_t i;
			mpi_limb_t dX, d1, n0;

			np += nsize - dsize;
			dX = dp[dsize - 1];
			d1 = dp[dsize - 2];
			n0 = np[dsize - 1];

			if (n0 >= dX) {
				if (n0 > dX
				    || mpihelp_cmp(np, dp, dsize - 1) >= 0) {
					mpihelp_sub_n(np, np, dp, dsize);
					n0 = np[dsize - 1];
					most_significant_q_limb = 1;
				}
			}

			for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) {
				mpi_limb_t q;
				mpi_limb_t n1, n2;
				mpi_limb_t cy_limb;

				if (i >= qextra_limbs) {
					np--;
					n2 = np[dsize];
				} else {
					n2 = np[dsize - 1];
					MPN_COPY_DECR(np + 1, np, dsize - 1);
					np[0] = 0;
				}

				if (n0 == dX) {
					/* This might over-estimate q, but it's probably not worth
					 * the extra code here to find out.  */
					q = ~(mpi_limb_t) 0;
				} else {
					mpi_limb_t r;

					udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
					umul_ppmm(n1, n0, d1, q);

					while (n1 > r
					       || (n1 == r
						   && n0 > np[dsize - 2])) {
						q--;
						r += dX;
						if (r < dX)	/* I.e. "carry in previous addition?" */
							break;
						n1 -= n0 < d1;
						n0 -= d1;
					}
				}

				/* Possible optimization: We already have (q * n0) and (1 * n1)
				 * after the calculation of q.  Taking advantage of that, we
				 * could make this loop make two iterations less.  */
				cy_limb = mpihelp_submul_1(np, dp, dsize, q);

				if (n2 != cy_limb) {
					mpihelp_add_n(np, np, dp, dsize);
					q--;
				}

				qp[i] = q;
				n0 = np[dsize - 1];
			}
		}
	}

	return most_significant_q_limb;
}
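/*
 * Illustration (not part of the library): the default case above is
 * Knuth's Algorithm D.  Each quotient limb is first estimated from the
 * top dividend limbs and the top divisor limb alone, then revised
 * downward (at most twice, since the divisor is normalized) using the
 * second divisor limb.  A userspace sketch of that estimate/correct
 * step with 8-bit limbs and hypothetical names:
 */
#if 0
#include <stdint.h>
#include <stdio.h>

/* Estimate the next quotient limb of (u2 u1 u0 ...) / (v1 v0 ...),
 * where v1 has its top bit set (base b = 256).
 */
static uint8_t estimate_q(uint8_t u2, uint8_t u1, uint8_t u0,
			  uint8_t v1, uint8_t v0)
{
	uint16_t top = ((uint16_t)u2 << 8) | u1;
	uint16_t qhat, rhat;

	if (u2 == v1) {
		/* Plain division would overflow a limb; mirror the
		 * n0 == dX special case above and start from b - 1.
		 */
		qhat = 0xFF;
		rhat = top - qhat * v1;
	} else {
		qhat = top / v1;
		rhat = top % v1;
	}

	/* qhat may be up to 2 too large: shrink it while the next
	 * divisor limb proves the estimate overshoots.  Once rhat
	 * reaches b the test can no longer succeed, so stop.
	 */
	while (rhat < 0x100 &&
	       (uint32_t)qhat * v0 > (((uint32_t)rhat << 8) | u0)) {
		qhat--;
		rhat += v1;
	}
	return (uint8_t)qhat;
}

int main(void)
{
	/* 0x400000 / 0x80FF: the raw estimate 0x80 is one too large
	 * and is corrected to the true quotient limb 0x7F.
	 */
	printf("qhat = 0x%02x\n", estimate_q(0x40, 0x00, 0x00, 0x80, 0xFF));
	return 0;
}
#endif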
/****************
 * Divide (DIVIDEND_PTR, DIVIDEND_SIZE) by DIVISOR_LIMB.
 * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
 * Return the single-limb remainder.
 * There are no constraints on the value of the divisor.
 *
 * QUOT_PTR and DIVIDEND_PTR might point to the same limb.
 */

mpi_limb_t
mpihelp_divmod_1(mpi_ptr_t quot_ptr,
		 mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
		 mpi_limb_t divisor_limb)
{
	mpi_size_t i;
	mpi_limb_t n1, n0, r;
	mpi_limb_t dummy __maybe_unused;

	if (!dividend_size)
		return 0;

	/* If multiplication is much faster than division, and the
	 * dividend is large, pre-invert the divisor, and use
	 * only multiplications in the inner loop.
	 *
	 * This test should be read:
	 *	Does it ever help to use udiv_qrnnd_preinv?
	 *	&& Does what we save compensate for the inversion overhead?
	 */
	if (UDIV_TIME > (2 * UMUL_TIME + 6)
	    && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
		int normalization_steps;

		normalization_steps = count_leading_zeros(divisor_limb);
		if (normalization_steps) {
			mpi_limb_t divisor_limb_inverted;

			divisor_limb <<= normalization_steps;

			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
			 * most significant bit (with weight 2**N) implicit.
			 */
			/* Special case for DIVISOR_LIMB == 100...000. */
			if (!(divisor_limb << 1))
				divisor_limb_inverted = ~(mpi_limb_t)0;
			else
				udiv_qrnnd(divisor_limb_inverted, dummy,
					   -divisor_limb, 0, divisor_limb);

			n1 = dividend_ptr[dividend_size - 1];
			r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);

			/* Possible optimization:
			 * if (r == 0
			 * && divisor_limb > ((n1 << normalization_steps)
			 *		      | (dividend_ptr[dividend_size - 2] >> ...)))
			 * ...one division less...
			 */
			for (i = dividend_size - 2; i >= 0; i--) {
				n0 = dividend_ptr[i];
				UDIV_QRNND_PREINV(quot_ptr[i + 1], r, r,
						  ((n1 << normalization_steps)
						   | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
						  divisor_limb, divisor_limb_inverted);
				n1 = n0;
			}
			UDIV_QRNND_PREINV(quot_ptr[0], r, r,
					  n1 << normalization_steps,
					  divisor_limb, divisor_limb_inverted);
			return r >> normalization_steps;
		} else {
			mpi_limb_t divisor_limb_inverted;

			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
			 * most significant bit (with weight 2**N) implicit.
			 */
			/* Special case for DIVISOR_LIMB == 100...000. */
			if (!(divisor_limb << 1))
				divisor_limb_inverted = ~(mpi_limb_t) 0;
			else
				udiv_qrnnd(divisor_limb_inverted, dummy,
					   -divisor_limb, 0, divisor_limb);

			i = dividend_size - 1;
			r = dividend_ptr[i];

			if (r >= divisor_limb)
				r = 0;
			else
				quot_ptr[i--] = 0;

			for (; i >= 0; i--) {
				n0 = dividend_ptr[i];
				UDIV_QRNND_PREINV(quot_ptr[i], r, r,
						  n0, divisor_limb, divisor_limb_inverted);
			}
			return r;
		}
	} else {
		if (UDIV_NEEDS_NORMALIZATION) {
			int normalization_steps;

			normalization_steps = count_leading_zeros(divisor_limb);
			if (normalization_steps) {
				divisor_limb <<= normalization_steps;

				n1 = dividend_ptr[dividend_size - 1];
				r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);

				/* Possible optimization:
				 * if (r == 0
				 * && divisor_limb > ((n1 << normalization_steps)
				 *		      | (dividend_ptr[dividend_size - 2] >> ...)))
				 * ...one division less...
				 */
				for (i = dividend_size - 2; i >= 0; i--) {
					n0 = dividend_ptr[i];
					udiv_qrnnd(quot_ptr[i + 1], r, r,
						   ((n1 << normalization_steps)
						    | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
						   divisor_limb);
					n1 = n0;
				}
				udiv_qrnnd(quot_ptr[0], r, r,
					   n1 << normalization_steps,
					   divisor_limb);
				return r >> normalization_steps;
			}
		}
		/* No normalization needed, either because udiv_qrnnd doesn't require
		 * it, or because DIVISOR_LIMB is already normalized.
		 */
		i = dividend_size - 1;
		r = dividend_ptr[i];

		if (r >= divisor_limb)
			r = 0;
		else
			quot_ptr[i--] = 0;

		for (; i >= 0; i--) {
			n0 = dividend_ptr[i];
			udiv_qrnnd(quot_ptr[i], r, r, n0, divisor_limb);
		}
		return r;
	}
}
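/*
 * Illustration (not part of the library): stripped of the
 * pre-inversion and normalization machinery, mpihelp_divmod_1() is
 * schoolbook short division, with the running remainder carried into
 * the high half of each two-limb divide.  A userspace model with
 * 8-bit limbs and hypothetical names (limbs least significant first,
 * as above; quot and num may alias, matching the comment above):
 */
#if 0
#include <stdint.h>
#include <stdio.h>

static uint8_t divmod_1(uint8_t *quot, const uint8_t *num, int nlimbs,
			uint8_t d)
{
	uint8_t r = 0;
	int i;

	for (i = nlimbs - 1; i >= 0; i--) {
		/* the udiv_qrnnd step: divide (r, num[i]) by d */
		uint16_t two = ((uint16_t)r << 8) | num[i];

		quot[i] = two / d;
		r = two % d;
	}
	return r;
}

int main(void)
{
	uint8_t n[2] = { 0x39, 0x30 };	/* 0x3039 = 12345 */
	uint8_t q[2];
	uint8_t r = divmod_1(q, n, 2, 10);

	/* 12345 = 1234 * 10 + 5, so q = 0x04D2 and r = 5 */
	printf("q = 0x%02x%02x, r = %u\n", q[1], q[0], r);
	return 0;
}
#endif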