cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

reciprocal_div.h (3356B)


      1/* SPDX-License-Identifier: GPL-2.0 */
      2#ifndef _LINUX_RECIPROCAL_DIV_H
      3#define _LINUX_RECIPROCAL_DIV_H
      4
      5#include <linux/types.h>
      6
      7/*
      8 * This algorithm is based on the paper "Division by Invariant
      9 * Integers Using Multiplication" by Torbjörn Granlund and Peter
     10 * L. Montgomery.
     11 *
     12 * The assembler implementation from Agner Fog, which this code is
     13 * based on, can be found here:
     14 * http://www.agner.org/optimize/asmlib.zip
     15 *
     16 * This optimization for A/B is helpful if the divisor B is mostly
     17 * runtime invariant. The reciprocal of B is calculated in the
     18 * slow-path with reciprocal_value(). The fast-path can then just use
     19 * a much faster multiplication operation with a variable dividend A
     20 * to calculate the division A/B.
     21 */
     22
     23struct reciprocal_value {
     24	u32 m;
     25	u8 sh1, sh2;
     26};
     27
     28/* "reciprocal_value" and "reciprocal_divide" together implement the basic
     29 * version of the algorithm described in Figure 4.1 of the paper.
     30 */
     31struct reciprocal_value reciprocal_value(u32 d);
     32
     33static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
     34{
     35	u32 t = (u32)(((u64)a * R.m) >> 32);
     36	return (t + ((a - t) >> R.sh1)) >> R.sh2;
     37}
     38
     39struct reciprocal_value_adv {
     40	u32 m;
     41	u8 sh, exp;
     42	bool is_wide_m;
     43};
     44
     45/* "reciprocal_value_adv" implements the advanced version of the algorithm
     46 * described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose
     47 * ceil(log2(d)) result will be 32 which then requires u128 divide on host. The
     48 * exception case could be easily handled before calling "reciprocal_value_adv".
     49 *
     50 * The advanced version requires more complex calculation to get the reciprocal
     51 * multiplier and other control variables, but then could reduce the required
     52 * emulation operations.
     53 *
     54 * It makes no sense to use this advanced version for host divide emulation,
     55 * those extra complexities for calculating multiplier etc could completely
     56 * waive our saving on emulation operations.
     57 *
     58 * However, it makes sense to use it for JIT divide code generation for which
     59 * we are willing to trade performance of JITed code with that of host. As shown
     60 * by the following pseudo code, the required emulation operations could go down
     61 * from 6 (the basic version) to 3 or 4.
     62 *
     63 * To use the result of "reciprocal_value_adv", suppose we want to calculate
     64 * n/d, the pseudo C code will be:
     65 *
     66 *   struct reciprocal_value_adv rvalue;
     67 *   u8 pre_shift, exp;
     68 *
     69 *   // handle exception case.
     70 *   if (d >= (1U << 31)) {
     71 *     result = n >= d;
     72 *     return;
     73 *   }
     74 *
     75 *   rvalue = reciprocal_value_adv(d, 32)
     76 *   exp = rvalue.exp;
     77 *   if (rvalue.is_wide_m && !(d & 1)) {
     78 *     // floor(log2(d & (2^32 -d)))
     79 *     pre_shift = fls(d & -d) - 1;
     80 *     rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift);
     81 *   } else {
     82 *     pre_shift = 0;
     83 *   }
     84 *
     85 *   // code generation starts.
     86 *   if (imm == 1U << exp) {
     87 *     result = n >> exp;
     88 *   } else if (rvalue.is_wide_m) {
     89 *     // pre_shift must be zero when reached here.
     90 *     t = (n * rvalue.m) >> 32;
     91 *     result = n - t;
     92 *     result >>= 1;
     93 *     result += t;
     94 *     result >>= rvalue.sh - 1;
     95 *   } else {
     96 *     if (pre_shift)
     97 *       result = n >> pre_shift;
     98 *     result = ((u64)result * rvalue.m) >> 32;
     99 *     result >>= rvalue.sh;
    100 *   }
    101 */
    102struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec);
    103
    104#endif /* _LINUX_RECIPROCAL_DIV_H */