csum.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
csum.c (4093B)
      1// SPDX-License-Identifier: GPL-2.0-only
      2// Copyright (C) 2019-2020 Arm Ltd.
      3
      4#include <linux/compiler.h>
      5#include <linux/kasan-checks.h>
      6#include <linux/kernel.h>
      7
      8#include <net/checksum.h>
      9
     10/* Looks dumb, but generates nice-ish code */
     11static u64 accumulate(u64 sum, u64 data)
     12{
     13	__uint128_t tmp = (__uint128_t)sum + data;
     14	return tmp + (tmp >> 64);
     15}
     16
     17/*
     18 * We over-read the buffer and this makes KASAN unhappy. Instead, disable
     19 * instrumentation and call kasan explicitly.
     20 */
     21unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
     22{
     23	unsigned int offset, shift, sum;
     24	const u64 *ptr;
     25	u64 data, sum64 = 0;
     26
     27	if (unlikely(len == 0))
     28		return 0;
     29
     30	offset = (unsigned long)buff & 7;
     31	/*
     32	 * This is to all intents and purposes safe, since rounding down cannot
     33	 * result in a different page or cache line being accessed, and @buff
     34	 * should absolutely not be pointing to anything read-sensitive. We do,
     35	 * however, have to be careful not to piss off KASAN, which means using
     36	 * unchecked reads to accommodate the head and tail, for which we'll
     37	 * compensate with an explicit check up-front.
     38	 */
     39	kasan_check_read(buff, len);
     40	ptr = (u64 *)(buff - offset);
     41	len = len + offset - 8;
     42
     43	/*
     44	 * Head: zero out any excess leading bytes. Shifting back by the same
     45	 * amount should be at least as fast as any other way of handling the
     46	 * odd/even alignment, and means we can ignore it until the very end.
     47	 */
     48	shift = offset * 8;
     49	data = *ptr++;
     50#ifdef __LITTLE_ENDIAN
     51	data = (data >> shift) << shift;
     52#else
     53	data = (data << shift) >> shift;
     54#endif
     55
     56	/*
     57	 * Body: straightforward aligned loads from here on (the paired loads
     58	 * underlying the quadword type still only need dword alignment). The
     59	 * main loop strictly excludes the tail, so the second loop will always
     60	 * run at least once.
     61	 */
     62	while (unlikely(len > 64)) {
     63		__uint128_t tmp1, tmp2, tmp3, tmp4;
     64
     65		tmp1 = *(__uint128_t *)ptr;
     66		tmp2 = *(__uint128_t *)(ptr + 2);
     67		tmp3 = *(__uint128_t *)(ptr + 4);
     68		tmp4 = *(__uint128_t *)(ptr + 6);
     69
     70		len -= 64;
     71		ptr += 8;
     72
     73		/* This is the "don't dump the carry flag into a GPR" idiom */
     74		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
     75		tmp2 += (tmp2 >> 64) | (tmp2 << 64);
     76		tmp3 += (tmp3 >> 64) | (tmp3 << 64);
     77		tmp4 += (tmp4 >> 64) | (tmp4 << 64);
     78		tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64);
     79		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
     80		tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64);
     81		tmp3 += (tmp3 >> 64) | (tmp3 << 64);
     82		tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64);
     83		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
     84		tmp1 = ((tmp1 >> 64) << 64) | sum64;
     85		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
     86		sum64 = tmp1 >> 64;
     87	}
     88	while (len > 8) {
     89		__uint128_t tmp;
     90
     91		sum64 = accumulate(sum64, data);
     92		tmp = *(__uint128_t *)ptr;
     93
     94		len -= 16;
     95		ptr += 2;
     96
     97#ifdef __LITTLE_ENDIAN
     98		data = tmp >> 64;
     99		sum64 = accumulate(sum64, tmp);
    100#else
    101		data = tmp;
    102		sum64 = accumulate(sum64, tmp >> 64);
    103#endif
    104	}
    105	if (len > 0) {
    106		sum64 = accumulate(sum64, data);
    107		data = *ptr;
    108		len -= 8;
    109	}
    110	/*
    111	 * Tail: zero any over-read bytes similarly to the head, again
    112	 * preserving odd/even alignment.
    113	 */
    114	shift = len * -8;
    115#ifdef __LITTLE_ENDIAN
    116	data = (data << shift) >> shift;
    117#else
    118	data = (data >> shift) << shift;
    119#endif
    120	sum64 = accumulate(sum64, data);
    121
    122	/* Finally, folding */
    123	sum64 += (sum64 >> 32) | (sum64 << 32);
    124	sum = sum64 >> 32;
    125	sum += (sum >> 16) | (sum << 16);
    126	if (offset & 1)
    127		return (u16)swab32(sum);
    128
    129	return sum >> 16;
    130}
    131
    132__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
    133			const struct in6_addr *daddr,
    134			__u32 len, __u8 proto, __wsum csum)
    135{
    136	__uint128_t src, dst;
    137	u64 sum = (__force u64)csum;
    138
    139	src = *(const __uint128_t *)saddr->s6_addr;
    140	dst = *(const __uint128_t *)daddr->s6_addr;
    141
    142	sum += (__force u32)htonl(len);
    143#ifdef __LITTLE_ENDIAN
    144	sum += (u32)proto << 24;
    145#else
    146	sum += proto;
    147#endif
    148	src += (src >> 64) | (src << 64);
    149	dst += (dst >> 64) | (dst << 64);
    150
    151	sum = accumulate(sum, src >> 64);
    152	sum = accumulate(sum, dst >> 64);
    153
    154	sum += ((sum >> 32) | (sum << 32));
    155	return csum_fold((__force __wsum)(sum >> 32));
    156}
    157EXPORT_SYMBOL(csum_ipv6_magic);