memcpy.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
memcpy.c (4089B)
      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 *  linux/arch/alpha/lib/memcpy.c
      4 *
      5 *  Copyright (C) 1995  Linus Torvalds
      6 */
      7
      8/*
      9 * This is a reasonably optimized memcpy() routine.
     10 */
     11
     12/*
     13 * Note that the C code is written to be optimized into good assembly. However,
     14 * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
     15 * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
     16 * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
     17 */
     18
     19#include <linux/types.h>
     20#include <linux/export.h>
     21
     22/*
     23 * This should be done in one go with ldq_u*2/mask/stq_u. Do it
     24 * with a macro so that we can fix it up later..
     25 */
     26#define ALIGN_DEST_TO8_UP(d,s,n) \
     27	while (d & 7) { \
     28		if (n <= 0) return; \
     29		n--; \
     30		*(char *) d = *(char *) s; \
     31		d++; s++; \
     32	}
     33#define ALIGN_DEST_TO8_DN(d,s,n) \
     34	while (d & 7) { \
     35		if (n <= 0) return; \
     36		n--; \
     37		d--; s--; \
     38		*(char *) d = *(char *) s; \
     39	}
     40
     41/*
     42 * This should similarly be done with ldq_u*2/mask/stq. The destination
     43 * is aligned, but we don't fill in a full quad-word
     44 */
     45#define DO_REST_UP(d,s,n) \
     46	while (n > 0) { \
     47		n--; \
     48		*(char *) d = *(char *) s; \
     49		d++; s++; \
     50	}
     51#define DO_REST_DN(d,s,n) \
     52	while (n > 0) { \
     53		n--; \
     54		d--; s--; \
     55		*(char *) d = *(char *) s; \
     56	}
     57
     58/*
     59 * This should be done with ldq/mask/stq. The source and destination are
     60 * aligned, but we don't fill in a full quad-word
     61 */
     62#define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n)
     63#define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n)
     64
     65/*
     66 * This does unaligned memory copies. We want to avoid storing to
     67 * an unaligned address, as that would do a read-modify-write cycle.
     68 * We also want to avoid double-reading the unaligned reads.
     69 *
     70 * Note the ordering to try to avoid load (and address generation) latencies.
     71 */
     72static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
     73					  long n)
     74{
     75	ALIGN_DEST_TO8_UP(d,s,n);
     76	n -= 8;			/* to avoid compare against 8 in the loop */
     77	if (n >= 0) {
     78		unsigned long low_word, high_word;
     79		__asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s));
     80		do {
     81			unsigned long tmp;
     82			__asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8)));
     83			n -= 8;
     84			__asm__("extql %1,%2,%0"
     85				:"=r" (low_word)
     86				:"r" (low_word), "r" (s));
     87			__asm__("extqh %1,%2,%0"
     88				:"=r" (tmp)
     89				:"r" (high_word), "r" (s));
     90			s += 8;
     91			*(unsigned long *) d = low_word | tmp;
     92			d += 8;
     93			low_word = high_word;
     94		} while (n >= 0);
     95	}
     96	n += 8;
     97	DO_REST_UP(d,s,n);
     98}
     99
    100static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
    101					  long n)
    102{
    103	/* I don't understand AXP assembler well enough for this. -Tim */
    104	s += n;
    105	d += n;
    106	while (n--)
    107		* (char *) --d = * (char *) --s;
    108}
    109
    110/*
    111 * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
    112 * for the load-store. I don't know why, but it would seem that using a floating
    113 * point register for the move seems to slow things down (very small difference,
    114 * though).
    115 *
    116 * Note the ordering to try to avoid load (and address generation) latencies.
    117 */
    118static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
    119					long n)
    120{
    121	ALIGN_DEST_TO8_UP(d,s,n);
    122	n -= 8;
    123	while (n >= 0) {
    124		unsigned long tmp;
    125		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
    126		n -= 8;
    127		s += 8;
    128		*(unsigned long *) d = tmp;
    129		d += 8;
    130	}
    131	n += 8;
    132	DO_REST_ALIGNED_UP(d,s,n);
    133}
    134static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
    135					long n)
    136{
    137	s += n;
    138	d += n;
    139	ALIGN_DEST_TO8_DN(d,s,n);
    140	n -= 8;
    141	while (n >= 0) {
    142		unsigned long tmp;
    143		s -= 8;
    144		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
    145		n -= 8;
    146		d -= 8;
    147		*(unsigned long *) d = tmp;
    148	}
    149	n += 8;
    150	DO_REST_ALIGNED_DN(d,s,n);
    151}
    152
    153void * memcpy(void * dest, const void *src, size_t n)
    154{
    155	if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
    156		__memcpy_aligned_up ((unsigned long) dest, (unsigned long) src,
    157				     n);
    158		return dest;
    159	}
    160	__memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n);
    161	return dest;
    162}
    163EXPORT_SYMBOL(memcpy);