cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

ecc-sw-hamming.c (19771B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * This file contains an ECC algorithm that detects and corrects 1 bit
      4 * errors in a 256 byte block of data.
      5 *
      6 * Copyright © 2008 Koninklijke Philips Electronics NV.
      7 *                  Author: Frans Meulenbroeks
      8 *
      9 * Completely replaces the previous ECC implementation which was written by:
     10 *   Steven J. Hill (sjhill@realitydiluted.com)
     11 *   Thomas Gleixner (tglx@linutronix.de)
     12 *
     13 * Information on how this algorithm works and how it was developed
     14 * can be found in Documentation/driver-api/mtd/nand_ecc.rst
     15 */
     16
     17#include <linux/types.h>
     18#include <linux/kernel.h>
     19#include <linux/module.h>
     20#include <linux/mtd/nand.h>
     21#include <linux/mtd/nand-ecc-sw-hamming.h>
     22#include <linux/slab.h>
     23#include <asm/byteorder.h>
     24
/*
 * invparity is a 256 byte table that contains the odd parity
 * for each byte. So if the number of bits in a byte is even,
 * the array element is 1, and when the number of bits is odd
 * the array element is 0.
 */
static const char invparity[256] = {
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
};
     49
/*
 * bitsperbyte contains the number of bits per byte, i.e. the
 * population count (popcount) of each possible byte value.
 * This is only used for testing and repairing parity
 * (a precalculated value slightly improves performance).
 */
static const char bitsperbyte[256] = {
	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};
     73
/*
 * addressbits is a lookup table to filter out the bits from the xor-ed
 * ECC data that identify the faulty location: it compresses the
 * odd-numbered (address-carrying) bits of its index into a compact
 * binary offset.
 * This is only used for repairing parity.
 * See the comments in nand_ecc_sw_hamming_correct for more details.
 */
static const char addressbits[256] = {
	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f
};
    114
    115int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size,
    116			     unsigned char *code, bool sm_order)
    117{
    118	const u32 *bp = (uint32_t *)buf;
    119	const u32 eccsize_mult = (step_size == 256) ? 1 : 2;
    120	/* current value in buffer */
    121	u32 cur;
    122	/* rp0..rp17 are the various accumulated parities (per byte) */
    123	u32 rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7, rp8, rp9, rp10, rp11, rp12,
    124		rp13, rp14, rp15, rp16, rp17;
    125	/* Cumulative parity for all data */
    126	u32 par;
    127	/* Cumulative parity at the end of the loop (rp12, rp14, rp16) */
    128	u32 tmppar;
    129	int i;
    130
    131	par = 0;
    132	rp4 = 0;
    133	rp6 = 0;
    134	rp8 = 0;
    135	rp10 = 0;
    136	rp12 = 0;
    137	rp14 = 0;
    138	rp16 = 0;
    139	rp17 = 0;
    140
    141	/*
    142	 * The loop is unrolled a number of times;
    143	 * This avoids if statements to decide on which rp value to update
    144	 * Also we process the data by longwords.
    145	 * Note: passing unaligned data might give a performance penalty.
    146	 * It is assumed that the buffers are aligned.
    147	 * tmppar is the cumulative sum of this iteration.
    148	 * needed for calculating rp12, rp14, rp16 and par
    149	 * also used as a performance improvement for rp6, rp8 and rp10
    150	 */
    151	for (i = 0; i < eccsize_mult << 2; i++) {
    152		cur = *bp++;
    153		tmppar = cur;
    154		rp4 ^= cur;
    155		cur = *bp++;
    156		tmppar ^= cur;
    157		rp6 ^= tmppar;
    158		cur = *bp++;
    159		tmppar ^= cur;
    160		rp4 ^= cur;
    161		cur = *bp++;
    162		tmppar ^= cur;
    163		rp8 ^= tmppar;
    164
    165		cur = *bp++;
    166		tmppar ^= cur;
    167		rp4 ^= cur;
    168		rp6 ^= cur;
    169		cur = *bp++;
    170		tmppar ^= cur;
    171		rp6 ^= cur;
    172		cur = *bp++;
    173		tmppar ^= cur;
    174		rp4 ^= cur;
    175		cur = *bp++;
    176		tmppar ^= cur;
    177		rp10 ^= tmppar;
    178
    179		cur = *bp++;
    180		tmppar ^= cur;
    181		rp4 ^= cur;
    182		rp6 ^= cur;
    183		rp8 ^= cur;
    184		cur = *bp++;
    185		tmppar ^= cur;
    186		rp6 ^= cur;
    187		rp8 ^= cur;
    188		cur = *bp++;
    189		tmppar ^= cur;
    190		rp4 ^= cur;
    191		rp8 ^= cur;
    192		cur = *bp++;
    193		tmppar ^= cur;
    194		rp8 ^= cur;
    195
    196		cur = *bp++;
    197		tmppar ^= cur;
    198		rp4 ^= cur;
    199		rp6 ^= cur;
    200		cur = *bp++;
    201		tmppar ^= cur;
    202		rp6 ^= cur;
    203		cur = *bp++;
    204		tmppar ^= cur;
    205		rp4 ^= cur;
    206		cur = *bp++;
    207		tmppar ^= cur;
    208
    209		par ^= tmppar;
    210		if ((i & 0x1) == 0)
    211			rp12 ^= tmppar;
    212		if ((i & 0x2) == 0)
    213			rp14 ^= tmppar;
    214		if (eccsize_mult == 2 && (i & 0x4) == 0)
    215			rp16 ^= tmppar;
    216	}
    217
    218	/*
    219	 * handle the fact that we use longword operations
    220	 * we'll bring rp4..rp14..rp16 back to single byte entities by
    221	 * shifting and xoring first fold the upper and lower 16 bits,
    222	 * then the upper and lower 8 bits.
    223	 */
    224	rp4 ^= (rp4 >> 16);
    225	rp4 ^= (rp4 >> 8);
    226	rp4 &= 0xff;
    227	rp6 ^= (rp6 >> 16);
    228	rp6 ^= (rp6 >> 8);
    229	rp6 &= 0xff;
    230	rp8 ^= (rp8 >> 16);
    231	rp8 ^= (rp8 >> 8);
    232	rp8 &= 0xff;
    233	rp10 ^= (rp10 >> 16);
    234	rp10 ^= (rp10 >> 8);
    235	rp10 &= 0xff;
    236	rp12 ^= (rp12 >> 16);
    237	rp12 ^= (rp12 >> 8);
    238	rp12 &= 0xff;
    239	rp14 ^= (rp14 >> 16);
    240	rp14 ^= (rp14 >> 8);
    241	rp14 &= 0xff;
    242	if (eccsize_mult == 2) {
    243		rp16 ^= (rp16 >> 16);
    244		rp16 ^= (rp16 >> 8);
    245		rp16 &= 0xff;
    246	}
    247
    248	/*
    249	 * we also need to calculate the row parity for rp0..rp3
    250	 * This is present in par, because par is now
    251	 * rp3 rp3 rp2 rp2 in little endian and
    252	 * rp2 rp2 rp3 rp3 in big endian
    253	 * as well as
    254	 * rp1 rp0 rp1 rp0 in little endian and
    255	 * rp0 rp1 rp0 rp1 in big endian
    256	 * First calculate rp2 and rp3
    257	 */
    258#ifdef __BIG_ENDIAN
    259	rp2 = (par >> 16);
    260	rp2 ^= (rp2 >> 8);
    261	rp2 &= 0xff;
    262	rp3 = par & 0xffff;
    263	rp3 ^= (rp3 >> 8);
    264	rp3 &= 0xff;
    265#else
    266	rp3 = (par >> 16);
    267	rp3 ^= (rp3 >> 8);
    268	rp3 &= 0xff;
    269	rp2 = par & 0xffff;
    270	rp2 ^= (rp2 >> 8);
    271	rp2 &= 0xff;
    272#endif
    273
    274	/* reduce par to 16 bits then calculate rp1 and rp0 */
    275	par ^= (par >> 16);
    276#ifdef __BIG_ENDIAN
    277	rp0 = (par >> 8) & 0xff;
    278	rp1 = (par & 0xff);
    279#else
    280	rp1 = (par >> 8) & 0xff;
    281	rp0 = (par & 0xff);
    282#endif
    283
    284	/* finally reduce par to 8 bits */
    285	par ^= (par >> 8);
    286	par &= 0xff;
    287
    288	/*
    289	 * and calculate rp5..rp15..rp17
    290	 * note that par = rp4 ^ rp5 and due to the commutative property
    291	 * of the ^ operator we can say:
    292	 * rp5 = (par ^ rp4);
    293	 * The & 0xff seems superfluous, but benchmarking learned that
    294	 * leaving it out gives slightly worse results. No idea why, probably
    295	 * it has to do with the way the pipeline in pentium is organized.
    296	 */
    297	rp5 = (par ^ rp4) & 0xff;
    298	rp7 = (par ^ rp6) & 0xff;
    299	rp9 = (par ^ rp8) & 0xff;
    300	rp11 = (par ^ rp10) & 0xff;
    301	rp13 = (par ^ rp12) & 0xff;
    302	rp15 = (par ^ rp14) & 0xff;
    303	if (eccsize_mult == 2)
    304		rp17 = (par ^ rp16) & 0xff;
    305
    306	/*
    307	 * Finally calculate the ECC bits.
    308	 * Again here it might seem that there are performance optimisations
    309	 * possible, but benchmarks showed that on the system this is developed
    310	 * the code below is the fastest
    311	 */
    312	if (sm_order) {
    313		code[0] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
    314			  (invparity[rp5] << 5) | (invparity[rp4] << 4) |
    315			  (invparity[rp3] << 3) | (invparity[rp2] << 2) |
    316			  (invparity[rp1] << 1) | (invparity[rp0]);
    317		code[1] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
    318			  (invparity[rp13] << 5) | (invparity[rp12] << 4) |
    319			  (invparity[rp11] << 3) | (invparity[rp10] << 2) |
    320			  (invparity[rp9] << 1) | (invparity[rp8]);
    321	} else {
    322		code[1] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
    323			  (invparity[rp5] << 5) | (invparity[rp4] << 4) |
    324			  (invparity[rp3] << 3) | (invparity[rp2] << 2) |
    325			  (invparity[rp1] << 1) | (invparity[rp0]);
    326		code[0] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
    327			  (invparity[rp13] << 5) | (invparity[rp12] << 4) |
    328			  (invparity[rp11] << 3) | (invparity[rp10] << 2) |
    329			  (invparity[rp9] << 1) | (invparity[rp8]);
    330	}
    331
    332	if (eccsize_mult == 1)
    333		code[2] =
    334		    (invparity[par & 0xf0] << 7) |
    335		    (invparity[par & 0x0f] << 6) |
    336		    (invparity[par & 0xcc] << 5) |
    337		    (invparity[par & 0x33] << 4) |
    338		    (invparity[par & 0xaa] << 3) |
    339		    (invparity[par & 0x55] << 2) |
    340		    3;
    341	else
    342		code[2] =
    343		    (invparity[par & 0xf0] << 7) |
    344		    (invparity[par & 0x0f] << 6) |
    345		    (invparity[par & 0xcc] << 5) |
    346		    (invparity[par & 0x33] << 4) |
    347		    (invparity[par & 0xaa] << 3) |
    348		    (invparity[par & 0x55] << 2) |
    349		    (invparity[rp17] << 1) |
    350		    (invparity[rp16] << 0);
    351
    352	return 0;
    353}
    354EXPORT_SYMBOL(ecc_sw_hamming_calculate);
    355
    356/**
    357 * nand_ecc_sw_hamming_calculate - Calculate 3-byte ECC for 256/512-byte block
    358 * @nand: NAND device
    359 * @buf: Input buffer with raw data
    360 * @code: Output buffer with ECC
    361 */
    362int nand_ecc_sw_hamming_calculate(struct nand_device *nand,
    363				  const unsigned char *buf, unsigned char *code)
    364{
    365	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
    366	unsigned int step_size = nand->ecc.ctx.conf.step_size;
    367	bool sm_order = engine_conf ? engine_conf->sm_order : false;
    368
    369	return ecc_sw_hamming_calculate(buf, step_size, code, sm_order);
    370}
    371EXPORT_SYMBOL(nand_ecc_sw_hamming_calculate);
    372
/**
 * ecc_sw_hamming_correct - Detect and correct a single bit error in a block
 * @buf: Raw data read from the chip
 * @read_ecc: The 3 ECC bytes read from the chip
 * @calc_ecc: The 3 ECC bytes recalculated from @buf
 * @step_size: Number of data bytes covered by the ECC (256 or 512)
 * @sm_order: Smart Media byte order (code bytes 0 and 1 swapped)
 *
 * Returns 0 if no error, 1 if a single bit error was corrected in @buf
 * (or the error was confined to the ECC bytes themselves, needing no
 * data correction), -EBADMSG for an uncorrectable error.
 */
int ecc_sw_hamming_correct(unsigned char *buf, unsigned char *read_ecc,
			   unsigned char *calc_ecc, unsigned int step_size,
			   bool sm_order)
{
	/* 1 for a 256-byte step, 2 for a 512-byte step */
	const u32 eccsize_mult = step_size >> 8;
	unsigned char b0, b1, b2, bit_addr;
	unsigned int byte_addr;

	/*
	 * b0 to b2 indicate which bit is faulty (if any)
	 * we might need the xor result more than once,
	 * so keep them in a local var
	 */
	if (sm_order) {
		b0 = read_ecc[0] ^ calc_ecc[0];
		b1 = read_ecc[1] ^ calc_ecc[1];
	} else {
		b0 = read_ecc[1] ^ calc_ecc[1];
		b1 = read_ecc[0] ^ calc_ecc[0];
	}

	b2 = read_ecc[2] ^ calc_ecc[2];

	/* check if there are any bitfaults */

	/* repeated if statements are slightly more efficient than switch ... */
	/* ordered in order of likelihood */

	if ((b0 | b1 | b2) == 0)
		return 0;	/* no error */

	/*
	 * A single bit error flips exactly one bit of each complementary
	 * parity pair, so each byte must have the "one bit of every pair
	 * differs" pattern. For 256-byte steps the two lowest bits of b2
	 * are unused, hence the 0x54 mask instead of 0x55.
	 */
	if ((((b0 ^ (b0 >> 1)) & 0x55) == 0x55) &&
	    (((b1 ^ (b1 >> 1)) & 0x55) == 0x55) &&
	    ((eccsize_mult == 1 && ((b2 ^ (b2 >> 1)) & 0x54) == 0x54) ||
	     (eccsize_mult == 2 && ((b2 ^ (b2 >> 1)) & 0x55) == 0x55))) {
	/* single bit error */
		/*
		 * rp17/rp15/13/11/9/7/5/3/1 indicate which byte is the faulty
		 * byte, cp 5/3/1 indicate the faulty bit.
		 * A lookup table (called addressbits) is used to filter
		 * the bits from the byte they are in.
		 * A marginal optimisation is possible by having three
		 * different lookup tables.
		 * One as we have now (for b0), one for b2
		 * (that would avoid the >> 1), and one for b1 (with all values
		 * << 4). However it was felt that introducing two more tables
		 * hardly justify the gain.
		 *
		 * The b2 shift is there to get rid of the lowest two bits.
		 * We could also do addressbits[b2] >> 1 but for the
		 * performance it does not make any difference
		 */
		if (eccsize_mult == 1)
			byte_addr = (addressbits[b1] << 4) + addressbits[b0];
		else
			byte_addr = (addressbits[b2 & 0x3] << 8) +
				    (addressbits[b1] << 4) + addressbits[b0];
		bit_addr = addressbits[b2 >> 2];
		/* flip the bit */
		buf[byte_addr] ^= (1 << bit_addr);
		return 1;

	}
	/* count nr of bits; use table lookup, faster than calculating it */
	if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
		return 1;	/* error in ECC data; no action needed */

	pr_err("%s: uncorrectable ECC error\n", __func__);
	return -EBADMSG;
}
EXPORT_SYMBOL(ecc_sw_hamming_correct);
    444
    445/**
    446 * nand_ecc_sw_hamming_correct - Detect and correct bit error(s)
    447 * @nand: NAND device
    448 * @buf: Raw data read from the chip
    449 * @read_ecc: ECC bytes read from the chip
    450 * @calc_ecc: ECC calculated from the raw data
    451 *
    452 * Detect and correct up to 1 bit error per 256/512-byte block.
    453 */
    454int nand_ecc_sw_hamming_correct(struct nand_device *nand, unsigned char *buf,
    455				unsigned char *read_ecc,
    456				unsigned char *calc_ecc)
    457{
    458	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
    459	unsigned int step_size = nand->ecc.ctx.conf.step_size;
    460	bool sm_order = engine_conf ? engine_conf->sm_order : false;
    461
    462	return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, step_size,
    463				      sm_order);
    464}
    465EXPORT_SYMBOL(nand_ecc_sw_hamming_correct);
    466
    467int nand_ecc_sw_hamming_init_ctx(struct nand_device *nand)
    468{
    469	struct nand_ecc_props *conf = &nand->ecc.ctx.conf;
    470	struct nand_ecc_sw_hamming_conf *engine_conf;
    471	struct mtd_info *mtd = nanddev_to_mtd(nand);
    472	int ret;
    473
    474	if (!mtd->ooblayout) {
    475		switch (mtd->oobsize) {
    476		case 8:
    477		case 16:
    478			mtd_set_ooblayout(mtd, nand_get_small_page_ooblayout());
    479			break;
    480		case 64:
    481		case 128:
    482			mtd_set_ooblayout(mtd,
    483					  nand_get_large_page_hamming_ooblayout());
    484			break;
    485		default:
    486			return -ENOTSUPP;
    487		}
    488	}
    489
    490	conf->engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
    491	conf->algo = NAND_ECC_ALGO_HAMMING;
    492	conf->step_size = nand->ecc.user_conf.step_size;
    493	conf->strength = 1;
    494
    495	/* Use the strongest configuration by default */
    496	if (conf->step_size != 256 && conf->step_size != 512)
    497		conf->step_size = 256;
    498
    499	engine_conf = kzalloc(sizeof(*engine_conf), GFP_KERNEL);
    500	if (!engine_conf)
    501		return -ENOMEM;
    502
    503	ret = nand_ecc_init_req_tweaking(&engine_conf->req_ctx, nand);
    504	if (ret)
    505		goto free_engine_conf;
    506
    507	engine_conf->code_size = 3;
    508	engine_conf->calc_buf = kzalloc(mtd->oobsize, GFP_KERNEL);
    509	engine_conf->code_buf = kzalloc(mtd->oobsize, GFP_KERNEL);
    510	if (!engine_conf->calc_buf || !engine_conf->code_buf) {
    511		ret = -ENOMEM;
    512		goto free_bufs;
    513	}
    514
    515	nand->ecc.ctx.priv = engine_conf;
    516	nand->ecc.ctx.nsteps = mtd->writesize / conf->step_size;
    517	nand->ecc.ctx.total = nand->ecc.ctx.nsteps * engine_conf->code_size;
    518
    519	return 0;
    520
    521free_bufs:
    522	nand_ecc_cleanup_req_tweaking(&engine_conf->req_ctx);
    523	kfree(engine_conf->calc_buf);
    524	kfree(engine_conf->code_buf);
    525free_engine_conf:
    526	kfree(engine_conf);
    527
    528	return ret;
    529}
    530EXPORT_SYMBOL(nand_ecc_sw_hamming_init_ctx);
    531
    532void nand_ecc_sw_hamming_cleanup_ctx(struct nand_device *nand)
    533{
    534	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
    535
    536	if (engine_conf) {
    537		nand_ecc_cleanup_req_tweaking(&engine_conf->req_ctx);
    538		kfree(engine_conf->calc_buf);
    539		kfree(engine_conf->code_buf);
    540		kfree(engine_conf);
    541	}
    542}
    543EXPORT_SYMBOL(nand_ecc_sw_hamming_cleanup_ctx);
    544
    545static int nand_ecc_sw_hamming_prepare_io_req(struct nand_device *nand,
    546					      struct nand_page_io_req *req)
    547{
    548	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
    549	struct mtd_info *mtd = nanddev_to_mtd(nand);
    550	int eccsize = nand->ecc.ctx.conf.step_size;
    551	int eccbytes = engine_conf->code_size;
    552	int eccsteps = nand->ecc.ctx.nsteps;
    553	int total = nand->ecc.ctx.total;
    554	u8 *ecccalc = engine_conf->calc_buf;
    555	const u8 *data;
    556	int i;
    557
    558	/* Nothing to do for a raw operation */
    559	if (req->mode == MTD_OPS_RAW)
    560		return 0;
    561
    562	/* This engine does not provide BBM/free OOB bytes protection */
    563	if (!req->datalen)
    564		return 0;
    565
    566	nand_ecc_tweak_req(&engine_conf->req_ctx, req);
    567
    568	/* No more preparation for page read */
    569	if (req->type == NAND_PAGE_READ)
    570		return 0;
    571
    572	/* Preparation for page write: derive the ECC bytes and place them */
    573	for (i = 0, data = req->databuf.out;
    574	     eccsteps;
    575	     eccsteps--, i += eccbytes, data += eccsize)
    576		nand_ecc_sw_hamming_calculate(nand, data, &ecccalc[i]);
    577
    578	return mtd_ooblayout_set_eccbytes(mtd, ecccalc, (void *)req->oobbuf.out,
    579					  0, total);
    580}
    581
    582static int nand_ecc_sw_hamming_finish_io_req(struct nand_device *nand,
    583					     struct nand_page_io_req *req)
    584{
    585	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
    586	struct mtd_info *mtd = nanddev_to_mtd(nand);
    587	int eccsize = nand->ecc.ctx.conf.step_size;
    588	int total = nand->ecc.ctx.total;
    589	int eccbytes = engine_conf->code_size;
    590	int eccsteps = nand->ecc.ctx.nsteps;
    591	u8 *ecccalc = engine_conf->calc_buf;
    592	u8 *ecccode = engine_conf->code_buf;
    593	unsigned int max_bitflips = 0;
    594	u8 *data = req->databuf.in;
    595	int i, ret;
    596
    597	/* Nothing to do for a raw operation */
    598	if (req->mode == MTD_OPS_RAW)
    599		return 0;
    600
    601	/* This engine does not provide BBM/free OOB bytes protection */
    602	if (!req->datalen)
    603		return 0;
    604
    605	/* No more preparation for page write */
    606	if (req->type == NAND_PAGE_WRITE) {
    607		nand_ecc_restore_req(&engine_conf->req_ctx, req);
    608		return 0;
    609	}
    610
    611	/* Finish a page read: retrieve the (raw) ECC bytes*/
    612	ret = mtd_ooblayout_get_eccbytes(mtd, ecccode, req->oobbuf.in, 0,
    613					 total);
    614	if (ret)
    615		return ret;
    616
    617	/* Calculate the ECC bytes */
    618	for (i = 0; eccsteps; eccsteps--, i += eccbytes, data += eccsize)
    619		nand_ecc_sw_hamming_calculate(nand, data, &ecccalc[i]);
    620
    621	/* Finish a page read: compare and correct */
    622	for (eccsteps = nand->ecc.ctx.nsteps, i = 0, data = req->databuf.in;
    623	     eccsteps;
    624	     eccsteps--, i += eccbytes, data += eccsize) {
    625		int stat =  nand_ecc_sw_hamming_correct(nand, data,
    626							&ecccode[i],
    627							&ecccalc[i]);
    628		if (stat < 0) {
    629			mtd->ecc_stats.failed++;
    630		} else {
    631			mtd->ecc_stats.corrected += stat;
    632			max_bitflips = max_t(unsigned int, max_bitflips, stat);
    633		}
    634	}
    635
    636	nand_ecc_restore_req(&engine_conf->req_ctx, req);
    637
    638	return max_bitflips;
    639}
    640
/* Hooks exposed to the generic NAND ECC engine framework */
static struct nand_ecc_engine_ops nand_ecc_sw_hamming_engine_ops = {
	.init_ctx = nand_ecc_sw_hamming_init_ctx,
	.cleanup_ctx = nand_ecc_sw_hamming_cleanup_ctx,
	.prepare_io_req = nand_ecc_sw_hamming_prepare_io_req,
	.finish_io_req = nand_ecc_sw_hamming_finish_io_req,
};
    647
/* Single shared instance of the software Hamming ECC engine */
static struct nand_ecc_engine nand_ecc_sw_hamming_engine = {
	.ops = &nand_ecc_sw_hamming_engine_ops,
};
    651
/**
 * nand_ecc_sw_hamming_get_engine - Get the software Hamming ECC engine
 *
 * Returns a pointer to the file-scope singleton engine instance.
 */
struct nand_ecc_engine *nand_ecc_sw_hamming_get_engine(void)
{
	return &nand_ecc_sw_hamming_engine;
}
EXPORT_SYMBOL(nand_ecc_sw_hamming_get_engine);
    657
    658MODULE_LICENSE("GPL");
    659MODULE_AUTHOR("Frans Meulenbroeks <fransmeulenbroeks@gmail.com>");
    660MODULE_DESCRIPTION("NAND software Hamming ECC support");