bch.c source code [linux/lib/bch.c]

1	/*
2	* Generic binary BCH encoding/decoding library
3	*
4	* This program is free software; you can redistribute it and/or modify it
5	* under the terms of the GNU General Public License version 2 as published by
6	* the Free Software Foundation.
7	*
8	* This program is distributed in the hope that it will be useful, but WITHOUT
9	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11	* more details.
12	*
13	* You should have received a copy of the GNU General Public License along with
14	* this program; if not, write to the Free Software Foundation, Inc., 51
15	* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
16	*
17	* Copyright © 2011 Parrot S.A.
18	*
19	* Author: Ivan Djelic <ivan.djelic@parrot.com>
20	*
21	* Description:
22	*
23	* This library provides runtime configurable encoding/decoding of binary
24	* Bose-Chaudhuri-Hocquenghem (BCH) codes.
25	*
26	* Call bch_init to get a pointer to a newly allocated bch_control structure for
27	* the given m (Galois field order), t (error correction capability) and
28	* (optional) primitive polynomial parameters.
29	*
30	* Call bch_encode to compute and store ecc parity bytes to a given buffer.
31	* Call bch_decode to detect and locate errors in received data.
32	*
33	* On systems supporting hw BCH features, intermediate results may be provided
34	* to bch_decode in order to skip certain steps. See bch_decode() documentation
35	* for details.
36	*
37	* Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
38	* parameters m and t; thus allowing extra compiler optimizations and providing
39	* better (up to 2x) encoding performance. Using this option makes sense when
40	* (m,t) are fixed and known in advance, e.g. when using BCH error correction
41	* on a particular NAND flash device.
42	*
43	* Algorithmic details:
44	*
45	* Encoding is performed by processing 32 input bits in parallel, using 4
46	* remainder lookup tables.
47	*
48	* The final stage of decoding involves the following internal steps:
49	* a. Syndrome computation
50	* b. Error locator polynomial computation using Berlekamp-Massey algorithm
51	* c. Error locator root finding (by far the most expensive step)
52	*
53	* In this implementation, step c is not performed using the usual Chien search.
54	* Instead, an alternative approach described in [1] is used. It consists in
55	* factoring the error locator polynomial using the Berlekamp Trace algorithm
56	* (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
57	* solving techniques [2] are used. The resulting algorithm, called BTZ, yields
58	* much better performance than Chien search for usual (m,t) values (typically
59	* m >= 13, t < 32, see [1]).
60	*
61	* [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
62	* of characteristic 2, in: Western European Workshop on Research in Cryptology
63	* - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
64	* [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
65	* finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
66	*/
67
68	#include <linux/kernel.h>
69	#include <linux/errno.h>
70	#include <linux/init.h>
71	#include <linux/module.h>
72	#include <linux/slab.h>
73	#include <linux/bitops.h>
74	#include <linux/bitrev.h>
75	#include <asm/byteorder.h>
76	#include <linux/bch.h>
77
/*
 * Code parameter accessors.
 *
 * With CONFIG_BCH_CONST_PARAMS the field order m and correction capability t
 * are compile-time constants and the control-structure argument is ignored;
 * otherwise they are read from the bch_control structure.
 */
#if defined(CONFIG_BCH_CONST_PARAMS)
#define GF_M(_p)               (CONFIG_BCH_CONST_M)
#define GF_T(_p)               (CONFIG_BCH_CONST_T)
#define GF_N(_p)               ((1 << (CONFIG_BCH_CONST_M))-1)
#define BCH_MAX_M              (CONFIG_BCH_CONST_M)
#define BCH_MAX_T              (CONFIG_BCH_CONST_T)
#else
#define GF_M(_p)               ((_p)->m)
#define GF_T(_p)               ((_p)->t)
#define GF_N(_p)               ((_p)->n)
#define BCH_MAX_M              15 /* 2KB */
#define BCH_MAX_T              64 /* 64 bit correction */
#endif

/* number of 32-bit words / bytes needed to hold m*t parity bits */
#define BCH_ECC_WORDS(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
#define BCH_ECC_BYTES(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)

/* upper bound of BCH_ECC_WORDS(), used to size on-stack buffers */
#define BCH_ECC_MAX_WORDS      DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)

/* debug traces compile to nothing unless dbg() is defined beforehand */
#ifndef dbg
#define dbg(_fmt, args...)     do {} while (0)
#endif
100
/*
 * represent a polynomial over GF(2^m)
 */
struct gf_poly {
	unsigned int deg;    /* polynomial degree */
	unsigned int c[];    /* polynomial terms */
};

/* given its degree, compute a polynomial size in bytes */
#define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))

/* polynomial of degree 1: header followed by storage for two coefficients */
struct gf_poly_deg1 {
	struct gf_poly poly;
	unsigned int   c[2];
};
117
118	static u8 swap_bits(struct bch_control *bch, u8 in)
119	{
120	if (!bch->swap_bits)
121	return in;
122
123	return bitrev8(in);
124	}
125
126	/*
127	* same as bch_encode(), but process input data one byte at a time
128	*/
129	static void bch_encode_unaligned(struct bch_control *bch,
130	const unsigned char data, unsigned* int len,
131	uint32_t *ecc)
132	{
133	int i;
134	const uint32_t *p;
135	const int l = BCH_ECC_WORDS(bch)-`1`;
136
137	while (len--) {
138	u8 tmp = swap_bits(bch, in: *data++);
139
140	p = bch->mod8_tab + (l+`1`)*(((ecc[`0`] >> `24`)^(tmp)) & `0xff`);
141
142	for (i = `0`; i < l; i++)
143	ecc[i] = ((ecc[i] << `8`)\|(ecc[i+`1`] >> `24`))^(*p++);
144
145	ecc[l] = (ecc[l] << `8`)^(*p);
146	}
147	}
148
149	/*
150	* convert ecc bytes to aligned, zero-padded 32-bit ecc words
151	*/
152	static void load_ecc8(struct bch_control bch, uint32_t dst,
153	const uint8_t *src)
154	{
155	uint8_t pad[`4`] = {`0`, `0`, `0`, `0`};
156	unsigned int i, nwords = BCH_ECC_WORDS(bch)-`1`;
157
158	for (i = `0`; i < nwords; i++, src += `4`)
159	dst[i] = ((u32)swap_bits(bch, in: src[`0`]) << `24`) \|
160	((u32)swap_bits(bch, in: src[`1`]) << `16`) \|
161	((u32)swap_bits(bch, in: src[`2`]) << `8`) \|
162	swap_bits(bch, in: src[`3`]);
163
164	memcpy(pad, src, BCH_ECC_BYTES(bch)-`4`*nwords);
165	dst[nwords] = ((u32)swap_bits(bch, in: pad[`0`]) << `24`) \|
166	((u32)swap_bits(bch, in: pad[`1`]) << `16`) \|
167	((u32)swap_bits(bch, in: pad[`2`]) << `8`) \|
168	swap_bits(bch, in: pad[`3`]);
169	}
170
/*
 * convert 32-bit ecc words to ecc bytes
 *
 * Inverse of load_ecc8(): writes BCH_ECC_BYTES(bch) bytes to @dst from the
 * BCH_ECC_WORDS(bch) words in @src, most significant byte first.
 */
static void store_ecc8(struct bch_control *bch, uint8_t *dst,
		       const uint32_t *src)
{
	uint8_t pad[4];
	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;

	for (i = 0; i < nwords; i++) {
		*dst++ = swap_bits(bch, src[i] >> 24);
		*dst++ = swap_bits(bch, src[i] >> 16);
		*dst++ = swap_bits(bch, src[i] >> 8);
		*dst++ = swap_bits(bch, src[i]);
	}
	/* last word may be partial: only the needed leading bytes are copied */
	pad[0] = swap_bits(bch, src[nwords] >> 24);
	pad[1] = swap_bits(bch, src[nwords] >> 16);
	pad[2] = swap_bits(bch, src[nwords] >> 8);
	pad[3] = swap_bits(bch, src[nwords]);
	memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
}
192
193	/**
194	* bch_encode - calculate BCH ecc parity of data
195	* @bch: BCH control structure
196	* @data: data to encode
197	* @len: data length in bytes
198	* @ecc: ecc parity data, must be initialized by caller
199	*
200	* The @ecc parity array is used both as input and output parameter, in order to
201	* allow incremental computations. It should be of the size indicated by member
202	* @ecc_bytes of @bch, and should be initialized to 0 before the first call.
203	*
204	* The exact number of computed ecc parity bits is given by member @ecc_bits of
205	* @bch; it may be less than m*t for large values of t.
206	*/
207	void bch_encode(struct bch_control bch, const* uint8_t *data,
208	unsigned int len, uint8_t *ecc)
209	{
210	const unsigned int l = BCH_ECC_WORDS(bch)-`1`;
211	unsigned int i, mlen;
212	unsigned long m;
213	uint32_t w, r[BCH_ECC_MAX_WORDS];
214	const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r);
215	const uint32_t * const tab0 = bch->mod8_tab;
216	const uint32_t * const tab1 = tab0 + `256`*(l+`1`);
217	const uint32_t * const tab2 = tab1 + `256`*(l+`1`);
218	const uint32_t * const tab3 = tab2 + `256`*(l+`1`);
219	const uint32_t pdata, p0, p1, p2, *p3;
220
221	if (WARN_ON(r_bytes > sizeof(r)))
222	return;
223
224	if (ecc) {
225	/ load ecc parity bytes into internal 32-bit buffer /
226	load_ecc8(bch, dst: bch->ecc_buf, src: ecc);
227	} else {
228	memset(bch->ecc_buf, `0`, r_bytes);
229	}
230
231	/ process first unaligned data bytes /
232	m = ((unsigned long)data) & `3`;
233	if (m) {
234	mlen = (len < (`4`-m)) ? len : `4`-m;
235	bch_encode_unaligned(bch, data, len: mlen, ecc: bch->ecc_buf);
236	data += mlen;
237	len -= mlen;
238	}
239
240	/ process 32-bit aligned data words /
241	pdata = (uint32_t *)data;
242	mlen = len/`4`;
243	data += `4`*mlen;
244	len -= `4`*mlen;
245	memcpy(r, bch->ecc_buf, r_bytes);
246
247	/*
248	* split each 32-bit word into 4 polynomials of weight 8 as follows:
249	*
250	* 31 ...24 23 ...16 15 ... 8 7 ... 0
251	* xxxxxxxx yyyyyyyy zzzzzzzz tttttttt
252	* tttttttt mod g = r0 (precomputed)
253	* zzzzzzzz 00000000 mod g = r1 (precomputed)
254	* yyyyyyyy 00000000 00000000 mod g = r2 (precomputed)
255	* xxxxxxxx 00000000 00000000 00000000 mod g = r3 (precomputed)
256	* xxxxxxxx yyyyyyyy zzzzzzzz tttttttt mod g = r0^r1^r2^r3
257	*/
258	while (mlen--) {
259	/ input data is read in big-endian format /
260	w = cpu_to_be32(*pdata++);
261	if (bch->swap_bits)
262	w = (u32)swap_bits(bch, in: w) \|
263	((u32)swap_bits(bch, in: w >> `8`) << `8`) \|
264	((u32)swap_bits(bch, in: w >> `16`) << `16`) \|
265	((u32)swap_bits(bch, in: w >> `24`) << `24`);
266	w ^= r[`0`];
267	p0 = tab0 + (l+`1`)*((w >> `0`) & `0xff`);
268	p1 = tab1 + (l+`1`)*((w >> `8`) & `0xff`);
269	p2 = tab2 + (l+`1`)*((w >> `16`) & `0xff`);
270	p3 = tab3 + (l+`1`)*((w >> `24`) & `0xff`);
271
272	for (i = `0`; i < l; i++)
273	r[i] = r[i+`1`]^p0[i]^p1[i]^p2[i]^p3[i];
274
275	r[l] = p0[l]^p1[l]^p2[l]^p3[l];
276	}
277	memcpy(bch->ecc_buf, r, r_bytes);
278
279	/ process last unaligned bytes /
280	if (len)
281	bch_encode_unaligned(bch, data, len, ecc: bch->ecc_buf);
282
283	/ store ecc parity bytes into original parity buffer /
284	if (ecc)
285	store_ecc8(bch, dst: ecc, src: bch->ecc_buf);
286	}
287	EXPORT_SYMBOL_GPL(bch_encode);
288
/*
 * Reduce @v modulo N = GF_N(bch) by repeatedly subtracting N and folding the
 * bits above position m back into the low part.
 */
static inline int modulo(struct bch_control *bch, unsigned int v)
{
	const unsigned int n = GF_N(bch);
	while (v >= n) {
		v -= n;
		v = (v & n) + (v >> GF_M(bch));
	}
	return v;
}
298
/*
 * shorter and faster modulo function, only works when v < 2N.
 */
static inline int mod_s(struct bch_control *bch, unsigned int v)
{
	const unsigned int n = GF_N(bch);
	return (v < n) ? v : v-n;
}
307
/*
 * Degree of a GF(2) polynomial stored as a bit mask. Returns fls(poly)-1,
 * which is -1 when fls() reports 0.
 */
static inline int deg(unsigned int poly)
{
	/* polynomial degree is the most-significant bit index */
	return fls(poly)-1;
}
313
/*
 * Return the parity of @x: 1 if an odd number of bits is set, 0 otherwise.
 */
static inline int parity(unsigned int x)
{
	/*
	 * public domain code snippet, lifted from
	 * http://www-graphics.stanford.edu/~seander/bithacks.html
	 */
	x ^= x >> 1;
	x ^= x >> 2;
	x = (x & 0x11111111U) * 0x11111111U;
	return (x >> 28) & 1;
}
325
326	/ Galois field basic operations: multiply, divide, inverse, etc. /
327
328	static inline unsigned int gf_mul(struct bch_control bch, unsigned* int a,
329	unsigned int b)
330	{
331	return (a && b) ? bch->a_pow_tab[mod_s(bch, v: bch->a_log_tab[a]+
332	bch->a_log_tab[b])] : `0`;
333	}
334
335	static inline unsigned int gf_sqr(struct bch_control bch, unsigned* int a)
336	{
337	return a ? bch->a_pow_tab[mod_s(bch, v: `2`*bch->a_log_tab[a])] : `0`;
338	}
339
340	static inline unsigned int gf_div(struct bch_control bch, unsigned* int a,
341	unsigned int b)
342	{
343	return a ? bch->a_pow_tab[mod_s(bch, v: bch->a_log_tab[a]+
344	GF_N(bch)-bch->a_log_tab[b])] : `0`;
345	}
346
347	static inline unsigned int gf_inv(struct bch_control bch, unsigned* int a)
348	{
349	return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
350	}
351
352	static inline unsigned int a_pow(struct bch_control bch, int* i)
353	{
354	return bch->a_pow_tab[modulo(bch, v: i)];
355	}
356
357	static inline int a_log(struct bch_control bch, unsigned* int x)
358	{
359	return bch->a_log_tab[x];
360	}
361
362	static inline int a_ilog(struct bch_control bch, unsigned* int x)
363	{
364	return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
365	}
366
367	/*
368	* compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
369	*/
370	static void compute_syndromes(struct bch_control bch, uint32_t ecc,
371	unsigned int *syn)
372	{
373	int i, j, s;
374	unsigned int m;
375	uint32_t poly;
376	const int t = GF_T(bch);
377
378	s = bch->ecc_bits;
379
380	/ make sure extra bits in last ecc word are cleared /
381	m = ((unsigned int)s) & `31`;
382	if (m)
383	ecc[s/`32`] &= ~((`1u` << (`32`-m))-`1`);
384	memset(syn, `0`, `2`tsizeof(*syn));
385
386	/ compute v(a^j) for j=1 .. 2t-1 /
387	do {
388	poly = *ecc++;
389	s -= `32`;
390	while (poly) {
391	i = deg(poly);
392	for (j = `0`; j < `2`*t; j += `2`)
393	syn[j] ^= a_pow(bch, i: (j+`1`)*(i+s));
394
395	poly ^= (`1` << i);
396	}
397	} while (s > `0`);
398
399	/ v(a^(2j)) = v(a^j)^2 /
400	for (j = `0`; j < t; j++)
401	syn[`2`*j+`1`] = gf_sqr(bch, a: syn[j]);
402	}
403
404	static void gf_poly_copy(struct gf_poly dst, struct* gf_poly *src)
405	{
406	memcpy(dst, src, GF_POLY_SZ(src->deg));
407	}
408
409	static int compute_error_locator_polynomial(struct bch_control *bch,
410	const unsigned int *syn)
411	{
412	const unsigned int t = GF_T(bch);
413	const unsigned int n = GF_N(bch);
414	unsigned int i, j, tmp, l, pd = `1`, d = syn[`0`];
415	struct gf_poly *elp = bch->elp;
416	struct gf_poly *pelp = bch->poly_2t[`0`];
417	struct gf_poly *elp_copy = bch->poly_2t[`1`];
418	int k, pp = -`1`;
419
420	memset(pelp, `0`, GF_POLY_SZ(`2`*t));
421	memset(elp, `0`, GF_POLY_SZ(`2`*t));
422
423	pelp->deg = `0`;
424	pelp->c[`0`] = `1`;
425	elp->deg = `0`;
426	elp->c[`0`] = `1`;
427
428	/ use simplified binary Berlekamp-Massey algorithm /
429	for (i = `0`; (i < t) && (elp->deg <= t); i++) {
430	if (d) {
431	k = `2`*i-pp;
432	gf_poly_copy(dst: elp_copy, src: elp);
433	/ e[i+1](X) = e[i](X)+didp^-1X^2(i-p)e[p](X) /*
434	tmp = a_log(bch, x: d)+n-a_log(bch, x: pd);
435	for (j = `0`; j <= pelp->deg; j++) {
436	if (pelp->c[j]) {
437	l = a_log(bch, x: pelp->c[j]);
438	elp->c[j+k] ^= a_pow(bch, i: tmp+l);
439	}
440	}
441	/ compute l[i+1] = max(l[i]->c[l[p]+2(i-p]) /*
442	tmp = pelp->deg+k;
443	if (tmp > elp->deg) {
444	elp->deg = tmp;
445	gf_poly_copy(dst: pelp, src: elp_copy);
446	pd = d;
447	pp = `2`*i;
448	}
449	}
450	/ di+1 = S(2i+3)+elp[i+1].1S(2i+2)+...+elp[i+1].lS(2i+3-l) /*
451	if (i < t-`1`) {
452	d = syn[`2`*i+`2`];
453	for (j = `1`; j <= elp->deg; j++)
454	d ^= gf_mul(bch, a: elp->c[j], b: syn[`2`*i+`2`-j]);
455	}
456	}
457	dbg("elp=%s\n", gf_poly_str(elp));
458	return (elp->deg > t) ? -`1` : (int)elp->deg;
459	}
460
461	/*
462	* solve a m x m linear system in GF(2) with an expected number of solutions,
463	* and return the number of found solutions
464	*/
465	static int solve_linear_system(struct bch_control bch, unsigned* int *rows,
466	unsigned int sol, int* nsol)
467	{
468	const int m = GF_M(bch);
469	unsigned int tmp, mask;
470	int rem, c, r, p, k, param[BCH_MAX_M];
471
472	k = `0`;
473	mask = `1` << m;
474
475	/ Gaussian elimination /
476	for (c = `0`; c < m; c++) {
477	rem = `0`;
478	p = c-k;
479	/ find suitable row for elimination /
480	for (r = p; r < m; r++) {
481	if (rows[r] & mask) {
482	if (r != p) {
483	tmp = rows[r];
484	rows[r] = rows[p];
485	rows[p] = tmp;
486	}
487	rem = r+`1`;
488	break;
489	}
490	}
491	if (rem) {
492	/ perform elimination on remaining rows /
493	tmp = rows[p];
494	for (r = rem; r < m; r++) {
495	if (rows[r] & mask)
496	rows[r] ^= tmp;
497	}
498	} else {
499	/ elimination not needed, store defective row index /
500	param[k++] = c;
501	}
502	mask >>= `1`;
503	}
504	/ rewrite system, inserting fake parameter rows /
505	if (k > `0`) {
506	p = k;
507	for (r = m-`1`; r >= `0`; r--) {
508	if ((r > m-`1`-k) && rows[r])
509	/ system has no solution /
510	return `0`;
511
512	rows[r] = (p && (r == param[p-`1`])) ?
513	p--, `1u` << (m-r) : rows[r-p];
514	}
515	}
516
517	if (nsol != (`1` << k))
518	/ unexpected number of solutions /
519	return `0`;
520
521	for (p = `0`; p < nsol; p++) {
522	/ set parameters for p-th solution /
523	for (c = `0`; c < k; c++)
524	rows[param[c]] = (rows[param[c]] & ~`1`)\|((p >> c) & `1`);
525
526	/ compute unique solution /
527	tmp = `0`;
528	for (r = m-`1`; r >= `0`; r--) {
529	mask = rows[r] & (tmp\|`1`);
530	tmp \|= parity(x: mask) << (m-r);
531	}
532	sol[p] = tmp >> `1`;
533	}
534	return nsol;
535	}
536
537	/*
538	* this function builds and solves a linear system for finding roots of a degree
539	* 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
540	*/
541	static int find_affine4_roots(struct bch_control bch, unsigned* int a,
542	unsigned int b, unsigned int c,
543	unsigned int *roots)
544	{
545	int i, j, k;
546	const int m = GF_M(bch);
547	unsigned int mask = `0xff`, t, rows[`16`] = {`0`,};
548
549	j = a_log(bch, x: b);
550	k = a_log(bch, x: a);
551	rows[`0`] = c;
552
553	/ build linear system to solve X^4+aX^2+bX+c = 0 /
554	for (i = `0`; i < m; i++) {
555	rows[i+`1`] = bch->a_pow_tab[`4`*i]^
556	(a ? bch->a_pow_tab[mod_s(bch, v: k)] : `0`)^
557	(b ? bch->a_pow_tab[mod_s(bch, v: j)] : `0`);
558	j++;
559	k += `2`;
560	}
561	/*
562	* transpose 16x16 matrix before passing it to linear solver
563	* warning: this code assumes m < 16
564	*/
565	for (j = `8`; j != `0`; j >>= `1`, mask ^= (mask << j)) {
566	for (k = `0`; k < `16`; k = (k+j+`1`) & ~j) {
567	t = ((rows[k] >> j)^rows[k+j]) & mask;
568	rows[k] ^= (t << j);
569	rows[k+j] ^= t;
570	}
571	}
572	return solve_linear_system(bch, rows, sol: roots, nsol: `4`);
573	}
574
575	/*
576	* compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
577	*/
578	static int find_poly_deg1_roots(struct bch_control bch, struct* gf_poly *poly,
579	unsigned int *roots)
580	{
581	int n = `0`;
582
583	if (poly->c[`0`])
584	/ poly[X] = bX+c with c!=0, root=c/b /
585	roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[`0`]]+
586	bch->a_log_tab[poly->c[`1`]]);
587	return n;
588	}
589
590	/*
591	* compute roots of a degree 2 polynomial over GF(2^m)
592	*/
593	static int find_poly_deg2_roots(struct bch_control bch, struct* gf_poly *poly,
594	unsigned int *roots)
595	{
596	int n = `0`, i, l0, l1, l2;
597	unsigned int u, v, r;
598
599	if (poly->c[`0`] && poly->c[`1`]) {
600
601	l0 = bch->a_log_tab[poly->c[`0`]];
602	l1 = bch->a_log_tab[poly->c[`1`]];
603	l2 = bch->a_log_tab[poly->c[`2`]];
604
605	/ using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) /
606	u = a_pow(bch, i: l0+l2+`2`*(GF_N(bch)-l1));
607	/*
608	* let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
609	* r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
610	* u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
611	* i.e. r and r+1 are roots iff Tr(u)=0
612	*/
613	r = `0`;
614	v = u;
615	while (v) {
616	i = deg(poly: v);
617	r ^= bch->xi_tab[i];
618	v ^= (`1` << i);
619	}
620	/ verify root /
621	if ((gf_sqr(bch, a: r)^r) == u) {
622	/ reverse z=a/bX transformation and compute log(1/r) /
623	roots[n++] = modulo(bch, v: `2`*GF_N(bch)-l1-
624	bch->a_log_tab[r]+l2);
625	roots[n++] = modulo(bch, v: `2`*GF_N(bch)-l1-
626	bch->a_log_tab[r^`1`]+l2);
627	}
628	}
629	return n;
630	}
631
632	/*
633	* compute roots of a degree 3 polynomial over GF(2^m)
634	*/
635	static int find_poly_deg3_roots(struct bch_control bch, struct* gf_poly *poly,
636	unsigned int *roots)
637	{
638	int i, n = `0`;
639	unsigned int a, b, c, a2, b2, c2, e3, tmp[`4`];
640
641	if (poly->c[`0`]) {
642	/ transform polynomial into monic X^3 + a2X^2 + b2X + c2 /
643	e3 = poly->c[`3`];
644	c2 = gf_div(bch, a: poly->c[`0`], b: e3);
645	b2 = gf_div(bch, a: poly->c[`1`], b: e3);
646	a2 = gf_div(bch, a: poly->c[`2`], b: e3);
647
648	/ (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) /
649	c = gf_mul(bch, a: a2, b: c2); / c = a2c2 /
650	b = gf_mul(bch, a: a2, b: b2)^c2; / b = a2b2 + c2 /
651	a = gf_sqr(bch, a: a2)^b2; / a = a2^2 + b2 /
652
653	/ find the 4 roots of this affine polynomial /
654	if (find_affine4_roots(bch, a, b, c, roots: tmp) == `4`) {
655	/ remove a2 from final list of roots /
656	for (i = `0`; i < `4`; i++) {
657	if (tmp[i] != a2)
658	roots[n++] = a_ilog(bch, x: tmp[i]);
659	}
660	}
661	}
662	return n;
663	}
664
665	/*
666	* compute roots of a degree 4 polynomial over GF(2^m)
667	*/
668	static int find_poly_deg4_roots(struct bch_control bch, struct* gf_poly *poly,
669	unsigned int *roots)
670	{
671	int i, l, n = `0`;
672	unsigned int a, b, c, d, e = `0`, f, a2, b2, c2, e4;
673
674	if (poly->c[`0`] == `0`)
675	return `0`;
676
677	/ transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d /
678	e4 = poly->c[`4`];
679	d = gf_div(bch, a: poly->c[`0`], b: e4);
680	c = gf_div(bch, a: poly->c[`1`], b: e4);
681	b = gf_div(bch, a: poly->c[`2`], b: e4);
682	a = gf_div(bch, a: poly->c[`3`], b: e4);
683
684	/ use Y=1/X transformation to get an affine polynomial /
685	if (a) {
686	/ first, eliminate cX by using z=X+e with ae^2+c=0 /
687	if (c) {
688	/ compute e such that e^2 = c/a /
689	f = gf_div(bch, a: c, b: a);
690	l = a_log(bch, x: f);
691	l += (l & `1`) ? GF_N(bch) : `0`;
692	e = a_pow(bch, i: l/`2`);
693	/*
694	* use transformation z=X+e:
695	* z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
696	* z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
697	* z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
698	* z^4 + az^3 + b'z^2 + d'
699	*/
700	d = a_pow(bch, i: `2`*l)^gf_mul(bch, a: b, b: f)^d;
701	b = gf_mul(bch, a, b: e)^b;
702	}
703	/ now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d /
704	if (d == `0`)
705	/ assume all roots have multiplicity 1 /
706	return `0`;
707
708	c2 = gf_inv(bch, a: d);
709	b2 = gf_div(bch, a, b: d);
710	a2 = gf_div(bch, a: b, b: d);
711	} else {
712	/ polynomial is already affine /
713	c2 = d;
714	b2 = c;
715	a2 = b;
716	}
717	/ find the 4 roots of this affine polynomial /
718	if (find_affine4_roots(bch, a: a2, b: b2, c: c2, roots) == `4`) {
719	for (i = `0`; i < `4`; i++) {
720	/ post-process roots (reverse transformations) /
721	f = a ? gf_inv(bch, a: roots[i]) : roots[i];
722	roots[i] = a_ilog(bch, x: f^e);
723	}
724	n = `4`;
725	}
726	return n;
727	}
728
729	/*
730	* build monic, log-based representation of a polynomial
731	*/
732	static void gf_poly_logrep(struct bch_control *bch,
733	const struct gf_poly a, int* *rep)
734	{
735	int i, d = a->deg, l = GF_N(bch)-a_log(bch, x: a->c[a->deg]);
736
737	/ represent 0 values with -1; warning, rep[d] is not set to 1 /
738	for (i = `0`; i < d; i++)
739	rep[i] = a->c[i] ? mod_s(bch, v: a_log(bch, x: a->c[i])+l) : -`1`;
740	}
741
742	/*
743	* compute polynomial Euclidean division remainder in GF(2^m)[X]
744	*/
745	static void gf_poly_mod(struct bch_control bch, struct* gf_poly *a,
746	const struct gf_poly b, int* *rep)
747	{
748	int la, p, m;
749	unsigned int i, j, *c = a->c;
750	const unsigned int d = b->deg;
751
752	if (a->deg < d)
753	return;
754
755	/ reuse or compute log representation of denominator /
756	if (!rep) {
757	rep = bch->cache;
758	gf_poly_logrep(bch, a: b, rep);
759	}
760
761	for (j = a->deg; j >= d; j--) {
762	if (c[j]) {
763	la = a_log(bch, x: c[j]);
764	p = j-d;
765	for (i = `0`; i < d; i++, p++) {
766	m = rep[i];
767	if (m >= `0`)
768	c[p] ^= bch->a_pow_tab[mod_s(bch,
769	v: m+la)];
770	}
771	}
772	}
773	a->deg = d-`1`;
774	while (!c[a->deg] && a->deg)
775	a->deg--;
776	}
777
778	/*
779	* compute polynomial Euclidean division quotient in GF(2^m)[X]
780	*/
781	static void gf_poly_div(struct bch_control bch, struct* gf_poly *a,
782	const struct gf_poly b, struct* gf_poly *q)
783	{
784	if (a->deg >= b->deg) {
785	q->deg = a->deg-b->deg;
786	/ compute a mod b (modifies a) /
787	gf_poly_mod(bch, a, b, NULL);
788	/ quotient is stored in upper part of polynomial a /
789	memcpy(q->c, &a->c[b->deg], (`1`+q->deg)*sizeof(unsigned int));
790	} else {
791	q->deg = `0`;
792	q->c[`0`] = `0`;
793	}
794	}
795
796	/*
797	* compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
798	*/
799	static struct gf_poly gf_poly_gcd(struct* bch_control bch, struct* gf_poly *a,
800	struct gf_poly *b)
801	{
802	struct gf_poly *tmp;
803
804	dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
805
806	if (a->deg < b->deg) {
807	tmp = b;
808	b = a;
809	a = tmp;
810	}
811
812	while (b->deg > `0`) {
813	gf_poly_mod(bch, a, b, NULL);
814	tmp = b;
815	b = a;
816	a = tmp;
817	}
818
819	dbg("%s\n", gf_poly_str(a));
820
821	return a;
822	}
823
824	/*
825	* Given a polynomial f and an integer k, compute Tr(a^kX) mod f
826	* This is used in Berlekamp Trace algorithm for splitting polynomials
827	*/
828	static void compute_trace_bk_mod(struct bch_control bch, int* k,
829	const struct gf_poly f, struct* gf_poly *z,
830	struct gf_poly *out)
831	{
832	const int m = GF_M(bch);
833	int i, j;
834
835	/ z contains z^2j mod f /
836	z->deg = `1`;
837	z->c[`0`] = `0`;
838	z->c[`1`] = bch->a_pow_tab[k];
839
840	out->deg = `0`;
841	memset(out, `0`, GF_POLY_SZ(f->deg));
842
843	/ compute f log representation only once /
844	gf_poly_logrep(bch, a: f, rep: bch->cache);
845
846	for (i = `0`; i < m; i++) {
847	/ add a^(k2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 /*
848	for (j = z->deg; j >= `0`; j--) {
849	out->c[j] ^= z->c[j];
850	z->c[`2`*j] = gf_sqr(bch, a: z->c[j]);
851	z->c[`2`*j+`1`] = `0`;
852	}
853	if (z->deg > out->deg)
854	out->deg = z->deg;
855
856	if (i < m-`1`) {
857	z->deg *= `2`;
858	/ z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f /
859	gf_poly_mod(bch, a: z, b: f, rep: bch->cache);
860	}
861	}
862	while (!out->c[out->deg] && out->deg)
863	out->deg--;
864
865	dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
866	}
867
868	/*
869	* factor a polynomial using Berlekamp Trace algorithm (BTA)
870	*/
871	static void factor_polynomial(struct bch_control bch, int* k, struct gf_poly *f,
872	struct gf_poly g, struct gf_poly h)
873	{
874	struct gf_poly *f2 = bch->poly_2t[`0`];
875	struct gf_poly *q = bch->poly_2t[`1`];
876	struct gf_poly *tk = bch->poly_2t[`2`];
877	struct gf_poly *z = bch->poly_2t[`3`];
878	struct gf_poly *gcd;
879
880	dbg("factoring %s...\n", gf_poly_str(f));
881
882	*g = f;
883	*h = NULL;
884
885	/ tk = Tr(a^k.X) mod f /
886	compute_trace_bk_mod(bch, k, f, z, out: tk);
887
888	if (tk->deg > `0`) {
889	/ compute g = gcd(f, tk) (destructive operation) /
890	gf_poly_copy(dst: f2, src: f);
891	gcd = gf_poly_gcd(bch, a: f2, b: tk);
892	if (gcd->deg < f->deg) {
893	/ compute h=f/gcd(f,tk); this will modify f and q /
894	gf_poly_div(bch, a: f, b: gcd, q);
895	/ store g and h in-place (clobbering f) /
896	h = &((struct* gf_poly_deg1 *)f)[gcd->deg].poly;
897	gf_poly_copy(dst: *g, src: gcd);
898	gf_poly_copy(dst: *h, src: q);
899	}
900	}
901	}
902
903	/*
904	* find roots of a polynomial, using BTZ algorithm; see the beginning of this
905	* file for details
906	*/
907	static int find_poly_roots(struct bch_control bch, unsigned* int k,
908	struct gf_poly poly, unsigned* int *roots)
909	{
910	int cnt;
911	struct gf_poly f1, f2;
912
913	switch (poly->deg) {
914	/ handle low degree polynomials with ad hoc techniques /
915	case `1`:
916	cnt = find_poly_deg1_roots(bch, poly, roots);
917	break;
918	case `2`:
919	cnt = find_poly_deg2_roots(bch, poly, roots);
920	break;
921	case `3`:
922	cnt = find_poly_deg3_roots(bch, poly, roots);
923	break;
924	case `4`:
925	cnt = find_poly_deg4_roots(bch, poly, roots);
926	break;
927	default:
928	/ factor polynomial using Berlekamp Trace Algorithm (BTA) /
929	cnt = `0`;
930	if (poly->deg && (k <= GF_M(bch))) {
931	factor_polynomial(bch, k, f: poly, g: &f1, h: &f2);
932	if (f1)
933	cnt += find_poly_roots(bch, k: k+`1`, poly: f1, roots);
934	if (f2)
935	cnt += find_poly_roots(bch, k: k+`1`, poly: f2, roots: roots+cnt);
936	}
937	break;
938	}
939	return cnt;
940	}
941
#if defined(USE_CHIEN_SEARCH)
/*
 * exhaustive root search (Chien) implementation - not used, included only for
 * reference/comparison tests
 *
 * Returns the number of roots stored in @roots if it equals the degree of
 * @p, 0 otherwise. When enabled, the macro below redirects
 * find_poly_roots() calls to this function; it relies on a variable named
 * len being in scope at the call site.
 */
static int chien_search(struct bch_control *bch, unsigned int len,
			struct gf_poly *p, unsigned int *roots)
{
	int m;
	unsigned int i, j, syn, syn0, count = 0;
	const unsigned int k = 8*len+bch->ecc_bits;

	/* use a log-based representation of polynomial */
	gf_poly_logrep(bch, p, bch->cache);
	bch->cache[p->deg] = 0;
	syn0 = gf_div(bch, p->c[0], p->c[p->deg]);

	for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
		/* compute elp(a^i) */
		for (j = 1, syn = syn0; j <= p->deg; j++) {
			m = bch->cache[j];
			if (m >= 0)
				syn ^= a_pow(bch, m+j*i);
		}
		if (syn == 0) {
			roots[count++] = GF_N(bch)-i;
			if (count == p->deg)
				break;
		}
	}
	return (count == p->deg) ? count : 0;
}
#define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
#endif /* USE_CHIEN_SEARCH */
976
977	/**
978	* bch_decode - decode received codeword and find bit error locations
979	* @bch: BCH control structure
980	* @data: received data, ignored if @calc_ecc is provided
981	* @len: data length in bytes, must always be provided
982	* @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
983	* @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
984	* @syn: hw computed syndrome data (if NULL, syndrome is calculated)
985	* @errloc: output array of error locations
986	*
987	* Returns:
988	* The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
989	* invalid parameters were provided
990	*
991	* Depending on the available hw BCH support and the need to compute @calc_ecc
992	* separately (using bch_encode()), this function should be called with one of
993	* the following parameter configurations -
994	*
995	* by providing @data and @recv_ecc only:
996	* bch_decode(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
997	*
998	* by providing @recv_ecc and @calc_ecc:
999	* bch_decode(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
1000	*
1001	* by providing ecc = recv_ecc XOR calc_ecc:
1002	* bch_decode(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
1003	*
1004	* by providing syndrome results @syn:
1005	* bch_decode(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
1006	*
1007	* Once bch_decode() has successfully returned with a positive value, error
1008	* locations returned in array @errloc should be interpreted as follows -
1009	*
1010	* if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
1011	* data correction)
1012	*
1013	* if (errloc[n] < 8*len), then n-th error is located in data and can be
1014	* corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
1015	*
1016	* Note that this function does not perform any data correction by itself, it
1017	* merely indicates error locations.
1018	*/
1019	int bch_decode(struct bch_control bch, const* uint8_t data, unsigned* int len,
1020	const uint8_t recv_ecc, const* uint8_t *calc_ecc,
1021	const unsigned int syn, unsigned* int *errloc)
1022	{
1023	const unsigned int ecc_words = BCH_ECC_WORDS(bch);
1024	unsigned int nbits;
1025	int i, err, nroots;
1026	uint32_t sum;
1027
1028	/ sanity check: make sure data length can be handled /
1029	if (`8`*len > (bch->n-bch->ecc_bits))
1030	return -EINVAL;
1031
1032	/ if caller does not provide syndromes, compute them /
1033	if (!syn) {
1034	if (!calc_ecc) {
1035	/ compute received data ecc into an internal buffer /
1036	if (!data \|\| !recv_ecc)
1037	return -EINVAL;
1038	bch_encode(bch, data, len, NULL);
1039	} else {
1040	/ load provided calculated ecc /
1041	load_ecc8(bch, dst: bch->ecc_buf, src: calc_ecc);
1042	}
1043	/ load received ecc or assume it was XORed in calc_ecc /
1044	if (recv_ecc) {
1045	load_ecc8(bch, dst: bch->ecc_buf2, src: recv_ecc);
1046	/ XOR received and calculated ecc /
1047	for (i = `0`, sum = `0`; i < (int)ecc_words; i++) {
1048	bch->ecc_buf[i] ^= bch->ecc_buf2[i];
1049	sum \|= bch->ecc_buf[i];
1050	}
1051	if (!sum)
1052	/ no error found /
1053	return `0`;
1054	}
1055	compute_syndromes(bch, ecc: bch->ecc_buf, syn: bch->syn);
1056	syn = bch->syn;
1057	}
1058
1059	err = compute_error_locator_polynomial(bch, syn);
1060	if (err > `0`) {
1061	nroots = find_poly_roots(bch, k: `1`, poly: bch->elp, roots: errloc);
1062	if (err != nroots)
1063	err = -`1`;
1064	}
1065	if (err > `0`) {
1066	/ post-process raw error locations for easier correction /
1067	nbits = (len*`8`)+bch->ecc_bits;
1068	for (i = `0`; i < err; i++) {
1069	if (errloc[i] >= nbits) {
1070	err = -`1`;
1071	break;
1072	}
1073	errloc[i] = nbits-`1`-errloc[i];
1074	if (!bch->swap_bits)
1075	errloc[i] = (errloc[i] & ~`7`) \|
1076	(`7`-(errloc[i] & `7`));
1077	}
1078	}
1079	return (err >= `0`) ? err : -EBADMSG;
1080	}
1081	EXPORT_SYMBOL_GPL(bch_decode);
1082
1083	/*
1084	* generate Galois field lookup tables
1085	*/
1086	static int build_gf_tables(struct bch_control bch, unsigned* int poly)
1087	{
1088	unsigned int i, x = `1`;
1089	const unsigned int k = `1` << deg(poly);
1090
1091	/ primitive polynomial must be of degree m /
1092	if (k != (`1u` << GF_M(bch)))
1093	return -`1`;
1094
1095	for (i = `0`; i < GF_N(bch); i++) {
1096	bch->a_pow_tab[i] = x;
1097	bch->a_log_tab[x] = i;
1098	if (i && (x == `1`))
1099	/ polynomial is not primitive (a^i=1 with 0<i<2^m-1) /
1100	return -`1`;
1101	x <<= `1`;
1102	if (x & k)
1103	x ^= poly;
1104	}
1105	bch->a_pow_tab[GF_N(bch)] = `1`;
1106	bch->a_log_tab[`0`] = `0`;
1107
1108	return `0`;
1109	}
1110
1111	/*
1112	* compute generator polynomial remainder tables for fast encoding
1113	*/
1114	static void build_mod8_tables(struct bch_control bch, const* uint32_t *g)
1115	{
1116	int i, j, b, d;
1117	uint32_t data, hi, lo, *tab;
1118	const int l = BCH_ECC_WORDS(bch);
1119	const int plen = DIV_ROUND_UP(bch->ecc_bits+`1`, `32`);
1120	const int ecclen = DIV_ROUND_UP(bch->ecc_bits, `32`);
1121
1122	memset(bch->mod8_tab, `0`, `4``256`l*sizeof(*bch->mod8_tab));
1123
1124	for (i = `0`; i < `256`; i++) {
1125	/ p(X)=i is a small polynomial of weight <= 8 /
1126	for (b = `0`; b < `4`; b++) {
1127	/ we want to compute (p(X).X^(8b+deg(g))) mod g(X) /*
1128	tab = bch->mod8_tab + (b`256`+i)l;
1129	data = i << (`8`*b);
1130	while (data) {
1131	d = deg(poly: data);
1132	/ subtract X^d.g(X) from p(X).X^(8b+deg(g)) /*
1133	data ^= g[`0`] >> (`31`-d);
1134	for (j = `0`; j < ecclen; j++) {
1135	hi = (d < `31`) ? g[j] << (d+`1`) : `0`;
1136	lo = (j+`1` < plen) ?
1137	g[j+`1`] >> (`31`-d) : `0`;
1138	tab[j] ^= hi\|lo;
1139	}
1140	}
1141	}
1142	}
1143	}
1144
1145	/*
1146	* build a base for factoring degree 2 polynomials
1147	*/
1148	static int build_deg2_base(struct bch_control *bch)
1149	{
1150	const int m = GF_M(bch);
1151	int i, j, r;
1152	unsigned int sum, x, y, remaining, ak = `0`, xi[BCH_MAX_M];
1153
1154	/ find k s.t. Tr(a^k) = 1 and 0 <= k < m /
1155	for (i = `0`; i < m; i++) {
1156	for (j = `0`, sum = `0`; j < m; j++)
1157	sum ^= a_pow(bch, i: i*(`1` << j));
1158
1159	if (sum) {
1160	ak = bch->a_pow_tab[i];
1161	break;
1162	}
1163	}
1164	/ find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k /
1165	remaining = m;
1166	memset(xi, `0`, sizeof(xi));
1167
1168	for (x = `0`; (x <= GF_N(bch)) && remaining; x++) {
1169	y = gf_sqr(bch, a: x)^x;
1170	for (i = `0`; i < `2`; i++) {
1171	r = a_log(bch, x: y);
1172	if (y && (r < m) && !xi[r]) {
1173	bch->xi_tab[r] = x;
1174	xi[r] = `1`;
1175	remaining--;
1176	dbg("x%d = %x\n", r, x);
1177	break;
1178	}
1179	y ^= ak;
1180	}
1181	}
1182	/ should not happen but check anyway /
1183	return remaining ? -`1` : `0`;
1184	}
1185
1186	static void bch_alloc(size_t size, int* *err)
1187	{
1188	void *ptr;
1189
1190	ptr = kmalloc(size, GFP_KERNEL);
1191	if (ptr == NULL)
1192	*err = `1`;
1193	return ptr;
1194	}
1195
1196	/*
1197	* compute generator polynomial for given (m,t) parameters.
1198	*/
1199	static uint32_t compute_generator_polynomial(struct* bch_control *bch)
1200	{
1201	const unsigned int m = GF_M(bch);
1202	const unsigned int t = GF_T(bch);
1203	int n, err = `0`;
1204	unsigned int i, j, nbits, r, word, *roots;
1205	struct gf_poly *g;
1206	uint32_t *genpoly;
1207
1208	g = bch_alloc(GF_POLY_SZ(m*t), err: &err);
1209	roots = bch_alloc(size: (bch->n+`1`)*sizeof(*roots), err: &err);
1210	genpoly = bch_alloc(DIV_ROUND_UP(mt+`1`, `32`)sizeof(*genpoly), err: &err);
1211
1212	if (err) {
1213	kfree(objp: genpoly);
1214	genpoly = NULL;
1215	goto finish;
1216	}
1217
1218	/ enumerate all roots of g(X) /
1219	memset(roots , `0`, (bch->n+`1`)*sizeof(*roots));
1220	for (i = `0`; i < t; i++) {
1221	for (j = `0`, r = `2`*i+`1`; j < m; j++) {
1222	roots[r] = `1`;
1223	r = mod_s(bch, v: `2`*r);
1224	}
1225	}
1226	/ build generator polynomial g(X) /
1227	g->deg = `0`;
1228	g->c[`0`] = `1`;
1229	for (i = `0`; i < GF_N(bch); i++) {
1230	if (roots[i]) {
1231	/ multiply g(X) by (X+root) /
1232	r = bch->a_pow_tab[i];
1233	g->c[g->deg+`1`] = `1`;
1234	for (j = g->deg; j > `0`; j--)
1235	g->c[j] = gf_mul(bch, a: g->c[j], b: r)^g->c[j-`1`];
1236
1237	g->c[`0`] = gf_mul(bch, a: g->c[`0`], b: r);
1238	g->deg++;
1239	}
1240	}
1241	/ store left-justified binary representation of g(X) /
1242	n = g->deg+`1`;
1243	i = `0`;
1244
1245	while (n > `0`) {
1246	nbits = (n > `32`) ? `32` : n;
1247	for (j = `0`, word = `0`; j < nbits; j++) {
1248	if (g->c[n-`1`-j])
1249	word \|= `1u` << (`31`-j);
1250	}
1251	genpoly[i++] = word;
1252	n -= nbits;
1253	}
1254	bch->ecc_bits = g->deg;
1255
1256	finish:
1257	kfree(objp: g);
1258	kfree(objp: roots);
1259
1260	return genpoly;
1261	}
1262
1263	/**
1264	* bch_init - initialize a BCH encoder/decoder
1265	* @m: Galois field order, should be in the range 5-15
1266	* @t: maximum error correction capability, in bits
1267	* @prim_poly: user-provided primitive polynomial (or 0 to use default)
1268	* @swap_bits: swap bits within data and syndrome bytes
1269	*
1270	* Returns:
1271	* a newly allocated BCH control structure if successful, NULL otherwise
1272	*
1273	* This initialization can take some time, as lookup tables are built for fast
1274	* encoding/decoding; make sure not to call this function from a time critical
1275	* path. Usually, bch_init() should be called on module/driver init and
1276	* bch_free() should be called to release memory on exit.
1277	*
1278	* You may provide your own primitive polynomial of degree @m in argument
1279	* @prim_poly, or let bch_init() use its default polynomial.
1280	*
1281	* Once bch_init() has successfully returned a pointer to a newly allocated
1282	* BCH control structure, ecc length in bytes is given by member @ecc_bytes of
1283	* the structure.
1284	*/
1285	struct bch_control bch_init(int* m, int t, unsigned int prim_poly,
1286	bool swap_bits)
1287	{
1288	int err = `0`;
1289	unsigned int i, words;
1290	uint32_t *genpoly;
1291	struct bch_control *bch = NULL;
1292
1293	const int min_m = `5`;
1294
1295	/ default primitive polynomials /
1296	static const unsigned int prim_poly_tab[] = {
1297	`0x25`, `0x43`, `0x83`, `0x11d`, `0x211`, `0x409`, `0x805`, `0x1053`, `0x201b`,
1298	`0x402b`, `0x8003`,
1299	};
1300
1301	#if defined(CONFIG_BCH_CONST_PARAMS)
1302	if ((m != (CONFIG_BCH_CONST_M)) \|\| (t != (CONFIG_BCH_CONST_T))) {
1303	printk(KERN_ERR "bch encoder/decoder was configured to support "
1304	"parameters m=%d, t=%d only!\n",
1305	CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
1306	goto fail;
1307	}
1308	#endif
1309	if ((m < min_m) \|\| (m > BCH_MAX_M))
1310	/*
1311	* values of m greater than 15 are not currently supported;
1312	* supporting m > 15 would require changing table base type
1313	* (uint16_t) and a small patch in matrix transposition
1314	*/
1315	goto fail;
1316
1317	if (t > BCH_MAX_T)
1318	/*
1319	* we can support larger than 64 bits if necessary, at the
1320	* cost of higher stack usage.
1321	*/
1322	goto fail;
1323
1324	/ sanity checks /
1325	if ((t < `1`) \|\| (m*t >= ((`1` << m)-`1`)))
1326	/ invalid t value /
1327	goto fail;
1328
1329	/ select a primitive polynomial for generating GF(2^m) /
1330	if (prim_poly == `0`)
1331	prim_poly = prim_poly_tab[m-min_m];
1332
1333	bch = kzalloc(size: sizeof(*bch), GFP_KERNEL);
1334	if (bch == NULL)
1335	goto fail;
1336
1337	bch->m = m;
1338	bch->t = t;
1339	bch->n = (`1` << m)-`1`;
1340	words = DIV_ROUND_UP(m*t, `32`);
1341	bch->ecc_bytes = DIV_ROUND_UP(m*t, `8`);
1342	bch->a_pow_tab = bch_alloc(size: (`1`+bch->n)*sizeof(*bch->a_pow_tab), err: &err);
1343	bch->a_log_tab = bch_alloc(size: (`1`+bch->n)*sizeof(*bch->a_log_tab), err: &err);
1344	bch->mod8_tab = bch_alloc(size: words`1024`sizeof(*bch->mod8_tab), err: &err);
1345	bch->ecc_buf = bch_alloc(size: words*sizeof(*bch->ecc_buf), err: &err);
1346	bch->ecc_buf2 = bch_alloc(size: words*sizeof(*bch->ecc_buf2), err: &err);
1347	bch->xi_tab = bch_alloc(size: m*sizeof(*bch->xi_tab), err: &err);
1348	bch->syn = bch_alloc(size: `2`tsizeof(*bch->syn), err: &err);
1349	bch->cache = bch_alloc(size: `2`tsizeof(*bch->cache), err: &err);
1350	bch->elp = bch_alloc(size: (t+`1`)*sizeof(struct gf_poly_deg1), err: &err);
1351	bch->swap_bits = swap_bits;
1352
1353	for (i = `0`; i < ARRAY_SIZE(bch->poly_2t); i++)
1354	bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(`2`*t), err: &err);
1355
1356	if (err)
1357	goto fail;
1358
1359	err = build_gf_tables(bch, poly: prim_poly);
1360	if (err)
1361	goto fail;
1362
1363	/ use generator polynomial for computing encoding tables /
1364	genpoly = compute_generator_polynomial(bch);
1365	if (genpoly == NULL)
1366	goto fail;
1367
1368	build_mod8_tables(bch, g: genpoly);
1369	kfree(objp: genpoly);
1370
1371	err = build_deg2_base(bch);
1372	if (err)
1373	goto fail;
1374
1375	return bch;
1376
1377	fail:
1378	bch_free(bch);
1379	return NULL;
1380	}
1381	EXPORT_SYMBOL_GPL(bch_init);
1382
1383	/**
1384	* bch_free - free the BCH control structure
1385	* @bch: BCH control structure to release
1386	*/
1387	void bch_free(struct bch_control *bch)
1388	{
1389	unsigned int i;
1390
1391	if (bch) {
1392	kfree(objp: bch->a_pow_tab);
1393	kfree(objp: bch->a_log_tab);
1394	kfree(objp: bch->mod8_tab);
1395	kfree(objp: bch->ecc_buf);
1396	kfree(objp: bch->ecc_buf2);
1397	kfree(objp: bch->xi_tab);
1398	kfree(objp: bch->syn);
1399	kfree(objp: bch->cache);
1400	kfree(objp: bch->elp);
1401
1402	for (i = `0`; i < ARRAY_SIZE(bch->poly_2t); i++)
1403	kfree(objp: bch->poly_2t[i]);
1404
1405	kfree(objp: bch);
1406	}
1407	}
1408	EXPORT_SYMBOL_GPL(bch_free);
1409
1410	MODULE_LICENSE("GPL");
1411	MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
1412	MODULE_DESCRIPTION("Binary BCH encoder/decoder");
1413

source code of linux/lib/bch.c