1 | /* mpn_mul_n -- Multiply two natural numbers of length n. |
---|---|

2 | |

3 | Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc. |

4 | |

5 | This file is part of the GNU MP Library. |

6 | |

7 | The GNU MP Library is free software; you can redistribute it and/or modify |

8 | it under the terms of the GNU Lesser General Public License as published by |

9 | the Free Software Foundation; either version 2.1 of the License, or (at your |

10 | option) any later version. |

11 | |

12 | The GNU MP Library is distributed in the hope that it will be useful, but |

13 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |

14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |

15 | License for more details. |

16 | |

17 | You should have received a copy of the GNU Lesser General Public License |

18 | along with the GNU MP Library; see the file COPYING.LIB. If not, write to |

19 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |

20 | MA 02111-1307, USA. */ |

21 | |

22 | #include <config.h> |

23 | #include "gmp-impl.h" |

24 | |

25 | /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP), |

26 | both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are |

27 | always stored. Return the most significant limb. |

28 | |

29 | Argument constraints: |

30 | 1. PRODP != UP and PRODP != VP, i.e. the destination |

31 | must be distinct from the multiplier and the multiplicand. */ |

32 | |

33 | /* If KARATSUBA_THRESHOLD is not already defined, define it to a |

34 | value which is good on most machines. */ |

35 | #ifndef KARATSUBA_THRESHOLD |

36 | #define KARATSUBA_THRESHOLD 32 |

37 | #endif |

38 | |

39 | /* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */ |

40 | #if KARATSUBA_THRESHOLD < 2 |

41 | #undef KARATSUBA_THRESHOLD |

42 | #define KARATSUBA_THRESHOLD 2 |

43 | #endif |

44 | |

45 | /* Handle simple cases with traditional multiplication. |

46 | |

47 | This is the most critical code of multiplication. All multiplies rely |

48 | on this, both small and huge. Small ones arrive here immediately. Huge |

49 | ones arrive here as this is the base case for Karatsuba's recursive |

50 | algorithm below. */ |

51 | |

52 | void |

53 | #if __STDC__ |

54 | impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) |

55 | #else |

56 | impn_mul_n_basecase (prodp, up, vp, size) |

57 | mp_ptr prodp; |

58 | mp_srcptr up; |

59 | mp_srcptr vp; |

60 | mp_size_t size; |

61 | #endif |

62 | { |

63 | mp_size_t i; |

64 | mp_limb_t cy_limb; |

65 | mp_limb_t v_limb; |

66 | |

67 | /* Multiply by the first limb in V separately, as the result can be |

68 | stored (not added) to PROD. We also avoid a loop for zeroing. */ |

69 | v_limb = vp[0]; |

70 | if (v_limb <= 1) |

71 | { |

72 | if (v_limb == 1) |

73 | MPN_COPY (prodp, up, size); |

74 | else |

75 | MPN_ZERO (prodp, size); |

76 | cy_limb = 0; |

77 | } |

78 | else |

79 | cy_limb = mpn_mul_1 (prodp, up, size, v_limb); |

80 | |

81 | prodp[size] = cy_limb; |

82 | prodp++; |

83 | |

84 | /* For each iteration in the outer loop, multiply one limb from |

85 | U with one limb from V, and add it to PROD. */ |

86 | for (i = 1; i < size; i++) |

87 | { |

88 | v_limb = vp[i]; |

89 | if (v_limb <= 1) |

90 | { |

91 | cy_limb = 0; |

92 | if (v_limb == 1) |

93 | cy_limb = mpn_add_n (prodp, prodp, up, size); |

94 | } |

95 | else |

96 | cy_limb = mpn_addmul_1 (prodp, up, size, v_limb); |

97 | |

98 | prodp[size] = cy_limb; |

99 | prodp++; |

100 | } |

101 | } |

102 | |

103 | void |

104 | #if __STDC__ |

105 | impn_mul_n (mp_ptr prodp, |

106 | mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace) |

107 | #else |

108 | impn_mul_n (prodp, up, vp, size, tspace) |

109 | mp_ptr prodp; |

110 | mp_srcptr up; |

111 | mp_srcptr vp; |

112 | mp_size_t size; |

113 | mp_ptr tspace; |

114 | #endif |

115 | { |

116 | if ((size & 1) != 0) |

117 | { |

118 | /* The size is odd, the code code below doesn't handle that. |

119 | Multiply the least significant (size - 1) limbs with a recursive |

120 | call, and handle the most significant limb of S1 and S2 |

121 | separately. */ |

122 | /* A slightly faster way to do this would be to make the Karatsuba |

123 | code below behave as if the size were even, and let it check for |

124 | odd size in the end. I.e., in essence move this code to the end. |

125 | Doing so would save us a recursive call, and potentially make the |

126 | stack grow a lot less. */ |

127 | |

128 | mp_size_t esize = size - 1; /* even size */ |

129 | mp_limb_t cy_limb; |

130 | |

131 | MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace); |

132 | cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]); |

133 | prodp[esize + esize] = cy_limb; |

134 | cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]); |

135 | |

136 | prodp[esize + size] = cy_limb; |

137 | } |

138 | else |

139 | { |

140 | /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm. |

141 | |

142 | Split U in two pieces, U1 and U0, such that |

143 | U = U0 + U1*(B**n), |

144 | and V in V1 and V0, such that |

145 | V = V0 + V1*(B**n). |

146 | |

147 | UV is then computed recursively using the identity |

148 | |

149 | 2n n n n |

150 | UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V |

151 | 1 1 1 0 0 1 0 0 |

152 | |

153 | Where B = 2**BITS_PER_MP_LIMB. */ |

154 | |

155 | mp_size_t hsize = size >> 1; |

156 | mp_limb_t cy; |

157 | int negflg; |

158 | |

159 | /*** Product H. ________________ ________________ |

160 | |_____U1 x V1____||____U0 x V0_____| */ |

161 | /* Put result in upper part of PROD and pass low part of TSPACE |

162 | as new TSPACE. */ |

163 | MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace); |

164 | |

165 | /*** Product M. ________________ |

166 | |_(U1-U0)(V0-V1)_| */ |

167 | if (mpn_cmp (up + hsize, up, hsize) >= 0) |

168 | { |

169 | mpn_sub_n (prodp, up + hsize, up, hsize); |

170 | negflg = 0; |

171 | } |

172 | else |

173 | { |

174 | mpn_sub_n (prodp, up, up + hsize, hsize); |

175 | negflg = 1; |

176 | } |

177 | if (mpn_cmp (vp + hsize, vp, hsize) >= 0) |

178 | { |

179 | mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize); |

180 | negflg ^= 1; |

181 | } |

182 | else |

183 | { |

184 | mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize); |

185 | /* No change of NEGFLG. */ |

186 | } |

187 | /* Read temporary operands from low part of PROD. |

188 | Put result in low part of TSPACE using upper part of TSPACE |

189 | as new TSPACE. */ |

190 | MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size); |

191 | |

192 | /*** Add/copy product H. */ |

193 | MPN_COPY (prodp + hsize, prodp + size, hsize); |

194 | cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize); |

195 | |

196 | /*** Add product M (if NEGFLG M is a negative number). */ |

197 | if (negflg) |

198 | cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size); |

199 | else |

200 | cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); |

201 | |

202 | /*** Product L. ________________ ________________ |

203 | |________________||____U0 x V0_____| */ |

204 | /* Read temporary operands from low part of PROD. |

205 | Put result in low part of TSPACE using upper part of TSPACE |

206 | as new TSPACE. */ |

207 | MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size); |

208 | |

209 | /*** Add/copy Product L (twice). */ |

210 | |

211 | cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size); |

212 | if (cy) |

213 | mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy); |

214 | |

215 | MPN_COPY (prodp, tspace, hsize); |

216 | cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize); |

217 | if (cy) |

218 | mpn_add_1 (prodp + size, prodp + size, size, 1); |

219 | } |

220 | } |

221 |