1 | // Optimizations for random number functions, x86 version -*- C++ -*- |
---|---|

2 | |

3 | // Copyright (C) 2012-2017 Free Software Foundation, Inc. |

4 | // |

5 | // This file is part of the GNU ISO C++ Library. This library is free |

6 | // software; you can redistribute it and/or modify it under the |

7 | // terms of the GNU General Public License as published by the |

8 | // Free Software Foundation; either version 3, or (at your option) |

9 | // any later version. |

10 | |

11 | // This library is distributed in the hope that it will be useful, |

12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of |

13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |

14 | // GNU General Public License for more details. |

15 | |

16 | // Under Section 7 of GPL version 3, you are granted additional |

17 | // permissions described in the GCC Runtime Library Exception, version |

18 | // 3.1, as published by the Free Software Foundation. |

19 | |

20 | // You should have received a copy of the GNU General Public License and |

21 | // a copy of the GCC Runtime Library Exception along with this program; |

22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |

23 | // <http://www.gnu.org/licenses/>. |

24 | |

25 | /** @file bits/opt_random.h |

26 | * This is an internal header file, included by other library headers. |

27 | * Do not attempt to use it directly. @headername{random} |

28 | */ |

29 | |

30 | #ifndef _BITS_OPT_RANDOM_H |

31 | #define _BITS_OPT_RANDOM_H 1 |

32 | |

33 | #ifdef __SSE3__ |

34 | #include <pmmintrin.h> |

35 | #endif |

36 | |

37 | |

38 | #pragma GCC system_header |

39 | |

40 | |

41 | namespace std _GLIBCXX_VISIBILITY(default) |

42 | { |

43 | _GLIBCXX_BEGIN_NAMESPACE_VERSION |

44 | |

45 | #ifdef __SSE3__ |

46 | template<> |

47 | template<typename _UniformRandomNumberGenerator> |

48 | void |

49 | normal_distribution<double>:: |

50 | __generate(typename normal_distribution<double>::result_type* __f, |

51 | typename normal_distribution<double>::result_type* __t, |

52 | _UniformRandomNumberGenerator& __urng, |

53 | const param_type& __param) |

54 | { |

55 | typedef uint64_t __uctype; |

56 | |

57 | if (__f == __t) |

58 | return; |

59 | |

60 | if (_M_saved_available) |

61 | { |

62 | _M_saved_available = false; |

63 | *__f++ = _M_saved * __param.stddev() + __param.mean(); |

64 | |

65 | if (__f == __t) |

66 | return; |

67 | } |

68 | |

69 | constexpr uint64_t __maskval = 0xfffffffffffffull; |

70 | static const __m128i __mask = _mm_set1_epi64x(__maskval); |

71 | static const __m128i __two = _mm_set1_epi64x(0x4000000000000000ull); |

72 | static const __m128d __three = _mm_set1_pd(3.0); |

73 | const __m128d __av = _mm_set1_pd(__param.mean()); |

74 | |

75 | const __uctype __urngmin = __urng.min(); |

76 | const __uctype __urngmax = __urng.max(); |

77 | const __uctype __urngrange = __urngmax - __urngmin; |

78 | const __uctype __uerngrange = __urngrange + 1; |

79 | |

80 | while (__f + 1 < __t) |

81 | { |

82 | double __le; |

83 | __m128d __x; |

84 | do |

85 | { |

86 | union |

87 | { |

88 | __m128i __i; |

89 | __m128d __d; |

90 | } __v; |

91 | |

92 | if (__urngrange > __maskval) |

93 | { |

94 | if (__detail::_Power_of_2(__uerngrange)) |

95 | __v.__i = _mm_and_si128(_mm_set_epi64x(__urng(), |

96 | __urng()), |

97 | __mask); |

98 | else |

99 | { |

100 | const __uctype __uerange = __maskval + 1; |

101 | const __uctype __scaling = __urngrange / __uerange; |

102 | const __uctype __past = __uerange * __scaling; |

103 | uint64_t __v1; |

104 | do |

105 | __v1 = __uctype(__urng()) - __urngmin; |

106 | while (__v1 >= __past); |

107 | __v1 /= __scaling; |

108 | uint64_t __v2; |

109 | do |

110 | __v2 = __uctype(__urng()) - __urngmin; |

111 | while (__v2 >= __past); |

112 | __v2 /= __scaling; |

113 | |

114 | __v.__i = _mm_set_epi64x(__v1, __v2); |

115 | } |

116 | } |

117 | else if (__urngrange == __maskval) |

118 | __v.__i = _mm_set_epi64x(__urng(), __urng()); |

119 | else if ((__urngrange + 2) * __urngrange >= __maskval |

120 | && __detail::_Power_of_2(__uerngrange)) |

121 | { |

122 | uint64_t __v1 = __urng() * __uerngrange + __urng(); |

123 | uint64_t __v2 = __urng() * __uerngrange + __urng(); |

124 | |

125 | __v.__i = _mm_and_si128(_mm_set_epi64x(__v1, __v2), |

126 | __mask); |

127 | } |

128 | else |

129 | { |

130 | size_t __nrng = 2; |

131 | __uctype __high = __maskval / __uerngrange / __uerngrange; |

132 | while (__high > __uerngrange) |

133 | { |

134 | ++__nrng; |

135 | __high /= __uerngrange; |

136 | } |

137 | const __uctype __highrange = __high + 1; |

138 | const __uctype __scaling = __urngrange / __highrange; |

139 | const __uctype __past = __highrange * __scaling; |

140 | __uctype __tmp; |

141 | |

142 | uint64_t __v1; |

143 | do |

144 | { |

145 | do |

146 | __tmp = __uctype(__urng()) - __urngmin; |

147 | while (__tmp >= __past); |

148 | __v1 = __tmp / __scaling; |

149 | for (size_t __cnt = 0; __cnt < __nrng; ++__cnt) |

150 | { |

151 | __tmp = __v1; |

152 | __v1 *= __uerngrange; |

153 | __v1 += __uctype(__urng()) - __urngmin; |

154 | } |

155 | } |

156 | while (__v1 > __maskval || __v1 < __tmp); |

157 | |

158 | uint64_t __v2; |

159 | do |

160 | { |

161 | do |

162 | __tmp = __uctype(__urng()) - __urngmin; |

163 | while (__tmp >= __past); |

164 | __v2 = __tmp / __scaling; |

165 | for (size_t __cnt = 0; __cnt < __nrng; ++__cnt) |

166 | { |

167 | __tmp = __v2; |

168 | __v2 *= __uerngrange; |

169 | __v2 += __uctype(__urng()) - __urngmin; |

170 | } |

171 | } |

172 | while (__v2 > __maskval || __v2 < __tmp); |

173 | |

174 | __v.__i = _mm_set_epi64x(__v1, __v2); |

175 | } |

176 | |

177 | __v.__i = _mm_or_si128(__v.__i, __two); |

178 | __x = _mm_sub_pd(__v.__d, __three); |

179 | __m128d __m = _mm_mul_pd(__x, __x); |

180 | __le = _mm_cvtsd_f64(_mm_hadd_pd (__m, __m)); |

181 | } |

182 | while (__le == 0.0 || __le >= 1.0); |

183 | |

184 | double __mult = (std::sqrt(-2.0 * std::log(__le) / __le) |

185 | * __param.stddev()); |

186 | |

187 | __x = _mm_add_pd(_mm_mul_pd(__x, _mm_set1_pd(__mult)), __av); |

188 | |

189 | _mm_storeu_pd(__f, __x); |

190 | __f += 2; |

191 | } |

192 | |

193 | if (__f != __t) |

194 | { |

195 | result_type __x, __y, __r2; |

196 | |

197 | __detail::_Adaptor<_UniformRandomNumberGenerator, result_type> |

198 | __aurng(__urng); |

199 | |

200 | do |

201 | { |

202 | __x = result_type(2.0) * __aurng() - 1.0; |

203 | __y = result_type(2.0) * __aurng() - 1.0; |

204 | __r2 = __x * __x + __y * __y; |

205 | } |

206 | while (__r2 > 1.0 || __r2 == 0.0); |

207 | |

208 | const result_type __mult = std::sqrt(-2 * std::log(__r2) / __r2); |

209 | _M_saved = __x * __mult; |

210 | _M_saved_available = true; |

211 | *__f = __y * __mult * __param.stddev() + __param.mean(); |

212 | } |

213 | } |

214 | #endif |

215 | |

216 | |

217 | _GLIBCXX_END_NAMESPACE_VERSION |

218 | } // namespace |

219 | |

220 | |

221 | #endif // _BITS_OPT_RANDOM_H |

222 |