math_private.h 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. /*
  2. * ====================================================
  3. * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
  4. *
  5. * Developed at SunPro, a Sun Microsystems, Inc. business.
  6. * Permission to use, copy, modify, and distribute this
  7. * software is freely granted, provided that this notice
  8. * is preserved.
  9. * ====================================================
  10. */
  11. #ifndef _MATH_PRIVATE_H_
  12. #define _MATH_PRIVATE_H_
  13. #include <endian.h>
  14. #include <sys/types.h>
  15. /* The original fdlibm code used statements like:
  16. n0 = ((*(int*)&one)>>29)^1; * index of high word *
  17. ix0 = *(n0+(int*)&x); * high word of x *
  18. ix1 = *((1-n0)+(int*)&x); * low word of x *
  19. to dig two 32 bit words out of the 64 bit IEEE floating point
  20. value. That is non-ANSI, and, moreover, the gcc instruction
  21. scheduler gets it wrong. We instead use the following macros.
  22. Unlike the original code, we determine the endianness at compile
  23. time, not at run time; I don't see much benefit to selecting
  24. endianness at run time. */
  25. /* A union which permits us to convert between a double and two 32 bit
  26. ints. */
  27. /*
  28. * Math on arm is special (read: stupid):
  29. * For FPA, float words are always big-endian.
  30. * For VFP, float words follow the memory system mode.
  31. * For Maverick, float words are always little-endian.
  32. */
  33. #if !defined(__MAVERICK__) && ((__BYTE_ORDER == __BIG_ENDIAN) || \
  34. (!defined(__VFP_FP__) && (defined(__arm__) || defined(__thumb__))))
  35. typedef union
  36. {
  37. double value;
  38. struct
  39. {
  40. u_int32_t msw;
  41. u_int32_t lsw;
  42. } parts;
  43. } ieee_double_shape_type;
  44. #else
  45. typedef union
  46. {
  47. double value;
  48. struct
  49. {
  50. u_int32_t lsw;
  51. u_int32_t msw;
  52. } parts;
  53. } ieee_double_shape_type;
  54. #endif
  55. /* Get two 32 bit ints from a double. */
  56. #define EXTRACT_WORDS(ix0,ix1,d) \
  57. do { \
  58. ieee_double_shape_type ew_u; \
  59. ew_u.value = (d); \
  60. (ix0) = ew_u.parts.msw; \
  61. (ix1) = ew_u.parts.lsw; \
  62. } while (0)
  63. /* Get the more significant 32 bit int from a double. */
  64. #define GET_HIGH_WORD(i,d) \
  65. do { \
  66. ieee_double_shape_type gh_u; \
  67. gh_u.value = (d); \
  68. (i) = gh_u.parts.msw; \
  69. } while (0)
  70. /* Get the less significant 32 bit int from a double. */
  71. #define GET_LOW_WORD(i,d) \
  72. do { \
  73. ieee_double_shape_type gl_u; \
  74. gl_u.value = (d); \
  75. (i) = gl_u.parts.lsw; \
  76. } while (0)
  77. /* Set a double from two 32 bit ints. */
  78. #define INSERT_WORDS(d,ix0,ix1) \
  79. do { \
  80. ieee_double_shape_type iw_u; \
  81. iw_u.parts.msw = (ix0); \
  82. iw_u.parts.lsw = (ix1); \
  83. (d) = iw_u.value; \
  84. } while (0)
  85. /* Set the more significant 32 bits of a double from an int. */
  86. #define SET_HIGH_WORD(d,v) \
  87. do { \
  88. ieee_double_shape_type sh_u; \
  89. sh_u.value = (d); \
  90. sh_u.parts.msw = (v); \
  91. (d) = sh_u.value; \
  92. } while (0)
  93. /* Set the less significant 32 bits of a double from an int. */
  94. #define SET_LOW_WORD(d,v) \
  95. do { \
  96. ieee_double_shape_type sl_u; \
  97. sl_u.value = (d); \
  98. sl_u.parts.lsw = (v); \
  99. (d) = sl_u.value; \
  100. } while (0)
  101. /* A union which permits us to convert between a float and a 32 bit
  102. int. */
  103. typedef union
  104. {
  105. float value;
  106. u_int32_t word;
  107. } ieee_float_shape_type;
  108. /* Get a 32 bit int from a float. */
  109. #define GET_FLOAT_WORD(i,d) \
  110. do { \
  111. ieee_float_shape_type gf_u; \
  112. gf_u.value = (d); \
  113. (i) = gf_u.word; \
  114. } while (0)
  115. /* Set a float from a 32 bit int. */
  116. #define SET_FLOAT_WORD(d,i) \
  117. do { \
  118. ieee_float_shape_type sf_u; \
  119. sf_u.word = (i); \
  120. (d) = sf_u.value; \
  121. } while (0)
  122. /* ieee style elementary functions */
  123. extern double __ieee754_sqrt (double) attribute_hidden;
  124. extern double __ieee754_acos (double) attribute_hidden;
  125. extern double __ieee754_acosh (double) attribute_hidden;
  126. extern double __ieee754_log (double) attribute_hidden;
  127. extern double __ieee754_log2 (double) attribute_hidden;
  128. extern double __ieee754_atanh (double) attribute_hidden;
  129. extern double __ieee754_asin (double) attribute_hidden;
  130. extern double __ieee754_atan2 (double,double) attribute_hidden;
  131. extern double __ieee754_exp (double) attribute_hidden;
  132. extern double __ieee754_cosh (double) attribute_hidden;
  133. extern double __ieee754_fmod (double,double) attribute_hidden;
  134. extern double __ieee754_pow (double,double) attribute_hidden;
  135. extern double __ieee754_lgamma_r (double,int *) attribute_hidden;
  136. /*extern double __ieee754_gamma_r (double,int *) attribute_hidden;*/
  137. extern double __ieee754_lgamma (double) attribute_hidden;
  138. /*extern double __ieee754_gamma (double) attribute_hidden;*/
  139. extern double __ieee754_log10 (double) attribute_hidden;
  140. extern double __ieee754_sinh (double) attribute_hidden;
  141. extern double __ieee754_hypot (double,double) attribute_hidden;
  142. extern double __ieee754_j0 (double) attribute_hidden;
  143. extern double __ieee754_j1 (double) attribute_hidden;
  144. extern double __ieee754_y0 (double) attribute_hidden;
  145. extern double __ieee754_y1 (double) attribute_hidden;
  146. extern double __ieee754_jn (int,double) attribute_hidden;
  147. extern double __ieee754_yn (int,double) attribute_hidden;
  148. extern double __ieee754_remainder (double,double) attribute_hidden;
  149. extern int __ieee754_rem_pio2 (double,double*) attribute_hidden;
  150. extern double __ieee754_scalb (double,double) attribute_hidden;
  151. /* fdlibm kernel function */
  152. #ifndef _IEEE_LIBM
  153. extern double __kernel_standard (double,double,int) attribute_hidden;
  154. #endif
  155. extern double __kernel_sin (double,double,int) attribute_hidden;
  156. extern double __kernel_cos (double,double) attribute_hidden;
  157. extern double __kernel_tan (double,double,int) attribute_hidden;
  158. extern int __kernel_rem_pio2 (double*,double*,int,int,int,const int*) attribute_hidden;
/*
 * math_opt_barrier(x): safely load x, even if it was manipulated
 * by non-floating-point operations. This macro returns the value of x.
 * This ensures the compiler does not (ab)use its knowledge about the value
 * of x and does not optimize away future operations. Example:
 * float x;
 * SET_FLOAT_WORD(x, 0x80000001); // sets a bit pattern
 * y = math_opt_barrier(x); // "compiler, do not cheat!"
 * y = y * y; // compiler can't optimize, must use real multiply insn
 *
 * math_force_eval(x): force expression x to be evaluated.
 * Useful if the compiler might otherwise eliminate the expression
 * as unused. This macro returns no value.
 * Example: "void fn(float f) { f = f * f; }"
 * versus "void fn(float f) { f = f * f; math_force_eval(f); }"
 *
 * Currently, math_force_eval(x) stores x into
 * a floating point register or memory *of the appropriate size*.
 * There is no guarantee this will not change.
 */
  179. #if defined(__i386__)
  180. #define math_opt_barrier(x) ({ \
  181. __typeof(x) __x = (x); \
  182. /* "t": load x into top-of-stack fpreg */ \
  183. __asm ("" : "=t" (__x) : "0" (__x)); \
  184. __x; \
  185. })
  186. #define math_force_eval(x) do { \
  187. __typeof(x) __x = (x); \
  188. if (sizeof(__x) <= sizeof(double)) \
  189. /* "m": store x into a memory location */ \
  190. __asm __volatile ("" : : "m" (__x)); \
  191. else /* long double */ \
  192. /* "f": load x into (any) fpreg */ \
  193. __asm __volatile ("" : : "f" (__x)); \
  194. } while (0)
  195. #endif
  196. #if defined(__x86_64__)
  197. #define math_opt_barrier(x) ({ \
  198. __typeof(x) __x = (x); \
  199. if (sizeof(__x) <= sizeof(double)) \
  200. /* "x": load into XMM SSE register */ \
  201. __asm ("" : "=x" (__x) : "0" (__x)); \
  202. else /* long double */ \
  203. /* "t": load x into top-of-stack fpreg */ \
  204. __asm ("" : "=t" (__x) : "0" (__x)); \
  205. __x; \
  206. })
  207. #define math_force_eval(x) do { \
  208. __typeof(x) __x = (x); \
  209. if (sizeof(__x) <= sizeof(double)) \
  210. /* "x": load into XMM SSE register */ \
  211. __asm __volatile ("" : : "x" (__x)); \
  212. else /* long double */ \
  213. /* "f": load x into (any) fpreg */ \
  214. __asm __volatile ("" : : "f" (__x)); \
  215. } while (0)
  216. #endif
  217. /* Default implementations force store to a memory location */
  218. #ifndef math_opt_barrier
  219. #define math_opt_barrier(x) ({ __typeof(x) __x = (x); __asm ("" : "+m" (__x)); __x; })
  220. #endif
  221. #ifndef math_force_eval
  222. #define math_force_eval(x) do { __typeof(x) __x = (x); __asm __volatile ("" : : "m" (__x)); } while (0)
  223. #endif
  224. #endif /* _MATH_PRIVATE_H_ */