fixed.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. /*
  10. * libmad - MPEG audio decoder library
  11. * Copyright (C) 2000-2004 Underbit Technologies, Inc.
  12. *
  13. * This program is free software; you can redistribute it and/or modify
  14. * it under the terms of the GNU General Public License as published by
  15. * the Free Software Foundation; either version 2 of the License, or
  16. * (at your option) any later version.
  17. *
  18. * This program is distributed in the hope that it will be useful,
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  21. * GNU General Public License for more details.
  22. *
  23. * You should have received a copy of the GNU General Public License
  24. * along with this program; if not, write to the Free Software
  25. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  26. *
  27. * $Id: fixed.h,v 1.38 2004/02/17 02:02:03 rob Exp $
  28. */
  29. # ifndef LIBMAD_FIXED_H
  30. # define LIBMAD_FIXED_H
  31. typedef int mad_fixed_t; // must be 32 bits
  32. typedef int mad_fixed64hi_t; // must be 32 bits
  33. typedef u32int mad_fixed64lo_t; // must be 32 bits
  34. typedef vlong mad_fixed64_t;
  35. # if defined(FPM_FLOAT)
  36. typedef double mad_sample_t;
  37. # else
  38. typedef mad_fixed_t mad_sample_t;
  39. # endif
  40. /*
  41. * Fixed-point format: 0xABBBBBBB
  42. * A == whole part (sign + 3 bits)
  43. * B == fractional part (28 bits)
  44. *
  45. * Values are signed two's complement, so the effective range is:
  46. * 0x80000000 to 0x7fffffff
  47. * -8.0 to +7.9999999962747097015380859375
  48. *
  49. * The smallest representable value is:
  50. * 0x00000001 == 0.0000000037252902984619140625 (i.e. about 3.725e-9)
  51. *
  52. * 28 bits of fractional accuracy represent about
  53. * 8.6 digits of decimal accuracy.
  54. *
  55. * Fixed-point numbers can be added or subtracted as normal
  56. * integers, but multiplication requires shifting the 64-bit result
  57. * from 56 fractional bits back to 28 (and rounding.)
  58. *
  59. * Changing the definition of MAD_F_FRACBITS is only partially
  60. * supported, and must be done with care.
  61. */
  62. # define MAD_F_FRACBITS 28
  63. # define MAD_F(x) ((mad_fixed_t) (x))
  64. # define MAD_F_MIN ((mad_fixed_t) -0x80000000L)
  65. # define MAD_F_MAX ((mad_fixed_t) +0x7fffffffL)
  66. # define MAD_F_ONE MAD_F(0x10000000)
  67. # define mad_f_tofixed(x) ((mad_fixed_t) \
  68. ((x) * (double) (1L << MAD_F_FRACBITS) + 0.5))
  69. # define mad_f_todouble(x) ((double) \
  70. ((x) / (double) (1L << MAD_F_FRACBITS)))
  71. # define mad_f_intpart(x) ((x) >> MAD_F_FRACBITS)
  72. # define mad_f_fracpart(x) ((x) & ((1L << MAD_F_FRACBITS) - 1))
  73. /* (x should be positive) */
  74. # define mad_f_fromint(x) ((x) << MAD_F_FRACBITS)
  75. # define mad_f_add(x, y) ((x) + (y))
  76. # define mad_f_sub(x, y) ((x) - (y))
  77. # if defined(FPM_FLOAT)
  78. # error "FPM_FLOAT not yet supported"
  79. # undef MAD_F
  80. # define MAD_F(x) mad_f_todouble(x)
  81. # define mad_f_mul(x, y) ((x) * (y))
  82. # define mad_f_scale64
  83. # undef ASO_ZEROCHECK
  84. # elif defined(FPM_64BIT)
  85. /*
  86. * This version should be the most accurate if 64-bit types are supported by
  87. * the compiler, although it may not be the most efficient.
  88. */
  89. # if defined(OPT_ACCURACY)
  90. # define mad_f_mul(x, y) \
  91. ((mad_fixed_t) \
  92. ((((mad_fixed64_t) (x) * (y)) + \
  93. (1L << (MAD_F_SCALEBITS - 1))) >> MAD_F_SCALEBITS))
  94. # else
  95. # define mad_f_mul(x, y) \
  96. ((mad_fixed_t) (((mad_fixed64_t) (x) * (y)) >> MAD_F_SCALEBITS))
  97. # endif
  98. # define MAD_F_SCALEBITS MAD_F_FRACBITS
  99. /* --- Intel --------------------------------------------------------------- */
  100. # elif defined(FPM_INTEL)
  101. # if defined(_MSC_VER)
  102. # pragma warning(push)
  103. # pragma warning(disable: 4035) /* no return value */
  104. static __forceinline
  105. mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
  106. {
  107. enum {
  108. fracbits = MAD_F_FRACBITS
  109. };
  110. __asm {
  111. mov eax, x
  112. imul y
  113. shrd eax, edx, fracbits
  114. }
  115. /* implicit return of eax */
  116. }
  117. # pragma warning(pop)
  118. # define mad_f_mul mad_f_mul_inline
  119. # define mad_f_scale64
  120. # else
  121. /*
  122. * This Intel version is fast and accurate; the disposition of the least
  123. * significant bit depends on OPT_ACCURACY via mad_f_scale64().
  124. */
  125. # define MAD_F_MLX(hi, lo, x, y) \
  126. asm ("imull %3" \
  127. : "=a" (lo), "=d" (hi) \
  128. : "%a" (x), "rm" (y) \
  129. : "cc")
  130. # if defined(OPT_ACCURACY)
  131. /*
  132. * This gives best accuracy but is not very fast.
  133. */
  134. # define MAD_F_MLA(hi, lo, x, y) \
  135. ({ mad_fixed64hi_t __hi; \
  136. mad_fixed64lo_t __lo; \
  137. MAD_F_MLX(__hi, __lo, (x), (y)); \
  138. asm ("addl %2,%0\n\t" \
  139. "adcl %3,%1" \
  140. : "=rm" (lo), "=rm" (hi) \
  141. : "r" (__lo), "r" (__hi), "0" (lo), "1" (hi) \
  142. : "cc"); \
  143. })
  144. # endif /* OPT_ACCURACY */
  145. # if defined(OPT_ACCURACY)
  146. /*
  147. * Surprisingly, this is faster than SHRD followed by ADC.
  148. */
  149. # define mad_f_scale64(hi, lo) \
  150. ({ mad_fixed64hi_t __hi_; \
  151. mad_fixed64lo_t __lo_; \
  152. mad_fixed_t __result; \
  153. asm ("addl %4,%2\n\t" \
  154. "adcl %5,%3" \
  155. : "=rm" (__lo_), "=rm" (__hi_) \
  156. : "0" (lo), "1" (hi), \
  157. "ir" (1L << (MAD_F_SCALEBITS - 1)), "ir" (0) \
  158. : "cc"); \
  159. asm ("shrdl %3,%2,%1" \
  160. : "=rm" (__result) \
  161. : "0" (__lo_), "r" (__hi_), "I" (MAD_F_SCALEBITS) \
  162. : "cc"); \
  163. __result; \
  164. })
  165. # elif defined(OPT_INTEL)
  166. /*
  167. * Alternate Intel scaling that may or may not perform better.
  168. */
  169. # define mad_f_scale64(hi, lo) \
  170. ({ mad_fixed_t __result; \
  171. asm ("shrl %3,%1\n\t" \
  172. "shll %4,%2\n\t" \
  173. "orl %2,%1" \
  174. : "=rm" (__result) \
  175. : "0" (lo), "r" (hi), \
  176. "I" (MAD_F_SCALEBITS), "I" (32 - MAD_F_SCALEBITS) \
  177. : "cc"); \
  178. __result; \
  179. })
  180. # else
  181. # define mad_f_scale64(hi, lo) \
  182. ({ mad_fixed_t __result; \
  183. asm ("shrdl %3,%2,%1" \
  184. : "=rm" (__result) \
  185. : "0" (lo), "r" (hi), "I" (MAD_F_SCALEBITS) \
  186. : "cc"); \
  187. __result; \
  188. })
  189. # endif /* OPT_ACCURACY */
  190. # define MAD_F_SCALEBITS MAD_F_FRACBITS
  191. # endif
  192. /* --- ARM ----------------------------------------------------------------- */
  193. # elif defined(FPM_ARM)
  194. /*
  195. * This ARM V4 version is as accurate as FPM_64BIT but much faster. The
  196. * least significant bit is properly rounded at no CPU cycle cost!
  197. */
  198. # if 1
  199. /*
  200. * This is faster than the default implementation via MAD_F_MLX() and
  201. * mad_f_scale64().
  202. */
  203. # define mad_f_mul(x, y) \
  204. ({ mad_fixed64hi_t __hi; \
  205. mad_fixed64lo_t __lo; \
  206. mad_fixed_t __result; \
  207. asm ("smull %0, %1, %3, %4\n\t" \
  208. "movs %0, %0, lsr %5\n\t" \
  209. "adc %2, %0, %1, lsl %6" \
  210. : "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
  211. : "%r" (x), "r" (y), \
  212. "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS) \
  213. : "cc"); \
  214. __result; \
  215. })
  216. # endif
  217. # define MAD_F_MLX(hi, lo, x, y) \
  218. asm ("smull %0, %1, %2, %3" \
  219. : "=&r" (lo), "=&r" (hi) \
  220. : "%r" (x), "r" (y))
  221. # define MAD_F_MLA(hi, lo, x, y) \
  222. asm ("smlal %0, %1, %2, %3" \
  223. : "+r" (lo), "+r" (hi) \
  224. : "%r" (x), "r" (y))
  225. # define MAD_F_MLN(hi, lo) \
  226. asm ("rsbs %0, %2, #0\n\t" \
  227. "rsc %1, %3, #0" \
  228. : "=r" (lo), "=r" (hi) \
  229. : "0" (lo), "1" (hi) \
  230. : "cc")
  231. # define mad_f_scale64(hi, lo) \
  232. ({ mad_fixed_t __result; \
  233. asm ("movs %0, %1, lsr %3\n\t" \
  234. "adc %0, %0, %2, lsl %4" \
  235. : "=&r" (__result) \
  236. : "r" (lo), "r" (hi), \
  237. "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS) \
  238. : "cc"); \
  239. __result; \
  240. })
  241. # define MAD_F_SCALEBITS MAD_F_FRACBITS
  242. /* --- MIPS ---------------------------------------------------------------- */
  243. # elif defined(FPM_MIPS)
  244. /*
  245. * This MIPS version is fast and accurate; the disposition of the least
  246. * significant bit depends on OPT_ACCURACY via mad_f_scale64().
  247. */
  248. # define MAD_F_MLX(hi, lo, x, y) \
  249. asm ("mult %2,%3" \
  250. : "=l" (lo), "=h" (hi) \
  251. : "%r" (x), "r" (y))
  252. # if defined(HAVE_MADD_ASM)
  253. # define MAD_F_MLA(hi, lo, x, y) \
  254. asm ("madd %2,%3" \
  255. : "+l" (lo), "+h" (hi) \
  256. : "%r" (x), "r" (y))
  257. # elif defined(HAVE_MADD16_ASM)
  258. /*
  259. * This loses significant accuracy due to the 16-bit integer limit in the
  260. * multiply/accumulate instruction.
  261. */
  262. # define MAD_F_ML0(hi, lo, x, y) \
  263. asm ("mult %2,%3" \
  264. : "=l" (lo), "=h" (hi) \
  265. : "%r" ((x) >> 12), "r" ((y) >> 16))
  266. # define MAD_F_MLA(hi, lo, x, y) \
  267. asm ("madd16 %2,%3" \
  268. : "+l" (lo), "+h" (hi) \
  269. : "%r" ((x) >> 12), "r" ((y) >> 16))
  270. # define MAD_F_MLZ(hi, lo) ((mad_fixed_t) (lo))
  271. # endif
  272. # if defined(OPT_SPEED)
  273. # define mad_f_scale64(hi, lo) \
  274. ((mad_fixed_t) ((hi) << (32 - MAD_F_SCALEBITS)))
  275. # define MAD_F_SCALEBITS MAD_F_FRACBITS
  276. # endif
  277. /* --- SPARC --------------------------------------------------------------- */
  278. # elif defined(FPM_SPARC)
  279. /*
  280. * This SPARC V8 version is fast and accurate; the disposition of the least
  281. * significant bit depends on OPT_ACCURACY via mad_f_scale64().
  282. */
  283. # define MAD_F_MLX(hi, lo, x, y) \
  284. asm ("smul %2, %3, %0\n\t" \
  285. "rd %%y, %1" \
  286. : "=r" (lo), "=r" (hi) \
  287. : "%r" (x), "rI" (y))
  288. /* --- PowerPC ------------------------------------------------------------- */
  289. # elif defined(FPM_PPC)
  290. /*
  291. * This PowerPC version is fast and accurate; the disposition of the least
  292. * significant bit depends on OPT_ACCURACY via mad_f_scale64().
  293. */
  294. # define MAD_F_MLX(hi, lo, x, y) \
  295. do { \
  296. asm ("mullw %0,%1,%2" \
  297. : "=r" (lo) \
  298. : "%r" (x), "r" (y)); \
  299. asm ("mulhw %0,%1,%2" \
  300. : "=r" (hi) \
  301. : "%r" (x), "r" (y)); \
  302. } \
  303. while (0)
  304. # if defined(OPT_ACCURACY)
  305. /*
  306. * This gives best accuracy but is not very fast.
  307. */
  308. # define MAD_F_MLA(hi, lo, x, y) \
  309. ({ mad_fixed64hi_t __hi; \
  310. mad_fixed64lo_t __lo; \
  311. MAD_F_MLX(__hi, __lo, (x), (y)); \
  312. asm ("addc %0,%2,%3\n\t" \
  313. "adde %1,%4,%5" \
  314. : "=r" (lo), "=r" (hi) \
  315. : "%r" (lo), "r" (__lo), \
  316. "%r" (hi), "r" (__hi) \
  317. : "xer"); \
  318. })
  319. # endif
  320. # if defined(OPT_ACCURACY)
  321. /*
  322. * This is slower than the truncating version below it.
  323. */
  324. # define mad_f_scale64(hi, lo) \
  325. ({ mad_fixed_t __result, __round; \
  326. asm ("rotrwi %0,%1,%2" \
  327. : "=r" (__result) \
  328. : "r" (lo), "i" (MAD_F_SCALEBITS)); \
  329. asm ("extrwi %0,%1,1,0" \
  330. : "=r" (__round) \
  331. : "r" (__result)); \
  332. asm ("insrwi %0,%1,%2,0" \
  333. : "+r" (__result) \
  334. : "r" (hi), "i" (MAD_F_SCALEBITS)); \
  335. asm ("add %0,%1,%2" \
  336. : "=r" (__result) \
  337. : "%r" (__result), "r" (__round)); \
  338. __result; \
  339. })
  340. # else
  341. # define mad_f_scale64(hi, lo) \
  342. ({ mad_fixed_t __result; \
  343. asm ("rotrwi %0,%1,%2" \
  344. : "=r" (__result) \
  345. : "r" (lo), "i" (MAD_F_SCALEBITS)); \
  346. asm ("insrwi %0,%1,%2,0" \
  347. : "+r" (__result) \
  348. : "r" (hi), "i" (MAD_F_SCALEBITS)); \
  349. __result; \
  350. })
  351. # endif
  352. # define MAD_F_SCALEBITS MAD_F_FRACBITS
  353. /* --- Default ------------------------------------------------------------- */
  354. # elif defined(FPM_DEFAULT)
  355. /*
  356. * This version is the most portable but it loses significant accuracy.
  357. * Furthermore, accuracy is biased against the second argument, so care
  358. * should be taken when ordering operands.
  359. *
  360. * The scale factors are constant as this is not used with SSO.
  361. *
  362. * Pre-rounding is required to stay within the limits of compliance.
  363. */
  364. # if defined(OPT_SPEED)
  365. # define mad_f_mul(x, y) (((x) >> 12) * ((y) >> 16))
  366. # else
  367. # define mad_f_mul(x, y) ((((x) + (1L << 11)) >> 12) * \
  368. (((y) + (1L << 15)) >> 16))
  369. # endif
  370. /* ------------------------------------------------------------------------- */
  371. # else
  372. # error "no FPM selected"
  373. # endif
  374. /* default implementations */
  375. # if !defined(mad_f_mul)
  376. # define mad_f_mul(x, y) \
  377. ({ register mad_fixed64hi_t __hi; \
  378. register mad_fixed64lo_t __lo; \
  379. MAD_F_MLX(__hi, __lo, (x), (y)); \
  380. mad_f_scale64(__hi, __lo); \
  381. })
  382. # endif
  383. # if !defined(MAD_F_MLA)
  384. # define MAD_F_ML0(hi, lo, x, y) ((lo) = mad_f_mul((x), (y)))
  385. # define MAD_F_MLA(hi, lo, x, y) ((lo) += mad_f_mul((x), (y)))
  386. # define MAD_F_MLN(hi, lo) ((lo) = -(lo))
  387. # define MAD_F_MLZ(hi, lo) ((void) (hi), (mad_fixed_t) (lo))
  388. # endif
  389. # if !defined(MAD_F_ML0)
  390. # define MAD_F_ML0(hi, lo, x, y) MAD_F_MLX((hi), (lo), (x), (y))
  391. # endif
  392. # if !defined(MAD_F_MLN)
  393. # define MAD_F_MLN(hi, lo) ((hi) = ((lo) = -(lo)) ? ~(hi) : -(hi))
  394. # endif
  395. # if !defined(MAD_F_MLZ)
  396. # define MAD_F_MLZ(hi, lo) mad_f_scale64((hi), (lo))
  397. # endif
  398. # if !defined(mad_f_scale64)
  399. # if defined(OPT_ACCURACY)
  400. # define mad_f_scale64(hi, lo) \
  401. ((((mad_fixed_t) \
  402. (((hi) << (32 - (MAD_F_SCALEBITS - 1))) | \
  403. ((lo) >> (MAD_F_SCALEBITS - 1)))) + 1) >> 1)
  404. # else
  405. # define mad_f_scale64(hi, lo) \
  406. ((mad_fixed_t) \
  407. (((hi) << (32 - MAD_F_SCALEBITS)) | \
  408. ((lo) >> MAD_F_SCALEBITS)))
  409. # endif
  410. # define MAD_F_SCALEBITS MAD_F_FRACBITS
  411. # endif
  412. /* C routines */
  413. mad_fixed_t mad_f_abs(mad_fixed_t);
  414. mad_fixed_t mad_f_div(mad_fixed_t, mad_fixed_t);
  415. # endif