strtod.c 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517
  1. #include <u.h>
  2. #include <libc.h>
  3. #include "fmtdef.h"
  4. static ulong
  5. umuldiv(ulong a, ulong b, ulong c)
  6. {
  7. double d;
  8. d = ((double)a * (double)b) / (double)c;
  9. if(d >= 4294967295.)
  10. d = 4294967295.;
  11. return (ulong)d;
  12. }
  13. /*
  14. * This routine will convert to arbitrary precision
  15. * floating point entirely in multi-precision fixed.
  16. * The answer is the closest floating point number to
  17. * the given decimal number. Exactly half way are
  18. * rounded ala ieee rules.
  19. * Method is to scale input decimal between .500 and .999...
  20. * with external power of 2, then binary search for the
  21. * closest mantissa to this decimal number.
  22. * Nmant is is the required precision. (53 for ieee dp)
  23. * Nbits is the max number of bits/word. (must be <= 28)
  24. * Prec is calculated - the number of words of fixed mantissa.
  25. */
  26. enum
  27. {
  28. Nbits = 28, /* bits safely represented in a ulong */
  29. Nmant = 53, /* bits of precision required */
  30. Prec = (Nmant+Nbits+1)/Nbits, /* words of Nbits each to represent mantissa */
  31. Sigbit = 1<<(Prec*Nbits-Nmant), /* first significant bit of Prec-th word */
  32. Ndig = 1500,
  33. One = (ulong)(1<<Nbits),
  34. Half = (ulong)(One>>1),
  35. Maxe = 310,
  36. Fsign = 1<<0, /* found - */
  37. Fesign = 1<<1, /* found e- */
  38. Fdpoint = 1<<2, /* found . */
  39. S0 = 0, /* _ _S0 +S1 #S2 .S3 */
  40. S1, /* _+ #S2 .S3 */
  41. S2, /* _+# #S2 .S4 eS5 */
  42. S3, /* _+. #S4 */
  43. S4, /* _+#.# #S4 eS5 */
  44. S5, /* _+#.#e +S6 #S7 */
  45. S6, /* _+#.#e+ #S7 */
  46. S7, /* _+#.#e+# #S7 */
  47. };
  48. static int xcmp(char*, char*);
  49. static int fpcmp(char*, ulong*);
  50. static void frnorm(ulong*);
  51. static void divascii(char*, int*, int*, int*);
  52. static void mulascii(char*, int*, int*, int*);
  53. typedef struct Tab Tab;
  54. struct Tab
  55. {
  56. int bp;
  57. int siz;
  58. char* cmp;
  59. };
  60. #ifndef ERANGE
  61. #define ERANGE 12345
  62. #endif
  63. double
  64. fmtstrtod(const char *as, char **aas)
  65. {
  66. int na, ex, dp, bp, c, i, flag, state;
  67. ulong low[Prec], hig[Prec], mid[Prec];
  68. double d;
  69. char *s, a[Ndig];
  70. flag = 0; /* Fsign, Fesign, Fdpoint */
  71. na = 0; /* number of digits of a[] */
  72. dp = 0; /* na of decimal point */
  73. ex = 0; /* exonent */
  74. state = S0;
  75. for(s=(char*)as;; s++) {
  76. c = *s;
  77. if(c >= '0' && c <= '9') {
  78. switch(state) {
  79. case S0:
  80. case S1:
  81. case S2:
  82. state = S2;
  83. break;
  84. case S3:
  85. case S4:
  86. state = S4;
  87. break;
  88. case S5:
  89. case S6:
  90. case S7:
  91. state = S7;
  92. ex = ex*10 + (c-'0');
  93. continue;
  94. }
  95. if(na == 0 && c == '0') {
  96. dp--;
  97. continue;
  98. }
  99. if(na < Ndig-50)
  100. a[na++] = c;
  101. continue;
  102. }
  103. switch(c) {
  104. case '\t':
  105. case '\n':
  106. case '\v':
  107. case '\f':
  108. case '\r':
  109. case ' ':
  110. if(state == S0)
  111. continue;
  112. break;
  113. case '-':
  114. if(state == S0)
  115. flag |= Fsign;
  116. else
  117. flag |= Fesign;
  118. case '+':
  119. if(state == S0)
  120. state = S1;
  121. else
  122. if(state == S5)
  123. state = S6;
  124. else
  125. break; /* syntax */
  126. continue;
  127. case '.':
  128. flag |= Fdpoint;
  129. dp = na;
  130. if(state == S0 || state == S1) {
  131. state = S3;
  132. continue;
  133. }
  134. if(state == S2) {
  135. state = S4;
  136. continue;
  137. }
  138. break;
  139. case 'e':
  140. case 'E':
  141. if(state == S2 || state == S4) {
  142. state = S5;
  143. continue;
  144. }
  145. break;
  146. }
  147. break;
  148. }
  149. /*
  150. * clean up return char-pointer
  151. */
  152. switch(state) {
  153. case S0:
  154. if(xcmp(s, "nan") == 0) {
  155. if(aas != nil)
  156. *aas = s+3;
  157. goto retnan;
  158. }
  159. case S1:
  160. if(xcmp(s, "infinity") == 0) {
  161. if(aas != nil)
  162. *aas = s+8;
  163. goto retinf;
  164. }
  165. if(xcmp(s, "inf") == 0) {
  166. if(aas != nil)
  167. *aas = s+3;
  168. goto retinf;
  169. }
  170. case S3:
  171. if(aas != nil)
  172. *aas = (char*)as;
  173. goto ret0; /* no digits found */
  174. case S6:
  175. s--; /* back over +- */
  176. case S5:
  177. s--; /* back over e */
  178. break;
  179. }
  180. if(aas != nil)
  181. *aas = s;
  182. if(flag & Fdpoint)
  183. while(na > 0 && a[na-1] == '0')
  184. na--;
  185. if(na == 0)
  186. goto ret0; /* zero */
  187. a[na] = 0;
  188. if(!(flag & Fdpoint))
  189. dp = na;
  190. if(flag & Fesign)
  191. ex = -ex;
  192. dp += ex;
  193. if(dp < -Maxe){
  194. errno = ERANGE;
  195. goto ret0; /* underflow by exp */
  196. } else
  197. if(dp > +Maxe)
  198. goto retinf; /* overflow by exp */
  199. /*
  200. * normalize the decimal ascii number
  201. * to range .[5-9][0-9]* e0
  202. */
  203. bp = 0; /* binary exponent */
  204. while(dp > 0)
  205. divascii(a, &na, &dp, &bp);
  206. while(dp < 0 || a[0] < '5')
  207. mulascii(a, &na, &dp, &bp);
  208. /* close approx by naive conversion */
  209. mid[0] = 0;
  210. mid[1] = 1;
  211. for(i=0; c=a[i]; i++) {
  212. mid[0] = mid[0]*10 + (c-'0');
  213. mid[1] = mid[1]*10;
  214. if(i >= 8)
  215. break;
  216. }
  217. low[0] = umuldiv(mid[0], One, mid[1]);
  218. hig[0] = umuldiv(mid[0]+1, One, mid[1]);
  219. for(i=1; i<Prec; i++) {
  220. low[i] = 0;
  221. hig[i] = One-1;
  222. }
  223. /* binary search for closest mantissa */
  224. for(;;) {
  225. /* mid = (hig + low) / 2 */
  226. c = 0;
  227. for(i=0; i<Prec; i++) {
  228. mid[i] = hig[i] + low[i];
  229. if(c)
  230. mid[i] += One;
  231. c = mid[i] & 1;
  232. mid[i] >>= 1;
  233. }
  234. frnorm(mid);
  235. /* compare */
  236. c = fpcmp(a, mid);
  237. if(c > 0) {
  238. c = 1;
  239. for(i=0; i<Prec; i++)
  240. if(low[i] != mid[i]) {
  241. c = 0;
  242. low[i] = mid[i];
  243. }
  244. if(c)
  245. break; /* between mid and hig */
  246. continue;
  247. }
  248. if(c < 0) {
  249. for(i=0; i<Prec; i++)
  250. hig[i] = mid[i];
  251. continue;
  252. }
  253. /* only hard part is if even/odd roundings wants to go up */
  254. c = mid[Prec-1] & (Sigbit-1);
  255. if(c == Sigbit/2 && (mid[Prec-1]&Sigbit) == 0)
  256. mid[Prec-1] -= c;
  257. break; /* exactly mid */
  258. }
  259. /* normal rounding applies */
  260. c = mid[Prec-1] & (Sigbit-1);
  261. mid[Prec-1] -= c;
  262. if(c >= Sigbit/2) {
  263. mid[Prec-1] += Sigbit;
  264. frnorm(mid);
  265. }
  266. goto out;
  267. ret0:
  268. return 0;
  269. retnan:
  270. return __NaN();
  271. retinf:
  272. /*
  273. * Unix strtod requires these. Plan 9 would return Inf(0) or Inf(-1). */
  274. errno = ERANGE;
  275. if(flag & Fsign)
  276. return -HUGE_VAL;
  277. return HUGE_VAL;
  278. out:
  279. d = 0;
  280. for(i=0; i<Prec; i++)
  281. d = d*One + mid[i];
  282. if(flag & Fsign)
  283. d = -d;
  284. d = ldexp(d, bp - Prec*Nbits);
  285. if(d == 0){ /* underflow */
  286. errno = ERANGE;
  287. }
  288. return d;
  289. }
  290. static void
  291. frnorm(ulong *f)
  292. {
  293. int i, c;
  294. c = 0;
  295. for(i=Prec-1; i>0; i--) {
  296. f[i] += c;
  297. c = f[i] >> Nbits;
  298. f[i] &= One-1;
  299. }
  300. f[0] += c;
  301. }
  302. static int
  303. fpcmp(char *a, ulong* f)
  304. {
  305. ulong tf[Prec];
  306. int i, d, c;
  307. for(i=0; i<Prec; i++)
  308. tf[i] = f[i];
  309. for(;;) {
  310. /* tf *= 10 */
  311. for(i=0; i<Prec; i++)
  312. tf[i] = tf[i]*10;
  313. frnorm(tf);
  314. d = (tf[0] >> Nbits) + '0';
  315. tf[0] &= One-1;
  316. /* compare next digit */
  317. c = *a;
  318. if(c == 0) {
  319. if('0' < d)
  320. return -1;
  321. if(tf[0] != 0)
  322. goto cont;
  323. for(i=1; i<Prec; i++)
  324. if(tf[i] != 0)
  325. goto cont;
  326. return 0;
  327. }
  328. if(c > d)
  329. return +1;
  330. if(c < d)
  331. return -1;
  332. a++;
  333. cont:;
  334. }
  335. }
  336. static void
  337. divby(char *a, int *na, int b)
  338. {
  339. int n, c;
  340. char *p;
  341. p = a;
  342. n = 0;
  343. while(n>>b == 0) {
  344. c = *a++;
  345. if(c == 0) {
  346. while(n) {
  347. c = n*10;
  348. if(c>>b)
  349. break;
  350. n = c;
  351. }
  352. goto xx;
  353. }
  354. n = n*10 + c-'0';
  355. (*na)--;
  356. }
  357. for(;;) {
  358. c = n>>b;
  359. n -= c<<b;
  360. *p++ = c + '0';
  361. c = *a++;
  362. if(c == 0)
  363. break;
  364. n = n*10 + c-'0';
  365. }
  366. (*na)++;
  367. xx:
  368. while(n) {
  369. n = n*10;
  370. c = n>>b;
  371. n -= c<<b;
  372. *p++ = c + '0';
  373. (*na)++;
  374. }
  375. *p = 0;
  376. }
  377. static Tab tab1[] =
  378. {
  379. 1, 0, "",
  380. 3, 1, "7",
  381. 6, 2, "63",
  382. 9, 3, "511",
  383. 13, 4, "8191",
  384. 16, 5, "65535",
  385. 19, 6, "524287",
  386. 23, 7, "8388607",
  387. 26, 8, "67108863",
  388. 27, 9, "134217727",
  389. };
  390. static void
  391. divascii(char *a, int *na, int *dp, int *bp)
  392. {
  393. int b, d;
  394. Tab *t;
  395. d = *dp;
  396. if(d >= (int)(nelem(tab1)))
  397. d = (int)(nelem(tab1))-1;
  398. t = tab1 + d;
  399. b = t->bp;
  400. if(memcmp(a, t->cmp, t->siz) > 0)
  401. d--;
  402. *dp -= d;
  403. *bp += b;
  404. divby(a, na, b);
  405. }
  406. static void
  407. mulby(char *a, char *p, char *q, int b)
  408. {
  409. int n, c;
  410. n = 0;
  411. *p = 0;
  412. for(;;) {
  413. q--;
  414. if(q < a)
  415. break;
  416. c = *q - '0';
  417. c = (c<<b) + n;
  418. n = c/10;
  419. c -= n*10;
  420. p--;
  421. *p = c + '0';
  422. }
  423. while(n) {
  424. c = n;
  425. n = c/10;
  426. c -= n*10;
  427. p--;
  428. *p = c + '0';
  429. }
  430. }
  431. static Tab tab2[] =
  432. {
  433. 1, 1, "", /* dp = 0-0 */
  434. 3, 3, "125",
  435. 6, 5, "15625",
  436. 9, 7, "1953125",
  437. 13, 10, "1220703125",
  438. 16, 12, "152587890625",
  439. 19, 14, "19073486328125",
  440. 23, 17, "11920928955078125",
  441. 26, 19, "1490116119384765625",
  442. 27, 19, "7450580596923828125", /* dp 8-9 */
  443. };
  444. static void
  445. mulascii(char *a, int *na, int *dp, int *bp)
  446. {
  447. char *p;
  448. int d, b;
  449. Tab *t;
  450. d = -*dp;
  451. if(d >= (int)(nelem(tab2)))
  452. d = (int)(nelem(tab2))-1;
  453. t = tab2 + d;
  454. b = t->bp;
  455. if(memcmp(a, t->cmp, t->siz) < 0)
  456. d--;
  457. p = a + *na;
  458. *bp -= b;
  459. *dp += d;
  460. *na += d;
  461. mulby(a, p+d, p, b);
  462. }
  463. static int
  464. xcmp(char *a, char *b)
  465. {
  466. int c1, c2;
  467. while((c1 = *b++)) {
  468. c2 = *a++;
  469. if(isupper(c2))
  470. c2 = tolower(c2);
  471. if(c1 != c2)
  472. return 1;
  473. }
  474. return 0;
  475. }