DtNlUtils.c 16 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740
  1. /*
  2. * CDE - Common Desktop Environment
  3. *
  4. * Copyright (c) 1993-2012, The Open Group. All rights reserved.
  5. *
  6. * These libraries and programs are free software; you can
  7. * redistribute them and/or modify them under the terms of the GNU
  8. * Lesser General Public License as published by the Free Software
  9. * Foundation; either version 2 of the License, or (at your option)
  10. * any later version.
  11. *
  12. * These libraries and programs are distributed in the hope that
  13. * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14. * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15. * PURPOSE. See the GNU Lesser General Public License for more
  16. * details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with these libraries and programs; if not, write
  20. * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21. * Floor, Boston, MA 02110-1301 USA
  22. */
  23. /* $TOG: DtNlUtils.c /main/10 1999/10/15 12:07:23 mgreess $ */
  24. /*
  25. * (c) Copyright 1993, 1994 Hewlett-Packard Company *
  26. * (c) Copyright 1993, 1994 International Business Machines Corp. *
  27. * (c) Copyright 1993, 1994 Sun Microsystems, Inc. *
  28. * (c) Copyright 1993, 1994 Novell, Inc. *
  29. */
  30. /*****************************************************************************/
  31. /* */
  32. /* This file contains the Dt versions of common string functions, which */
  33. /* have not yet been provided by the HP-UX platform. */
  34. /* These functions know how to handle multi-byte strings. */
  35. /* */
  36. /*****************************************************************************/
  37. #include <ctype.h>
  38. #include <string.h>
  39. #include <Dt/DtNlUtils.h>
  40. #include "DtSvcLock.h"
/*
 * Globals
 *
 * _DtNl_is_multibyte starts out False.  When NLS16 is defined, Dt_nlInit()
 * sets it to True if MB_CUR_MAX is greater than 1.  The string utilities in
 * this file test it to choose between the plain libc routine and the
 * multibyte-aware code path.
 */
Boolean _DtNl_is_multibyte = False;
  45. #ifdef NLS16
  46. /*
  47. * Dt nls initialization function.
  48. * will see if multibyte characters are
  49. * supported for the locale. If multibyte characters are not supported,
  50. * then all of our string utilites simply call the standard libc function.
  51. */
  52. void
  53. Dt_nlInit( void )
  54. {
  55. char * bc;
  56. static Boolean first = True;
  57. _DtSvcProcessLock();
  58. if (!first) {
  59. _DtSvcProcessUnlock();
  60. return;
  61. }
  62. first = False;
  63. _DtSvcProcessUnlock();
  64. if (MB_CUR_MAX > 1)
  65. _DtNl_is_multibyte = True;
  66. else
  67. _DtNl_is_multibyte = False;
  68. }
  69. /*
  70. * Dt version of strtok(s1, s2).
  71. * Returns a pointer to the span of characters in s1 terminated by
  72. * one of the characters in s2. Only s1 can be multibyte.
  73. */
  74. char *
  75. Dt_strtok(
  76. char *s1,
  77. char *s2 )
  78. {
  79. static char *ptr;
  80. char * return_ptr;
  81. int len;
  82. int offset;
  83. /* Use standard libc function, if no multibyte */
  84. if (!_DtNl_is_multibyte)
  85. return(strtok(s1, s2));
  86. /*
  87. * If this is the first call, save the string pointer, and bypass
  88. * any leading separators.
  89. */
  90. if (s1)
  91. ptr = s1 + Dt_strspn(s1, s2);
  92. /* A Null string pointer has no tokens */
  93. if (ptr == NULL)
  94. return(NULL);
  95. /* Find out where the first terminator is */
  96. if ((len = Dt_strcspn(ptr, s2)) <= 0)
  97. {
  98. /* No tokens left */
  99. return(NULL);
  100. }
  101. /* Keep track of where the token started */
  102. return_ptr = ptr;
  103. /* Null out the terminator; we need to know how many bytes are
  104. * occupied by the terminator, so that we can skip over it to
  105. * the next character.
  106. */
  107. /*
  108. * We have to take care of the case when mblen() returns -1.
  109. */
  110. offset = mblen(ptr + len, MB_CUR_MAX);
  111. if( offset == -1 )
  112. offset = 1;
  113. *(ptr + len) = '\0';
  114. ptr += (len + offset);
  115. /*
  116. * In preparation for the next pass, skip any other occurrences of
  117. * the terminator characters which were joined with the terminator
  118. * we first encountered.
  119. */
  120. len = Dt_strspn(ptr, s2);
  121. ptr += len;
  122. return(return_ptr);
  123. }
  124. char *
  125. Dt_strtok_r(
  126. char *s1,
  127. char *s2,
  128. char **ptr )
  129. {
  130. char * return_ptr;
  131. int len;
  132. int offset;
  133. /* Use standard libc function, if no multibyte */
  134. if (!_DtNl_is_multibyte)
  135. return((char*) strtok_r(s1, s2, ptr));
  136. /*
  137. * If this is the first call, save the string pointer, and bypass
  138. * any leading separators.
  139. */
  140. if (s1)
  141. *ptr = s1 + Dt_strspn(s1, s2);
  142. /* A Null string pointer has no tokens */
  143. if (*ptr == NULL)
  144. return(NULL);
  145. /* Find out where the first terminator is */
  146. if ((len = Dt_strcspn(*ptr, s2)) <= 0)
  147. {
  148. /* No tokens left */
  149. return(NULL);
  150. }
  151. /* Keep track of where the token started */
  152. return_ptr = *ptr;
  153. /* Null out the terminator; we need to know how many bytes are
  154. * occupied by the terminator, so that we can skip over it to
  155. * the next character.
  156. */
  157. /*
  158. * We have to take care of the case when mblen() returns -1.
  159. */
  160. offset = mblen(*ptr + len, MB_CUR_MAX);
  161. if( offset == -1 )
  162. offset = 1;
  163. *(*ptr + len) = '\0';
  164. *ptr += (len + offset);
  165. /*
  166. * In preparation for the next pass, skip any other occurrences of
  167. * the terminator characters which were joined with the terminator
  168. * we first encountered.
  169. */
  170. len = Dt_strspn(*ptr, s2);
  171. *ptr += len;
  172. return(return_ptr);
  173. }
  174. /*
  175. * Dt version of strspn(s1, s2).
  176. * Returns the span of characters in s1 contained in s2.
  177. * Only s1 can be multibyte.
  178. */
  179. int
  180. Dt_strspn(
  181. char *s1,
  182. char *s2 )
  183. {
  184. wchar_t s1char, s2char;
  185. int s1len, s2len;
  186. int i;
  187. int count;
  188. char * ptr;
  189. Boolean match;
  190. /* Use the standard libc function, if multibyte is not present */
  191. if (!_DtNl_is_multibyte)
  192. return(strspn(s1, s2));
  193. /* A Null string has no spans */
  194. if (s1 == NULL)
  195. return(0);
  196. count = 0;
  197. while (*s1)
  198. {
  199. /* Extract the next character from s1; may be multibyte */
  200. if ((s1len = mbtowc(&s1char, s1, MB_CUR_MAX)) < 0)
  201. return(0);
  202. s1 += s1len;
  203. /*
  204. * Compare this character against all the chars in s2. Keep
  205. * working through s1, until a character is found in s1 which
  206. * is not contained in s2.
  207. */
  208. ptr = s2;
  209. match = False;
  210. while (*ptr)
  211. {
  212. /* Extract the next character from s2; cannot be multibyte */
  213. s2char = *ptr++;
  214. /* If a match is found, keep processing s1 */
  215. if (s1char == s2char)
  216. {
  217. match = True;
  218. count += s1len;
  219. break;
  220. }
  221. }
  222. /*
  223. * If we made it here because all of s2 was searched, and a match
  224. * was not found against s1, then we are done.
  225. */
  226. if (!match)
  227. return(count);
  228. }
  229. return(count);
  230. }
  231. /*
  232. * Dt version of strcspn(s1, s2).
  233. * Returns the span of characters in s1 not contained in s2.
  234. * Only s1 can be multibyte.
  235. */
  236. int
  237. Dt_strcspn(
  238. char *s1,
  239. char *s2 )
  240. {
  241. wchar_t s1char, s2char;
  242. int s1len, s2len;
  243. int i;
  244. int count;
  245. char * ptr;
  246. /* Use the standard libc function, if multibyte is not present */
  247. if (!_DtNl_is_multibyte)
  248. return(strcspn(s1, s2));
  249. /* An empty string has no spans */
  250. if (s1 == NULL)
  251. return(0);
  252. count = 0;
  253. while (*s1)
  254. {
  255. /* Extract the next character from s1; may be multibyte */
  256. if ((s1len = mbtowc(&s1char, s1, MB_CUR_MAX)) < 0)
  257. return(0);
  258. s1 += s1len;
  259. /*
  260. * Compare this character against all the chars in s2. Keep
  261. * working through s1, until a character is found in s1 which
  262. * is contained in s2.
  263. */
  264. ptr = s2;
  265. while (*ptr)
  266. {
  267. /* Extract the next character from s2; cannot be multibyte */
  268. s2char = *ptr++;
  269. /* If a match occurs, then we are done */
  270. if (s1char == s2char)
  271. return(count);
  272. }
  273. /*
  274. * If we've made it here, then we searched all of s2, and none of
  275. * its components matched s1; continue with the next character
  276. * in s1.
  277. */
  278. count += s1len;
  279. }
  280. return(count);
  281. }
  282. /*
  283. * Dt version of strchr(s, c).
  284. * Returns a pointer to the first occurrence of 'c' in 's'.
  285. */
  286. char *
  287. Dt_strchr(
  288. char *s,
  289. char c )
  290. {
  291. wchar_t schar;
  292. int i;
  293. int slen;
  294. wchar_t wc;
  295. char foo[2];
  296. if (s == NULL)
  297. return(NULL);
  298. /* Use standard libc function if multibyte is not enabled */
  299. if (!_DtNl_is_multibyte)
  300. return(strchr(s, c));
  301. foo[0] = c;
  302. foo[1] = '\0';
  303. mbtowc(&wc, foo, 2);
  304. do
  305. {
  306. /* Extract next char from 's'; may be multibyte */
  307. if ((slen = mbtowc(&schar, s, MB_CUR_MAX)) < 0)
  308. return(NULL);
  309. s += slen;
  310. /* If we match 'c', then return a pointer to this character */
  311. if (schar == wc)
  312. return (s - slen);
  313. } while (slen > 0);
  314. /* No match was found */
  315. return(NULL);
  316. }
  317. /*
  318. * Dt version of strrchr(s, c).
  319. * Returns a pointer to the last occurrence of 'c' in 's'.
  320. */
  321. char *
  322. Dt_strrchr(
  323. char *s,
  324. char c )
  325. {
  326. wchar_t schar;
  327. int i;
  328. int slen;
  329. char * last = NULL;
  330. wchar_t wc;
  331. char foo[2];
  332. if (s == NULL)
  333. return(NULL);
  334. /* Use standard libc function if multibyte is not enabled */
  335. if (!_DtNl_is_multibyte)
  336. return(strrchr(s, c));
  337. foo[0] = c;
  338. foo[1] = '\0';
  339. mbtowc(&wc, foo, 2);
  340. do
  341. {
  342. /* Extract next char from 's'; may be multibyte */
  343. if ((slen = mbtowc(&schar, s, MB_CUR_MAX)) < 0)
  344. return(NULL);
  345. s += slen;
  346. /* If we match 'c', keep track of it, and keep looking */
  347. if (schar == wc)
  348. last = s - slen;
  349. } while (slen > 0);
  350. return(last);
  351. }
  352. /*
  353. * Dt equivalent of s[strlen(s) - 1]
  354. * Returns the last character in the string 's'.
  355. */
  356. void
  357. Dt_lastChar(
  358. char *s,
  359. char **cptr,
  360. int *lenptr )
  361. {
  362. int len = 0;
  363. if ((s == NULL) || (*s == '\0'))
  364. {
  365. *lenptr = 0;
  366. *cptr = NULL;
  367. return;
  368. }
  369. /* Use the easy method, if possible */
  370. if (!_DtNl_is_multibyte)
  371. {
  372. *cptr = s + strlen(s) - 1;
  373. *lenptr = 1;
  374. return;
  375. }
  376. /* Move through the string, keeping a ptr to the last character found */
  377. while (*s)
  378. {
  379. /*
  380. * We have to take care of the case when mbtowc() returns -1
  381. */
  382. len = mbtowc(NULL, s, MB_CUR_MAX);
  383. if ( len == -1 )
  384. len = 1;
  385. s += len;
  386. }
  387. /* Backup to the character before the NULL */
  388. *lenptr = mblen(s-len, MB_CUR_MAX);
  389. *cptr = s - len;
  390. }
  391. /*
  392. * Dt equivalent of strlen()
  393. * Returns the number of characters (not bytes) in a string
  394. */
  395. int
  396. Dt_charCount(
  397. char *s )
  398. {
  399. int count = 0;
  400. int len;
  401. if (s == NULL)
  402. return(0);
  403. if (!_DtNl_is_multibyte)
  404. return(strlen(s));
  405. /* Move through the string, counting each character present */
  406. while (*s)
  407. {
  408. len = mblen(s, MB_CUR_MAX);
  409. /* if invalid character, still count it and continue */
  410. if (len == -1)
  411. len = 1;
  412. s += len;
  413. count++;
  414. }
  415. return(count);
  416. }
  417. /******************************************************************************
  418. *
  419. * _Dt_NextChar(s)
  420. * return a pointer to the next multi-byte character after the character
  421. * pointed to by "s". If "s" does not point to a valid multi-byte
  422. * character advance one byte.
  423. *
  424. ******************************************************************************/
  425. char *
  426. _Dt_NextChar(char *s)
  427. {
  428. int len=1;
  429. if (_DtNl_is_multibyte || (MB_CUR_MAX > 1))
  430. len = mblen ( s, MB_CUR_MAX);
  431. /*
  432. * If "s" did not point to a vaild multi-byte character,
  433. * move ahead one byte.
  434. */
  435. if ( len == -1 )
  436. len = 1;
  437. return s + len;
  438. }
  439. /******************************************************************************
  440. *
  441. * _Dt_PrevChar(start,s)
  442. * return a pointer to the multi-byte character preceding the
  443. * character pointed to by "s". If "s" does not point to a valid
  444. * multi-byte character retreat one byte. "start" should point to
  445. * a character preceding "s" in the multi-byte string.
  446. *
  447. ******************************************************************************/
  448. char *
  449. _Dt_PrevChar(const char *start, char *s)
  450. {
  451. char *p;
  452. int len;
  453. if ( !_DtNl_is_multibyte || (MB_CUR_MAX == 1) )
  454. return (s - 1);
  455. /*
  456. * Check if "*s" is a valid multi-byte character.
  457. * if not just return the previous byte.
  458. */
  459. if ( mblen(s,MB_CUR_MAX) < 0 )
  460. return (s - 1);
  461. /*
  462. * "start" must be less than "s" ; if not return
  463. * (s-1)
  464. */
  465. if ( start >= s )
  466. return (s - 1);
  467. /*
  468. * Check that "start" points to a valid multi-byte character.
  469. * otherwise return "s-1"
  470. */
  471. if ( mblen(start,MB_CUR_MAX) < 0 )
  472. return (s-1);
  473. /*
  474. * Starting from "start" traverse the string until we find
  475. * the character preceding "s".
  476. */
  477. /*
  478. * We have to take care of the case when mblen() returns -1.
  479. */
  480. for (p = (char *)start;
  481. p + (len = (mblen(p,MB_CUR_MAX) == -1 ? 1 : mblen(p,MB_CUR_MAX))) < s;
  482. p += len)
  483. /* NULL STATEMENT */;
  484. /*
  485. * We should always find a multi-byte character preceding "s" if
  486. * "*s" is a valid multi-byte char and not the first character of
  487. * the text.
  488. */
  489. /* myassert(p < s); */
  490. return p;
  491. }
  492. /*
  493. * Dt mult-byte equivalent of isspace()
  494. */
  495. int
  496. _Dt_isspace(char *s)
  497. {
  498. if ( !_DtNl_is_multibyte || MB_CUR_MAX == 1 )
  499. return isspace((u_char)*s);
  500. if ( mblen(s,MB_CUR_MAX) == 1 )
  501. return isspace((u_char)*s);
  502. else
  503. return 0;
  504. }
  505. /*
  506. * Dt mult-byte equivalent of isdigit()
  507. */
  508. int
  509. _Dt_isdigit(char *s)
  510. {
  511. if ( !_DtNl_is_multibyte || MB_CUR_MAX == 1 )
  512. return isdigit(*s);
  513. if ( mblen(s,MB_CUR_MAX) == 1 )
  514. return isdigit(*s);
  515. else
  516. return 0;
  517. }
  518. /*
  519. * Dt equivalent of &(s[n])
  520. * Returns a pointer to the indicated character
  521. */
  522. char *
  523. _DtGetNthChar(
  524. char *s,
  525. int n )
  526. {
  527. int count;
  528. int len;
  529. if ((s == NULL) || (n < 0) || (n > Dt_charCount(s)))
  530. return(NULL);
  531. count = 0;
  532. while ((count < n) && (*s))
  533. {
  534. if (_DtNl_is_multibyte)
  535. len = mblen(s, MB_CUR_MAX);
  536. else
  537. len = 1;
  538. /*
  539. * We have to take care of the case when mblen() returns -1.
  540. */
  541. if ( len == -1 )
  542. len = 1;
  543. s += len;
  544. count++;
  545. }
  546. return(s);
  547. }
  548. /*
  549. * multibyte version of strpbrk().
  550. * Only cs can be multibyte.
  551. */
  552. char *
  553. _dt_strpbrk(
  554. char *cs,
  555. char *ct)
  556. {
  557. int len;
  558. size_t i;
  559. if(MB_CUR_MAX == 1)
  560. return(strpbrk(cs, ct));
  561. while(*cs) {
  562. len = mblen(cs, MB_CUR_MAX);
  563. if(len < 1)
  564. len = 1;
  565. if(len == 1) {
  566. for(i = 0; i < strlen(ct); i++) {
  567. if(*cs == *(ct + i))
  568. return(cs);
  569. }
  570. }
  571. cs += len;
  572. }
  573. return(NULL);
  574. }
  575. /*
  576. * returns 1 if a character before s2 in s1 is single-byte,
  577. * returns 0 if it is multi-byte.
  578. */
  579. int
  580. _is_previous_single(
  581. char *s1,
  582. char *s2)
  583. {
  584. int n = 1;
  585. if(MB_CUR_MAX == 1)
  586. return(1);
  587. while(*s1) {
  588. if(s1 == s2) {
  589. if(n > 1)
  590. return(0);
  591. else
  592. return(1);
  593. }
  594. n = mblen(s1, MB_CUR_MAX) > 1 ? mblen(s1, MB_CUR_MAX) : 1;
  595. s1 += n;
  596. }
  597. return(1);
  598. }
  599. #else
  600. char *
  601. _DtGetNthChar(
  602. char *s,
  603. int n )
  604. {
  605. if ((s == NULL) || (n < 0) || (n > strlen(s)))
  606. return(NULL);
  607. return (s + n);
  608. }
  609. char *
  610. _dt_strpbrk(
  611. char *cs,
  612. char *ct)
  613. {
  614. return(strpbrk(cs, ct));
  615. }
  616. int
  617. _is_previous_single(
  618. char *s1,
  619. char *s2)
  620. {
  621. return(1);
  622. }
  623. #endif /* NLS16 */