unicode.c 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include <u.h>
  10. #include <libc.h>
  11. #include <bio.h>
  /* Synopsis printed by main after "usage: " when no arguments remain. */
  12. char usage[] = "unicode { [-t] hex hex ... | hexmin-hexmax ... | [-n] char ... }";
  /* Characters accepted as the first character of a hexadecimal argument. */
  13. char hex[] = "0123456789abcdefABCDEF";
  /* Set by the -n flag: main then always dispatches to nums(). */
  14. int numout = 0;
  /* Set by the -t flag: chars() then omits the newline after each character. */
  15. int text = 0;
  /*
   * Not referenced by any function visible in this file; the "err" names
   * used inside range() and chars() are goto labels, not this variable.
   */
  16. char *err;
  /* Output buffer; main initializes it on descriptor 1 for writing. */
  17. Biobuf bout;
  /*
   * Each handler walks a null-terminated argument vector and returns 0 on
   * success or a short error string, which main passes to exits().
   */
  18. char *range(char*[]);
  19. char *nums(char*[]);
  20. char *chars(char*[]);
  21. void
  22. main(int argc, char *argv[])
  23. {
  24. ARGBEGIN{
  25. case 'n':
  26. numout = 1;
  27. break;
  28. case 't':
  29. text = 1;
  30. break;
  31. }ARGEND
  32. Binit(&bout, 1, OWRITE);
  33. if(argc == 0){
  34. fprint(2, "usage: %s\n", usage);
  35. exits("usage");
  36. }
  37. if(!numout && utfrune(argv[0], '-'))
  38. exits(range(argv));
  39. if(numout || strchr(hex, argv[0][0])==0)
  40. exits(nums(argv));
  41. exits(chars(argv));
  42. }
  43. char*
  44. range(char *argv[])
  45. {
  46. char *q;
  47. int min, max;
  48. int i;
  49. while(*argv){
  50. q = *argv;
  51. if(strchr(hex, q[0]) == 0){
  52. err:
  53. fprint(2, "unicode: bad range %s\n", *argv);
  54. return "bad range";
  55. }
  56. min = strtoul(q, &q, 16);
  57. if(min<0 || min>Runemax || *q!='-')
  58. goto err;
  59. q++;
  60. if(strchr(hex, *q) == 0)
  61. goto err;
  62. max = strtoul(q, &q, 16);
  63. if(max<0 || max>Runemax || max<min || *q!=0)
  64. goto err;
  65. i = 0;
  66. do{
  67. Bprint(&bout, "%.6x %C", min, min);
  68. i++;
  69. if(min==max || (i&7)==0)
  70. Bprint(&bout, "\n");
  71. else
  72. Bprint(&bout, "\t");
  73. min++;
  74. }while(min<=max);
  75. argv++;
  76. }
  77. return 0;
  78. }
  79. char*
  80. nums(char *argv[])
  81. {
  82. char *q;
  83. Rune r;
  84. int w, rsz;
  85. char utferr[UTFmax];
  86. r = Runeerror;
  87. rsz = runetochar(utferr, &r);
  88. while(*argv){
  89. q = *argv;
  90. while(*q){
  91. w = chartorune(&r, q);
  92. if(r==Runeerror){
  93. if(strlen(q) != rsz || memcmp(q, utferr, rsz) != 0){
  94. fprint(2, "unicode: invalid utf string %s\n", *argv);
  95. return "bad utf";
  96. }
  97. }
  98. Bprint(&bout, "%.6x\n", r);
  99. q += w;
  100. }
  101. argv++;
  102. }
  103. return 0;
  104. }
  105. char*
  106. chars(char *argv[])
  107. {
  108. char *q;
  109. int m;
  110. while(*argv){
  111. q = *argv;
  112. if(strchr(hex, q[0]) == 0){
  113. err:
  114. fprint(2, "unicode: bad unicode value %s\n", *argv);
  115. return "bad char";
  116. }
  117. m = strtoul(q, &q, 16);
  118. if(m<0 || m>Runemax || *q!=0)
  119. goto err;
  120. Bprint(&bout, "%C", m);
  121. if(!text)
  122. Bprint(&bout, "\n");
  123. argv++;
  124. }
  125. return 0;
  126. }