unicode.c 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. char usage[] = "unicode { [-t] hex hex ... | hexmin-hexmax ... | [-n] char ... }";
  5. char hex[] = "0123456789abcdefABCDEF";
  6. int numout = 0;
  7. int text = 0;
  8. char *err;
  9. Biobuf bout;
  10. char *range(char*[]);
  11. char *nums(char*[]);
  12. char *chars(char*[]);
  13. void
  14. main(int argc, char *argv[])
  15. {
  16. ARGBEGIN{
  17. case 'n':
  18. numout = 1;
  19. break;
  20. case 't':
  21. text = 1;
  22. break;
  23. }ARGEND
  24. Binit(&bout, 1, OWRITE);
  25. if(argc == 0){
  26. fprint(2, "usage: %s\n", usage);
  27. exits("usage");
  28. }
  29. if(!numout && utfrune(argv[0], '-'))
  30. exits(range(argv));
  31. if(numout || strchr(hex, argv[0][0])==0)
  32. exits(nums(argv));
  33. exits(chars(argv));
  34. }
  35. char*
  36. range(char *argv[])
  37. {
  38. char *q;
  39. int min, max;
  40. int i;
  41. while(*argv){
  42. q = *argv;
  43. if(strchr(hex, q[0]) == 0){
  44. err:
  45. fprint(2, "unicode: bad range %s\n", *argv);
  46. return "bad range";
  47. }
  48. min = strtoul(q, &q, 16);
  49. if(min<0 || min>Runemax || *q!='-')
  50. goto err;
  51. q++;
  52. if(strchr(hex, *q) == 0)
  53. goto err;
  54. max = strtoul(q, &q, 16);
  55. if(max<0 || max>Runemax || max<min || *q!=0)
  56. goto err;
  57. i = 0;
  58. do{
  59. Bprint(&bout, "%.6x %C", min, min);
  60. i++;
  61. if(min==max || (i&7)==0)
  62. Bprint(&bout, "\n");
  63. else
  64. Bprint(&bout, "\t");
  65. min++;
  66. }while(min<=max);
  67. argv++;
  68. }
  69. return 0;
  70. }
  71. char*
  72. nums(char *argv[])
  73. {
  74. char *q;
  75. Rune r;
  76. int w, rsz;
  77. char utferr[UTFmax];
  78. r = Runeerror;
  79. rsz = runetochar(utferr, &r);
  80. while(*argv){
  81. q = *argv;
  82. while(*q){
  83. w = chartorune(&r, q);
  84. if(r==Runeerror){
  85. if(strlen(q) != rsz || memcmp(q, utferr, rsz) != 0){
  86. fprint(2, "unicode: invalid utf string %s\n", *argv);
  87. return "bad utf";
  88. }
  89. }
  90. Bprint(&bout, "%.6x\n", r);
  91. q += w;
  92. }
  93. argv++;
  94. }
  95. return 0;
  96. }
  97. char*
  98. chars(char *argv[])
  99. {
  100. char *q;
  101. int m;
  102. while(*argv){
  103. q = *argv;
  104. if(strchr(hex, q[0]) == 0){
  105. err:
  106. fprint(2, "unicode: bad unicode value %s\n", *argv);
  107. return "bad char";
  108. }
  109. m = strtoul(q, &q, 16);
  110. if(m<0 || m>Runemax || *q!=0)
  111. goto err;
  112. Bprint(&bout, "%C", m);
  113. if(!text)
  114. Bprint(&bout, "\n");
  115. argv++;
  116. }
  117. return 0;
  118. }