kuten.h 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. /*
  10. following astonishing goo courtesy of kogure.
  11. */
/*
 * Microsoft Kanji Encoding (Shift JIS, SJIS) Transformation
 */
  15. /*
  16. * void
  17. * J2S(unsigned char *_h, unsigned char *_l)
  18. * JIS X 208 to MS kanji transformation.
  19. *
  20. * Calling/Exit State:
  21. * _h and _l should be in their valid range.
  22. * No return value.
  23. */
  24. #define J2S(_h, _l) { \
  25. /* lower: 21-7e >> 40-9d,9e-fb >> 40-7e,(skip 7f),80-fc */ \
  26. if (((_l) += (((_h)-- % 2) ? 0x1f : 0x7d)) > 0x7e) (_l)++; \
  27. /* upper: 21-7e >> 81-af >> 81-9f,(skip a0-df),e0-ef */ \
  28. if (((_h) = ((_h) / 2 + 0x71)) > 0x9f) (_h) += 0x40; \
  29. }
  30. /*
  31. * void
  32. * S2J(unsigned char *_h, unsigned char *_l)
  33. * MS kanji to JIS X 208 transformation.
  34. *
  35. * Calling/Exit State:
  36. * _h and _l should be in valid range.
  37. * No return value.
  38. */
  39. #define S2J(_h, _l) { \
  40. /* lower: 40-7e,80-fc >> 21-5f,61-dd >> 21-7e,7f-dc */ \
  41. if (((_l) -= 0x1f) > 0x60) (_l)--; \
  42. /* upper: 81-9f,e0-ef >> 00-1e,5f-6e >> 00-2e >> 21-7d */ \
  43. if (((_h) -= 0x81) > 0x5e) (_h) -= 0x40; (_h) *= 2, (_h) += 0x21; \
  44. /* upper: ,21-7d >> ,22-7e ; lower: ,7f-dc >> ,21-7e */ \
  45. if ((_l) > 0x7e) (_h)++, (_l) -= 0x5e; \
  46. }
  47. /*
  48. * int
  49. * ISJKANA(const unsigned char *_b)
  50. * Tests given byte is in the range of JIS X 0201 katakana.
  51. *
  52. * Calling/Exit State:
  53. * Returns 1 if it is, or 0 otherwise.
  54. */
  55. #define ISJKANA(_b) (0xa0 <= (_b) && (_b) < 0xe0)
  56. /*
  57. * int
  58. * CANS2JH(const unsigned char *_h)
  59. * Tests given byte is in the range of valid first byte of MS
  60. * kanji code; either acts as a subroutine of CANS2J() macro
  61. * or can be used to parse MS kanji encoded strings.
  62. *
  63. * Calling/Exit State:
  64. * Returns 1 if it is, or 0 otherwise.
  65. */
  66. #define CANS2JH(_h) ((0x81 <= (_h) && (_h) < 0xf0) && !ISJKANA(_h))
  67. /*
  68. * int
  69. * CANS2JL(const unsigned char *_l)
  70. * Tests given byte is in the range of valid second byte of MS
  71. * kanji code; acts as a subroutine of CANS2J() macro.
  72. *
  73. * Calling/Exit State:
  74. * Returns 1 if it is, or 0 otherwise.
  75. */
  76. #define CANS2JL(_l) (0x40 <= (_l) && (_l) < 0xfd && (_l) != 0x7f)
  77. /*
  78. * int
  79. * CANS2J(const unsigned char *_h, const unsinged char *_l)
  80. * Tests given bytes form a MS kanji code point which can be
  81. * transformed to a valid JIS X 208 code point.
  82. *
  83. * Calling/Exit State:
  84. * Returns 1 if they are, or 0 otherwise.
  85. */
  86. #define CANS2J(_h, _l) (CANS2JH(_h) && CANS2JL(_l))
  87. /*
  88. * int
  89. * CANJ2SB(const unsigned char *_b)
  90. * Tests given bytes is in the range of valid 94 graphic
  91. * character set; acts as a subroutine of CANJ2S() macro.
  92. *
  93. * Calling/Exit State:
  94. * Returns 1 if it is, or 0 otherwise.
  95. */
  96. #define CANJ2SB(_b) (0x21 <= (_b) && (_b) < 0x7f)
  97. /*
  98. * int
  99. * CANJ2S(const unsigned char *_h, const unsigned char *_l)
  100. * Tests given bytes form valid JIS X 208 code points
  101. * (which can be transformed to MS kanji).
  102. *
  103. * Calling/Exit State:
  104. * Returns 1 if they are, or 0 otherwise.
  105. */
  106. #define CANJ2S(_h, _l) (CANJ2SB(_h) && CANJ2SB(_l))
  107. #define JIS208MAX 8407
  108. #define GB2312MAX 8795
  109. #define BIG5MAX 13973
  110. extern Rune tabjis208[JIS208MAX]; /* runes indexed by kuten */
  111. extern Rune tabgb2312[GB2312MAX];
  112. extern Rune tabbig5[BIG5MAX];