unicode.c 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. /*
  2. * linux/fs/hfsplus/unicode.c
  3. *
  4. * Copyright (C) 2001
  5. * Brad Boyer (flar@allandria.com)
  6. * (C) 2003 Ardis Technologies <roman@ardistech.com>
  7. *
  8. * Handler routines for unicode strings
  9. */
  10. #include <linux/types.h>
  11. #include <linux/nls.h>
  12. #include "hfsplus_fs.h"
  13. #include "hfsplus_raw.h"
  14. /* Fold the case of a unicode char, given the 16 bit value */
  15. /* Returns folded char, or 0 if ignorable */
  16. static inline u16 case_fold(u16 c)
  17. {
  18. u16 tmp;
  19. tmp = hfsplus_case_fold_table[c >> 8];
  20. if (tmp)
  21. tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
  22. else
  23. tmp = c;
  24. return tmp;
  25. }
  26. /* Compare unicode strings, return values like normal strcmp */
  27. int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
  28. const struct hfsplus_unistr *s2)
  29. {
  30. u16 len1, len2, c1, c2;
  31. const hfsplus_unichr *p1, *p2;
  32. len1 = be16_to_cpu(s1->length);
  33. len2 = be16_to_cpu(s2->length);
  34. p1 = s1->unicode;
  35. p2 = s2->unicode;
  36. while (1) {
  37. c1 = c2 = 0;
  38. while (len1 && !c1) {
  39. c1 = case_fold(be16_to_cpu(*p1));
  40. p1++;
  41. len1--;
  42. }
  43. while (len2 && !c2) {
  44. c2 = case_fold(be16_to_cpu(*p2));
  45. p2++;
  46. len2--;
  47. }
  48. if (c1 != c2)
  49. return (c1 < c2) ? -1 : 1;
  50. if (!c1 && !c2)
  51. return 0;
  52. }
  53. }
  54. /* Compare names as a sequence of 16-bit unsigned integers */
  55. int hfsplus_strcmp(const struct hfsplus_unistr *s1,
  56. const struct hfsplus_unistr *s2)
  57. {
  58. u16 len1, len2, c1, c2;
  59. const hfsplus_unichr *p1, *p2;
  60. int len;
  61. len1 = be16_to_cpu(s1->length);
  62. len2 = be16_to_cpu(s2->length);
  63. p1 = s1->unicode;
  64. p2 = s2->unicode;
  65. for (len = min(len1, len2); len > 0; len--) {
  66. c1 = be16_to_cpu(*p1);
  67. c2 = be16_to_cpu(*p2);
  68. if (c1 != c2)
  69. return c1 < c2 ? -1 : 1;
  70. p1++;
  71. p2++;
  72. }
  73. return len1 < len2 ? -1 :
  74. len1 > len2 ? 1 : 0;
  75. }
  76. #define Hangul_SBase 0xac00
  77. #define Hangul_LBase 0x1100
  78. #define Hangul_VBase 0x1161
  79. #define Hangul_TBase 0x11a7
  80. #define Hangul_SCount 11172
  81. #define Hangul_LCount 19
  82. #define Hangul_VCount 21
  83. #define Hangul_TCount 28
  84. #define Hangul_NCount (Hangul_VCount * Hangul_TCount)
  85. static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
  86. {
  87. int i, s, e;
  88. s = 1;
  89. e = p[1];
  90. if (!e || cc < p[s * 2] || cc > p[e * 2])
  91. return NULL;
  92. do {
  93. i = (s + e) / 2;
  94. if (cc > p[i * 2])
  95. s = i + 1;
  96. else if (cc < p[i * 2])
  97. e = i - 1;
  98. else
  99. return hfsplus_compose_table + p[i * 2 + 1];
  100. } while (s <= e);
  101. return NULL;
  102. }
  103. int hfsplus_uni2asc(struct super_block *sb,
  104. const struct hfsplus_unistr *ustr,
  105. char *astr, int *len_p)
  106. {
  107. const hfsplus_unichr *ip;
  108. struct nls_table *nls = HFSPLUS_SB(sb)->nls;
  109. u8 *op;
  110. u16 cc, c0, c1;
  111. u16 *ce1, *ce2;
  112. int i, len, ustrlen, res, compose;
  113. op = astr;
  114. ip = ustr->unicode;
  115. ustrlen = be16_to_cpu(ustr->length);
  116. len = *len_p;
  117. ce1 = NULL;
  118. compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
  119. while (ustrlen > 0) {
  120. c0 = be16_to_cpu(*ip++);
  121. ustrlen--;
  122. /* search for single decomposed char */
  123. if (likely(compose))
  124. ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
  125. if (ce1)
  126. cc = ce1[0];
  127. else
  128. cc = 0;
  129. if (cc) {
  130. /* start of a possibly decomposed Hangul char */
  131. if (cc != 0xffff)
  132. goto done;
  133. if (!ustrlen)
  134. goto same;
  135. c1 = be16_to_cpu(*ip) - Hangul_VBase;
  136. if (c1 < Hangul_VCount) {
  137. /* compose the Hangul char */
  138. cc = (c0 - Hangul_LBase) * Hangul_VCount;
  139. cc = (cc + c1) * Hangul_TCount;
  140. cc += Hangul_SBase;
  141. ip++;
  142. ustrlen--;
  143. if (!ustrlen)
  144. goto done;
  145. c1 = be16_to_cpu(*ip) - Hangul_TBase;
  146. if (c1 > 0 && c1 < Hangul_TCount) {
  147. cc += c1;
  148. ip++;
  149. ustrlen--;
  150. }
  151. goto done;
  152. }
  153. }
  154. while (1) {
  155. /* main loop for common case of not composed chars */
  156. if (!ustrlen)
  157. goto same;
  158. c1 = be16_to_cpu(*ip);
  159. if (likely(compose))
  160. ce1 = hfsplus_compose_lookup(
  161. hfsplus_compose_table, c1);
  162. if (ce1)
  163. break;
  164. switch (c0) {
  165. case 0:
  166. c0 = 0x2400;
  167. break;
  168. case '/':
  169. c0 = ':';
  170. break;
  171. }
  172. res = nls->uni2char(c0, op, len);
  173. if (res < 0) {
  174. if (res == -ENAMETOOLONG)
  175. goto out;
  176. *op = '?';
  177. res = 1;
  178. }
  179. op += res;
  180. len -= res;
  181. c0 = c1;
  182. ip++;
  183. ustrlen--;
  184. }
  185. ce2 = hfsplus_compose_lookup(ce1, c0);
  186. if (ce2) {
  187. i = 1;
  188. while (i < ustrlen) {
  189. ce1 = hfsplus_compose_lookup(ce2,
  190. be16_to_cpu(ip[i]));
  191. if (!ce1)
  192. break;
  193. i++;
  194. ce2 = ce1;
  195. }
  196. cc = ce2[0];
  197. if (cc) {
  198. ip += i;
  199. ustrlen -= i;
  200. goto done;
  201. }
  202. }
  203. same:
  204. switch (c0) {
  205. case 0:
  206. cc = 0x2400;
  207. break;
  208. case '/':
  209. cc = ':';
  210. break;
  211. default:
  212. cc = c0;
  213. }
  214. done:
  215. res = nls->uni2char(cc, op, len);
  216. if (res < 0) {
  217. if (res == -ENAMETOOLONG)
  218. goto out;
  219. *op = '?';
  220. res = 1;
  221. }
  222. op += res;
  223. len -= res;
  224. }
  225. res = 0;
  226. out:
  227. *len_p = (char *)op - astr;
  228. return res;
  229. }
  230. /*
  231. * Convert one or more ASCII characters into a single unicode character.
  232. * Returns the number of ASCII characters corresponding to the unicode char.
  233. */
  234. static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
  235. wchar_t *uc)
  236. {
  237. int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
  238. if (size <= 0) {
  239. *uc = '?';
  240. size = 1;
  241. }
  242. switch (*uc) {
  243. case 0x2400:
  244. *uc = 0;
  245. break;
  246. case ':':
  247. *uc = '/';
  248. break;
  249. }
  250. return size;
  251. }
  252. /* Decomposes a single unicode character. */
  253. static inline u16 *decompose_unichar(wchar_t uc, int *size)
  254. {
  255. int off;
  256. off = hfsplus_decompose_table[(uc >> 12) & 0xf];
  257. if (off == 0 || off == 0xffff)
  258. return NULL;
  259. off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
  260. if (!off)
  261. return NULL;
  262. off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
  263. if (!off)
  264. return NULL;
  265. off = hfsplus_decompose_table[off + (uc & 0xf)];
  266. *size = off & 3;
  267. if (*size == 0)
  268. return NULL;
  269. return hfsplus_decompose_table + (off / 4);
  270. }
  271. int hfsplus_asc2uni(struct super_block *sb,
  272. struct hfsplus_unistr *ustr, int max_unistr_len,
  273. const char *astr, int len)
  274. {
  275. int size, dsize, decompose;
  276. u16 *dstr, outlen = 0;
  277. wchar_t c;
  278. decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
  279. while (outlen < max_unistr_len && len > 0) {
  280. size = asc2unichar(sb, astr, len, &c);
  281. if (decompose)
  282. dstr = decompose_unichar(c, &dsize);
  283. else
  284. dstr = NULL;
  285. if (dstr) {
  286. if (outlen + dsize > max_unistr_len)
  287. break;
  288. do {
  289. ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
  290. } while (--dsize > 0);
  291. } else
  292. ustr->unicode[outlen++] = cpu_to_be16(c);
  293. astr += size;
  294. len -= size;
  295. }
  296. ustr->length = cpu_to_be16(outlen);
  297. if (len > 0)
  298. return -ENAMETOOLONG;
  299. return 0;
  300. }
  301. /*
  302. * Hash a string to an integer as appropriate for the HFS+ filesystem.
  303. * Composed unicode characters are decomposed and case-folding is performed
  304. * if the appropriate bits are (un)set on the superblock.
  305. */
  306. int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
  307. struct qstr *str)
  308. {
  309. struct super_block *sb = dentry->d_sb;
  310. const char *astr;
  311. const u16 *dstr;
  312. int casefold, decompose, size, len;
  313. unsigned long hash;
  314. wchar_t c;
  315. u16 c2;
  316. casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
  317. decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
  318. hash = init_name_hash();
  319. astr = str->name;
  320. len = str->len;
  321. while (len > 0) {
  322. int uninitialized_var(dsize);
  323. size = asc2unichar(sb, astr, len, &c);
  324. astr += size;
  325. len -= size;
  326. if (decompose)
  327. dstr = decompose_unichar(c, &dsize);
  328. else
  329. dstr = NULL;
  330. if (dstr) {
  331. do {
  332. c2 = *dstr++;
  333. if (casefold)
  334. c2 = case_fold(c2);
  335. if (!casefold || c2)
  336. hash = partial_name_hash(c2, hash);
  337. } while (--dsize > 0);
  338. } else {
  339. c2 = c;
  340. if (casefold)
  341. c2 = case_fold(c2);
  342. if (!casefold || c2)
  343. hash = partial_name_hash(c2, hash);
  344. }
  345. }
  346. str->hash = end_name_hash(hash);
  347. return 0;
  348. }
  349. /*
  350. * Compare strings with HFS+ filename ordering.
  351. * Composed unicode characters are decomposed and case-folding is performed
  352. * if the appropriate bits are (un)set on the superblock.
  353. */
  354. int hfsplus_compare_dentry(const struct dentry *parent,
  355. const struct inode *pinode,
  356. const struct dentry *dentry, const struct inode *inode,
  357. unsigned int len, const char *str, const struct qstr *name)
  358. {
  359. struct super_block *sb = parent->d_sb;
  360. int casefold, decompose, size;
  361. int dsize1, dsize2, len1, len2;
  362. const u16 *dstr1, *dstr2;
  363. const char *astr1, *astr2;
  364. u16 c1, c2;
  365. wchar_t c;
  366. casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
  367. decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
  368. astr1 = str;
  369. len1 = len;
  370. astr2 = name->name;
  371. len2 = name->len;
  372. dsize1 = dsize2 = 0;
  373. dstr1 = dstr2 = NULL;
  374. while (len1 > 0 && len2 > 0) {
  375. if (!dsize1) {
  376. size = asc2unichar(sb, astr1, len1, &c);
  377. astr1 += size;
  378. len1 -= size;
  379. if (decompose)
  380. dstr1 = decompose_unichar(c, &dsize1);
  381. if (!decompose || !dstr1) {
  382. c1 = c;
  383. dstr1 = &c1;
  384. dsize1 = 1;
  385. }
  386. }
  387. if (!dsize2) {
  388. size = asc2unichar(sb, astr2, len2, &c);
  389. astr2 += size;
  390. len2 -= size;
  391. if (decompose)
  392. dstr2 = decompose_unichar(c, &dsize2);
  393. if (!decompose || !dstr2) {
  394. c2 = c;
  395. dstr2 = &c2;
  396. dsize2 = 1;
  397. }
  398. }
  399. c1 = *dstr1;
  400. c2 = *dstr2;
  401. if (casefold) {
  402. c1 = case_fold(c1);
  403. if (!c1) {
  404. dstr1++;
  405. dsize1--;
  406. continue;
  407. }
  408. c2 = case_fold(c2);
  409. if (!c2) {
  410. dstr2++;
  411. dsize2--;
  412. continue;
  413. }
  414. }
  415. if (c1 < c2)
  416. return -1;
  417. else if (c1 > c2)
  418. return 1;
  419. dstr1++;
  420. dsize1--;
  421. dstr2++;
  422. dsize2--;
  423. }
  424. if (len1 < len2)
  425. return -1;
  426. if (len1 > len2)
  427. return 1;
  428. return 0;
  429. }