|
@@ -15,6 +15,7 @@
|
|
#include <linux/errno.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/kmod.h>
|
|
#include <linux/kmod.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/spinlock.h>
|
|
|
|
+#include <asm/byteorder.h>
|
|
|
|
|
|
static struct nls_table default_table;
|
|
static struct nls_table default_table;
|
|
static struct nls_table *tables = &default_table;
|
|
static struct nls_table *tables = &default_table;
|
|
@@ -43,10 +44,17 @@ static const struct utf8_table utf8_table[] =
|
|
{0, /* end of table */}
|
|
{0, /* end of table */}
|
|
};
|
|
};
|
|
|
|
|
|
-int
|
|
|
|
-utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
|
|
|
|
|
|
+#define UNICODE_MAX 0x0010ffff
|
|
|
|
+#define PLANE_SIZE 0x00010000
|
|
|
|
+
|
|
|
|
+#define SURROGATE_MASK 0xfffff800
|
|
|
|
+#define SURROGATE_PAIR 0x0000d800
|
|
|
|
+#define SURROGATE_LOW 0x00000400
|
|
|
|
+#define SURROGATE_BITS 0x000003ff
|
|
|
|
+
|
|
|
|
+int utf8_to_utf32(const u8 *s, int len, unicode_t *pu)
|
|
{
|
|
{
|
|
- long l;
|
|
|
|
|
|
+ unsigned long l;
|
|
int c0, c, nc;
|
|
int c0, c, nc;
|
|
const struct utf8_table *t;
|
|
const struct utf8_table *t;
|
|
|
|
|
|
@@ -57,12 +65,13 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
|
|
nc++;
|
|
nc++;
|
|
if ((c0 & t->cmask) == t->cval) {
|
|
if ((c0 & t->cmask) == t->cval) {
|
|
l &= t->lmask;
|
|
l &= t->lmask;
|
|
- if (l < t->lval)
|
|
|
|
|
|
+ if (l < t->lval || l > UNICODE_MAX ||
|
|
|
|
+ (l & SURROGATE_MASK) == SURROGATE_PAIR)
|
|
return -1;
|
|
return -1;
|
|
- *p = l;
|
|
|
|
|
|
+ *pu = (unicode_t) l;
|
|
return nc;
|
|
return nc;
|
|
}
|
|
}
|
|
- if (n <= nc)
|
|
|
|
|
|
+ if (len <= nc)
|
|
return -1;
|
|
return -1;
|
|
s++;
|
|
s++;
|
|
c = (*s ^ 0x80) & 0xFF;
|
|
c = (*s ^ 0x80) & 0xFF;
|
|
@@ -72,76 +81,119 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
|
|
}
|
|
}
|
|
return -1;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
+EXPORT_SYMBOL(utf8_to_utf32);
|
|
|
|
|
|
-int
|
|
|
|
-utf8_mbstowcs(wchar_t *pwcs, const __u8 *s, int n)
|
|
|
|
|
|
+int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
|
|
{
|
|
{
|
|
- __u16 *op;
|
|
|
|
- const __u8 *ip;
|
|
|
|
- int size;
|
|
|
|
-
|
|
|
|
- op = pwcs;
|
|
|
|
- ip = s;
|
|
|
|
- while (*ip && n > 0) {
|
|
|
|
- if (*ip & 0x80) {
|
|
|
|
- size = utf8_mbtowc(op, ip, n);
|
|
|
|
- if (size == -1) {
|
|
|
|
- /* Ignore character and move on */
|
|
|
|
- ip++;
|
|
|
|
- n--;
|
|
|
|
- } else {
|
|
|
|
- op++;
|
|
|
|
- ip += size;
|
|
|
|
- n -= size;
|
|
|
|
- }
|
|
|
|
- } else {
|
|
|
|
- *op++ = *ip++;
|
|
|
|
- n--;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- return (op - pwcs);
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-int
|
|
|
|
-utf8_wctomb(__u8 *s, wchar_t wc, int maxlen)
|
|
|
|
-{
|
|
|
|
- long l;
|
|
|
|
|
|
+ unsigned long l;
|
|
int c, nc;
|
|
int c, nc;
|
|
const struct utf8_table *t;
|
|
const struct utf8_table *t;
|
|
-
|
|
|
|
|
|
+
|
|
if (!s)
|
|
if (!s)
|
|
return 0;
|
|
return 0;
|
|
-
|
|
|
|
- l = wc;
|
|
|
|
|
|
+
|
|
|
|
+ l = u;
|
|
|
|
+ if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR)
|
|
|
|
+ return -1;
|
|
|
|
+
|
|
nc = 0;
|
|
nc = 0;
|
|
for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) {
|
|
for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) {
|
|
nc++;
|
|
nc++;
|
|
if (l <= t->lmask) {
|
|
if (l <= t->lmask) {
|
|
c = t->shift;
|
|
c = t->shift;
|
|
- *s = t->cval | (l >> c);
|
|
|
|
|
|
+ *s = (u8) (t->cval | (l >> c));
|
|
while (c > 0) {
|
|
while (c > 0) {
|
|
c -= 6;
|
|
c -= 6;
|
|
s++;
|
|
s++;
|
|
- *s = 0x80 | ((l >> c) & 0x3F);
|
|
|
|
|
|
+ *s = (u8) (0x80 | ((l >> c) & 0x3F));
|
|
}
|
|
}
|
|
return nc;
|
|
return nc;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return -1;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
+EXPORT_SYMBOL(utf32_to_utf8);
|
|
|
|
|
|
-int
|
|
|
|
-utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen)
|
|
|
|
|
|
+int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
|
|
{
|
|
{
|
|
- const __u16 *ip;
|
|
|
|
- __u8 *op;
|
|
|
|
|
|
+ u16 *op;
|
|
int size;
|
|
int size;
|
|
|
|
+ unicode_t u;
|
|
|
|
+
|
|
|
|
+ op = pwcs;
|
|
|
|
+ while (*s && len > 0) {
|
|
|
|
+ if (*s & 0x80) {
|
|
|
|
+ size = utf8_to_utf32(s, len, &u);
|
|
|
|
+ if (size < 0) {
|
|
|
|
+ /* Ignore character and move on */
|
|
|
|
+ size = 1;
|
|
|
|
+ } else if (u >= PLANE_SIZE) {
|
|
|
|
+ u -= PLANE_SIZE;
|
|
|
|
+ *op++ = (wchar_t) (SURROGATE_PAIR |
|
|
|
|
+ ((u >> 10) & SURROGATE_BITS));
|
|
|
|
+ *op++ = (wchar_t) (SURROGATE_PAIR |
|
|
|
|
+ SURROGATE_LOW |
|
|
|
|
+ (u & SURROGATE_BITS));
|
|
|
|
+ } else {
|
|
|
|
+ *op++ = (wchar_t) u;
|
|
|
|
+ }
|
|
|
|
+ s += size;
|
|
|
|
+ len -= size;
|
|
|
|
+ } else {
|
|
|
|
+ *op++ = *s++;
|
|
|
|
+ len--;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return op - pwcs;
|
|
|
|
+}
|
|
|
|
+EXPORT_SYMBOL(utf8s_to_utf16s);
|
|
|
|
+
|
|
|
|
+static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian)
|
|
|
|
+{
|
|
|
|
+ switch (endian) {
|
|
|
|
+ default:
|
|
|
|
+ return c;
|
|
|
|
+ case UTF16_LITTLE_ENDIAN:
|
|
|
|
+ return __le16_to_cpu(c);
|
|
|
|
+ case UTF16_BIG_ENDIAN:
|
|
|
|
+ return __be16_to_cpu(c);
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian,
|
|
|
|
+ u8 *s, int maxlen)
|
|
|
|
+{
|
|
|
|
+ u8 *op;
|
|
|
|
+ int size;
|
|
|
|
+ unsigned long u, v;
|
|
|
|
|
|
op = s;
|
|
op = s;
|
|
- ip = pwcs;
|
|
|
|
- while (*ip && maxlen > 0) {
|
|
|
|
- if (*ip > 0x7f) {
|
|
|
|
- size = utf8_wctomb(op, *ip, maxlen);
|
|
|
|
|
|
+ while (len > 0 && maxlen > 0) {
|
|
|
|
+ u = get_utf16(*pwcs, endian);
|
|
|
|
+ if (!u)
|
|
|
|
+ break;
|
|
|
|
+ pwcs++;
|
|
|
|
+ len--;
|
|
|
|
+ if (u > 0x7f) {
|
|
|
|
+ if ((u & SURROGATE_MASK) == SURROGATE_PAIR) {
|
|
|
|
+ if (u & SURROGATE_LOW) {
|
|
|
|
+ /* Ignore character and move on */
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
|
|
+ if (len <= 0)
|
|
|
|
+ break;
|
|
|
|
+ v = get_utf16(*pwcs, endian);
|
|
|
|
+ if ((v & SURROGATE_MASK) != SURROGATE_PAIR ||
|
|
|
|
+ !(v & SURROGATE_LOW)) {
|
|
|
|
+ /* Ignore character and move on */
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
|
|
+ u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10)
|
|
|
|
+ + (v & SURROGATE_BITS);
|
|
|
|
+ pwcs++;
|
|
|
|
+ len--;
|
|
|
|
+ }
|
|
|
|
+ size = utf32_to_utf8(u, op, maxlen);
|
|
if (size == -1) {
|
|
if (size == -1) {
|
|
/* Ignore character and move on */
|
|
/* Ignore character and move on */
|
|
} else {
|
|
} else {
|
|
@@ -149,13 +201,13 @@ utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen)
|
|
maxlen -= size;
|
|
maxlen -= size;
|
|
}
|
|
}
|
|
} else {
|
|
} else {
|
|
- *op++ = (__u8) *ip;
|
|
|
|
|
|
+ *op++ = (u8) u;
|
|
maxlen--;
|
|
maxlen--;
|
|
}
|
|
}
|
|
- ip++;
|
|
|
|
}
|
|
}
|
|
- return (op - s);
|
|
|
|
|
|
+ return op - s;
|
|
}
|
|
}
|
|
|
|
+EXPORT_SYMBOL(utf16s_to_utf8s);
|
|
|
|
|
|
int register_nls(struct nls_table * nls)
|
|
int register_nls(struct nls_table * nls)
|
|
{
|
|
{
|
|
@@ -467,9 +519,5 @@ EXPORT_SYMBOL(unregister_nls);
|
|
EXPORT_SYMBOL(unload_nls);
|
|
EXPORT_SYMBOL(unload_nls);
|
|
EXPORT_SYMBOL(load_nls);
|
|
EXPORT_SYMBOL(load_nls);
|
|
EXPORT_SYMBOL(load_nls_default);
|
|
EXPORT_SYMBOL(load_nls_default);
|
|
-EXPORT_SYMBOL(utf8_mbtowc);
|
|
|
|
-EXPORT_SYMBOL(utf8_mbstowcs);
|
|
|
|
-EXPORT_SYMBOL(utf8_wctomb);
|
|
|
|
-EXPORT_SYMBOL(utf8_wcstombs);
|
|
|
|
|
|
|
|
MODULE_LICENSE("Dual BSD/GPL");
|
|
MODULE_LICENSE("Dual BSD/GPL");
|