compr_lzari.c 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. /*
  2. * JFFS2 -- Journalling Flash File System, Version 2.
  3. *
  4. * Copyright (C) 2004 Patrik Kluba,
  5. * University of Szeged, Hungary
  6. *
  7. * For licensing information, see the file 'LICENCE' in the
  8. * jffs2 directory.
  9. *
  10. * $Id: compr_lzari.c,v 1.3 2004/06/23 16:34:39 havasi Exp $
  11. *
  12. */
  13. /*
  14. Lempel-Ziv-Arithmetic coding compression module for jffs2
  15. Based on the LZARI source included in LDS (lossless datacompression sources)
  16. */
  17. /* -*- Mode: C; indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4 -*- */
  18. /*
  19. Original copyright follows:
  20. **************************************************************
  21. LZARI.C -- A Data Compression Program
  22. (tab = 4 spaces)
  23. **************************************************************
  24. 4/7/1989 Haruhiko Okumura
  25. Use, distribute, and modify this program freely.
  26. Please send me your improved versions.
  27. PC-VAN SCIENCE
  28. NIFTY-Serve PAF01022
  29. CompuServe 74050,1022
  30. **************************************************************
  31. LZARI.C (c)1989 by Haruyasu Yoshizaki, Haruhiko Okumura, and Kenji Rikitake.
  32. All rights reserved. Permission granted for non-commercial use.
  33. */
  34. /*
  35. 2004-02-18 pajko <pajko(AT)halom(DOT)u-szeged(DOT)hu>
  36. Removed unused variables and fixed no return value
  37. 2004-02-16 pajko <pajko(AT)halom(DOT)u-szeged(DOT)hu>
  38. Initial release
  39. */
  40. #include <config.h>
  41. #if ((CONFIG_COMMANDS & CFG_CMD_JFFS2) && defined(CONFIG_JFFS2_LZO_LZARI))
  42. #include <linux/stddef.h>
  43. #include <jffs2/jffs2.h>
  44. #define N 4096 /* size of ring buffer */
  45. #define F 60 /* upper limit for match_length */
  46. #define THRESHOLD 2 /* encode string into position and length
  47. if match_length is greater than this */
  48. #define NIL N /* index for root of binary search trees */
  49. static unsigned char
  50. text_buf[N + F - 1]; /* ring buffer of size N,
  51. with extra F-1 bytes to facilitate string comparison */
  52. /********** Arithmetic Compression **********/
  53. /* If you are not familiar with arithmetic compression, you should read
  54. I. E. Witten, R. M. Neal, and J. G. Cleary,
  55. Communications of the ACM, Vol. 30, pp. 520-540 (1987),
  56. from which much have been borrowed. */
  57. #define M 15
  58. /* Q1 (= 2 to the M) must be sufficiently large, but not so
  59. large as the unsigned long 4 * Q1 * (Q1 - 1) overflows. */
  60. #define Q1 (1UL << M)
  61. #define Q2 (2 * Q1)
  62. #define Q3 (3 * Q1)
  63. #define Q4 (4 * Q1)
  64. #define MAX_CUM (Q1 - 1)
  65. #define N_CHAR (256 - THRESHOLD + F)
  66. /* character code = 0, 1, ..., N_CHAR - 1 */
  67. static unsigned long char_to_sym[N_CHAR], sym_to_char[N_CHAR + 1];
  68. static unsigned long
  69. sym_freq[N_CHAR + 1], /* frequency for symbols */
  70. sym_cum[N_CHAR + 1], /* cumulative freq for symbols */
  71. position_cum[N + 1]; /* cumulative freq for positions */
  72. static void StartModel(void) /* Initialize model */
  73. {
  74. unsigned long ch, sym, i;
  75. sym_cum[N_CHAR] = 0;
  76. for (sym = N_CHAR; sym >= 1; sym--) {
  77. ch = sym - 1;
  78. char_to_sym[ch] = sym; sym_to_char[sym] = ch;
  79. sym_freq[sym] = 1;
  80. sym_cum[sym - 1] = sym_cum[sym] + sym_freq[sym];
  81. }
  82. sym_freq[0] = 0; /* sentinel (!= sym_freq[1]) */
  83. position_cum[N] = 0;
  84. for (i = N; i >= 1; i--)
  85. position_cum[i - 1] = position_cum[i] + 10000 / (i + 200);
  86. /* empirical distribution function (quite tentative) */
  87. /* Please devise a better mechanism! */
  88. }
  89. static void UpdateModel(unsigned long sym)
  90. {
  91. unsigned long c, ch_i, ch_sym;
  92. unsigned long i;
  93. if (sym_cum[0] >= MAX_CUM) {
  94. c = 0;
  95. for (i = N_CHAR; i > 0; i--) {
  96. sym_cum[i] = c;
  97. c += (sym_freq[i] = (sym_freq[i] + 1) >> 1);
  98. }
  99. sym_cum[0] = c;
  100. }
  101. for (i = sym; sym_freq[i] == sym_freq[i - 1]; i--) ;
  102. if (i < sym) {
  103. ch_i = sym_to_char[i]; ch_sym = sym_to_char[sym];
  104. sym_to_char[i] = ch_sym; sym_to_char[sym] = ch_i;
  105. char_to_sym[ch_i] = sym; char_to_sym[ch_sym] = i;
  106. }
  107. sym_freq[i]++;
  108. while (--i > 0) sym_cum[i]++;
  109. sym_cum[0]++;
  110. }
  111. static unsigned long BinarySearchSym(unsigned long x)
  112. /* 1 if x >= sym_cum[1],
  113. N_CHAR if sym_cum[N_CHAR] > x,
  114. i such that sym_cum[i - 1] > x >= sym_cum[i] otherwise */
  115. {
  116. unsigned long i, j, k;
  117. i = 1; j = N_CHAR;
  118. while (i < j) {
  119. k = (i + j) / 2;
  120. if (sym_cum[k] > x) i = k + 1; else j = k;
  121. }
  122. return i;
  123. }
  124. unsigned long BinarySearchPos(unsigned long x)
  125. /* 0 if x >= position_cum[1],
  126. N - 1 if position_cum[N] > x,
  127. i such that position_cum[i] > x >= position_cum[i + 1] otherwise */
  128. {
  129. unsigned long i, j, k;
  130. i = 1; j = N;
  131. while (i < j) {
  132. k = (i + j) / 2;
  133. if (position_cum[k] > x) i = k + 1; else j = k;
  134. }
  135. return i - 1;
  136. }
  137. static int Decode(unsigned char *srcbuf, unsigned char *dstbuf, unsigned long srclen,
  138. unsigned long dstlen) /* Just the reverse of Encode(). */
  139. {
  140. unsigned long i, r, j, k, c, range, sym;
  141. unsigned char *ip, *op;
  142. unsigned char *srcend = srcbuf + srclen;
  143. unsigned char *dstend = dstbuf + dstlen;
  144. unsigned char buffer = 0;
  145. unsigned char mask = 0;
  146. unsigned long low = 0;
  147. unsigned long high = Q4;
  148. unsigned long value = 0;
  149. ip = srcbuf;
  150. op = dstbuf;
  151. for (i = 0; i < M + 2; i++) {
  152. value *= 2;
  153. if ((mask >>= 1) == 0) {
  154. buffer = (ip >= srcend) ? 0 : *(ip++);
  155. mask = 128;
  156. }
  157. value += ((buffer & mask) != 0);
  158. }
  159. StartModel();
  160. for (i = 0; i < N - F; i++) text_buf[i] = ' ';
  161. r = N - F;
  162. while (op < dstend) {
  163. range = high - low;
  164. sym = BinarySearchSym((unsigned long)
  165. (((value - low + 1) * sym_cum[0] - 1) / range));
  166. high = low + (range * sym_cum[sym - 1]) / sym_cum[0];
  167. low += (range * sym_cum[sym ]) / sym_cum[0];
  168. for ( ; ; ) {
  169. if (low >= Q2) {
  170. value -= Q2; low -= Q2; high -= Q2;
  171. } else if (low >= Q1 && high <= Q3) {
  172. value -= Q1; low -= Q1; high -= Q1;
  173. } else if (high > Q2) break;
  174. low += low; high += high;
  175. value *= 2;
  176. if ((mask >>= 1) == 0) {
  177. buffer = (ip >= srcend) ? 0 : *(ip++);
  178. mask = 128;
  179. }
  180. value += ((buffer & mask) != 0);
  181. }
  182. c = sym_to_char[sym];
  183. UpdateModel(sym);
  184. if (c < 256) {
  185. if (op >= dstend) return -1;
  186. *(op++) = c;
  187. text_buf[r++] = c;
  188. r &= (N - 1);
  189. } else {
  190. j = c - 255 + THRESHOLD;
  191. range = high - low;
  192. i = BinarySearchPos((unsigned long)
  193. (((value - low + 1) * position_cum[0] - 1) / range));
  194. high = low + (range * position_cum[i ]) / position_cum[0];
  195. low += (range * position_cum[i + 1]) / position_cum[0];
  196. for ( ; ; ) {
  197. if (low >= Q2) {
  198. value -= Q2; low -= Q2; high -= Q2;
  199. } else if (low >= Q1 && high <= Q3) {
  200. value -= Q1; low -= Q1; high -= Q1;
  201. } else if (high > Q2) break;
  202. low += low; high += high;
  203. value *= 2;
  204. if ((mask >>= 1) == 0) {
  205. buffer = (ip >= srcend) ? 0 : *(ip++);
  206. mask = 128;
  207. }
  208. value += ((buffer & mask) != 0);
  209. }
  210. i = (r - i - 1) & (N - 1);
  211. for (k = 0; k < j; k++) {
  212. c = text_buf[(i + k) & (N - 1)];
  213. if (op >= dstend) return -1;
  214. *(op++) = c;
  215. text_buf[r++] = c;
  216. r &= (N - 1);
  217. }
  218. }
  219. }
  220. return 0;
  221. }
  222. int lzari_decompress(unsigned char *data_in, unsigned char *cpage_out,
  223. u32 srclen, u32 destlen)
  224. {
  225. return Decode(data_in, cpage_out, srclen, destlen);
  226. }
  227. #endif /* ((CONFIG_COMMANDS & CFG_CMD_JFFS2) && defined(CONFIG_JFFS2_LZO_LZARI)) */