lzrw3.c 40 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743
  1. /*
  2. * $Source: /homes/cvs/ftape-stacked/ftape/compressor/lzrw3.c,v $
  3. * $Revision: 1.1 $
  4. * $Date: 1997/10/05 19:12:29 $
  5. *
  6. * Implementation of Ross Williams lzrw3 algorithm. Adaption for zftape.
  7. *
  8. */
  9. #include "../compressor/lzrw3.h" /* Defines single exported function "compress". */
  10. /******************************************************************************/
  11. /* */
  12. /* LZRW3.C */
  13. /* */
  14. /******************************************************************************/
  15. /* */
  16. /* Author : Ross Williams. */
  17. /* Date : 30-Jun-1991. */
  18. /* Release : 1. */
  19. /* */
  20. /******************************************************************************/
  21. /* */
  22. /* This file contains an implementation of the LZRW3 data compression */
  23. /* algorithm in C. */
  24. /* */
  25. /* The algorithm is a general purpose compression algorithm that runs fast */
  26. /* and gives reasonable compression. The algorithm is a member of the Lempel */
  27. /* Ziv family of algorithms and bases its compression on the presence in the */
  28. /* data of repeated substrings. */
  29. /* */
  30. /* This algorithm is unpatented and the code is public domain. As the */
  31. /* algorithm is based on the LZ77 class of algorithms, it is unlikely to be */
  32. /* the subject of a patent challenge. */
  33. /* */
  34. /* Unlike the LZRW1 and LZRW1-A algorithms, the LZRW3 algorithm is */
  35. /* deterministic and is guaranteed to yield the same compressed */
  36. /* representation for a given file each time it is run. */
  37. /* */
  38. /* The LZRW3 algorithm was originally designed and implemented */
  39. /* by Ross Williams on 31-Dec-1990. */
  40. /* */
  41. /* Here are the results of applying this code, compiled under THINK C 4.0 */
  42. /* and running on a Mac-SE (8MHz 68000), to the standard calgary corpus. */
  43. /* */
  44. /* +----------------------------------------------------------------+ */
  45. /* | DATA COMPRESSION TEST | */
  46. /* | ===================== | */
  47. /* | Time of run : Sun 30-Jun-1991 09:31PM | */
  48. /* | Timing accuracy : One part in 100 | */
  49. /* | Context length : 262144 bytes (= 256.0000K) | */
  50. /* | Test suite : Calgary Corpus Suite | */
  51. /* | Files in suite : 14 | */
  52. /* | Algorithm : LZRW3 | */
  53. /* | Note: All averages are calculated from the un-rounded values. | */
  54. /* +----------------------------------------------------------------+ */
  55. /* | File Name Length CxB ComLen %Remn Bits Com K/s Dec K/s | */
  56. /* | ---------- ------ --- ------ ----- ---- ------- ------- | */
  57. /* | rpus:Bib.D 111261 1 55033 49.5 3.96 19.46 32.27 | */
  58. /* | us:Book1.D 768771 3 467962 60.9 4.87 17.03 31.07 | */
  59. /* | us:Book2.D 610856 3 317102 51.9 4.15 19.39 34.15 | */
  60. /* | rpus:Geo.D 102400 1 82424 80.5 6.44 11.65 18.18 | */
  61. /* | pus:News.D 377109 2 205670 54.5 4.36 17.14 27.47 | */
  62. /* | pus:Obj1.D 21504 1 13027 60.6 4.85 13.40 18.95 | */
  63. /* | pus:Obj2.D 246814 1 116286 47.1 3.77 19.31 30.10 | */
  64. /* | s:Paper1.D 53161 1 27522 51.8 4.14 18.60 31.15 | */
  65. /* | s:Paper2.D 82199 1 45160 54.9 4.40 18.45 32.84 | */
  66. /* | rpus:Pic.D 513216 2 122388 23.8 1.91 35.29 51.05 | */
  67. /* | us:Progc.D 39611 1 19669 49.7 3.97 18.87 30.64 | */
  68. /* | us:Progl.D 71646 1 28247 39.4 3.15 24.34 40.66 | */
  69. /* | us:Progp.D 49379 1 19377 39.2 3.14 23.91 39.23 | */
  70. /* | us:Trans.D 93695 1 33481 35.7 2.86 25.48 40.37 | */
  71. /* +----------------------------------------------------------------+ */
  72. /* | Average 224401 1 110953 50.0 4.00 20.17 32.72 | */
  73. /* +----------------------------------------------------------------+ */
  74. /* */
  75. /******************************************************************************/
  76. /******************************************************************************/
  77. /* The following structure is returned by the "compress" function below when */
  78. /* the user asks the function to return identifying information. */
  79. /* The most important field in the record is the working memory field which */
  80. /* tells the calling program how much working memory should be passed to */
  81. /* "compress" when it is called to perform a compression or decompression. */
  82. /* LZRW3 uses the same amount of memory during compression and decompression. */
  83. /* For more information on this structure see "compress.h". */
  84. #define U(X) ((ULONG) X)
  85. #define SIZE_P_BYTE (U(sizeof(UBYTE *)))
  86. #define SIZE_WORD (U(sizeof(UWORD )))
  87. #define ALIGNMENT_FUDGE (U(16))
  88. #define MEM_REQ ( U(4096)*(SIZE_P_BYTE) + ALIGNMENT_FUDGE )
  89. static struct compress_identity identity =
  90. {
  91. U(0x032DDEA8), /* Algorithm identification number. */
  92. MEM_REQ, /* Working memory (bytes) required. */
  93. "LZRW3", /* Name of algorithm. */
  94. "1.0", /* Version number of algorithm. */
  95. "31-Dec-1990", /* Date of algorithm. */
  96. "Public Domain", /* Copyright notice. */
  97. "Ross N. Williams", /* Author of algorithm. */
  98. "Renaissance Software", /* Affiliation of author. */
  99. "Public Domain" /* Vendor of algorithm. */
  100. };
  101. LOCAL void compress_compress (UBYTE *,UBYTE *,ULONG,UBYTE *, LONG *);
  102. LOCAL void compress_decompress(UBYTE *,UBYTE *,LONG, UBYTE *, ULONG *);
  103. /******************************************************************************/
  104. /* This function is the only function exported by this module. */
  105. /* Depending on its first parameter, the function can be requested to */
  106. /* compress a block of memory, decompress a block of memory, or to identify */
  107. /* itself. For more information, see the specification file "compress.h". */
  108. EXPORT void lzrw3_compress(
  109. UWORD action, /* Action to be performed. */
  110. UBYTE *wrk_mem, /* Address of working memory we can use.*/
  111. UBYTE *src_adr, /* Address of input data. */
  112. LONG src_len, /* Length of input data. */
  113. UBYTE *dst_adr, /* Address to put output data. */
  114. void *p_dst_len /* Address of longword for length of output data.*/
  115. )
  116. {
  117. switch (action)
  118. {
  119. case COMPRESS_ACTION_IDENTITY:
  120. *((struct compress_identity **)p_dst_len)= &identity;
  121. break;
  122. case COMPRESS_ACTION_COMPRESS:
  123. compress_compress(wrk_mem,src_adr,src_len,dst_adr,(LONG *)p_dst_len);
  124. break;
  125. case COMPRESS_ACTION_DECOMPRESS:
  126. compress_decompress(wrk_mem,src_adr,src_len,dst_adr,(LONG *)p_dst_len);
  127. break;
  128. }
  129. }
  130. /******************************************************************************/
  131. /* */
  132. /* BRIEF DESCRIPTION OF THE LZRW3 ALGORITHM */
  133. /* ======================================== */
  134. /* The LZRW3 algorithm is identical to the LZRW1-A algorithm except that */
  135. /* instead of transmitting history offsets, it transmits hash table indexes. */
  136. /* In order to decode the indexes, the decompressor must maintain an */
  137. /* identical hash table. Copy items are straightforward:when the decompressor */
  138. /* receives a copy item, it simply looks up the hash table to translate the */
  139. /* index into a pointer into the data already decompressed. To update the */
  140. /* hash table, it replaces the same table entry with a pointer to the start */
  141. /* of the newly decoded phrase. The tricky part is with literal items, for at */
  142. /* the time that the decompressor receives a literal item the decompressor */
  143. /* does not have the three bytes in the Ziv (that the compressor has) to */
  144. /* perform the three-byte hash. To solve this problem, in LZRW3, both the */
  145. /* compressor and decompressor are wired up so that they "buffer" these */
  146. /* literals and update their hash tables only when three bytes are available. */
  147. /* This makes the maximum buffering 2 bytes. */
  148. /* */
  149. /* Replacement of offsets by hash table indexes yields a few percent extra */
  150. /* compression at the cost of some speed. LZRW3 is slower than LZRW1, LZRW1-A */
  151. /* and LZRW2, but yields better compression. */
  152. /* */
  153. /* Extra compression could be obtained by using a hash table of depth two. */
  154. /* However, increasing the depth above one incurs a significant decrease in */
  155. /* compression speed which was not considered worthwhile. Another reason for */
  156. /* keeping the depth down to one was to allow easy comparison with the */
  157. /* LZRW1-A and LZRW2 algorithms so as to demonstrate the exact effect of the */
  158. /* use of direct hash indexes. */
  159. /* */
  160. /* +---+ */
  161. /* |___|4095 */
  162. /* |___| */
  163. /* +---------------------*_|<---+ /----+---\ */
  164. /* | |___| +---|Hash | */
  165. /* | |___| |Function| */
  166. /* | |___| \--------/ */
  167. /* | |___|0 ^ */
  168. /* | +---+ | */
  169. /* | Hash +-----+ */
  170. /* | Table | */
  171. /* | --- */
  172. /* v ^^^ */
  173. /* +-------------------------------------|----------------+ */
  174. /* |||||||||||||||||||||||||||||||||||||||||||||||||||||||| */
  175. /* +-------------------------------------|----------------+ */
  176. /* | |1......18| | */
  177. /* |<------- Lempel=History ------------>|<--Ziv-->| | */
  178. /* | (=bytes already processed) |<-Still to go-->| */
  179. /* |<-------------------- INPUT BLOCK ------------------->| */
  180. /* */
  181. /* The diagram above for LZRW3 looks almost identical to the diagram for */
  182. /* LZRW1. The difference is that in LZRW3, the compressor transmits hash */
  183. /* table indices instead of Lempel offsets. For this to work, the */
  184. /* decompressor must maintain a hash table as well as the compressor and both */
  185. /* compressor and decompressor must "buffer" literals, as the decompressor */
  186. /* cannot hash phrases commencing with a literal until another two bytes have */
  187. /* arrived. */
  188. /* */
  189. /* LZRW3 Algorithm Execution Summary */
  190. /* --------------------------------- */
  191. /* 1. Hash the first three bytes of the Ziv to yield a hash table index h. */
  192. /* 2. Look up the hash table yielding history pointer p. */
  193. /* 3. Match where p points with the Ziv. If there is a match of three or */
  194. /* more bytes, code those bytes (in the Ziv) as a copy item, otherwise */
  195. /* code the next byte in the Ziv as a literal item. */
  196. /* 4. Update the hash table as possible subject to the constraint that only */
  197. /* phrases commencing three bytes back from the Ziv can be hashed and */
  198. /* entered into the hash table. (This enables the decompressor to keep */
  199. /* pace). See the description and code for more details. */
  200. /* */
  201. /******************************************************************************/
  202. /* */
  203. /* DEFINITION OF COMPRESSED FILE FORMAT */
  204. /* ==================================== */
  205. /* * A compressed file consists of a COPY FLAG followed by a REMAINDER. */
  206. /* * The copy flag CF uses up four bytes with the first byte being the */
  207. /* least significant. */
  208. /* * If CF=1, then the compressed file represents the remainder of the file */
  209. /* exactly. Otherwise CF=0 and the remainder of the file consists of zero */
  210. /* or more GROUPS, each of which represents one or more bytes. */
  211. /* * Each group consists of two bytes of CONTROL information followed by */
  212. /* sixteen ITEMs except for the last group which can contain from one */
  213. /* to sixteen items. */
  214. /* * An item can be either a LITERAL item or a COPY item. */
  215. /* * Each item corresponds to a bit in the control bytes. */
  216. /* * The first control byte corresponds to the first 8 items in the group */
  217. /* with bit 0 corresponding to the first item in the group and bit 7 to */
  218. /* the eighth item in the group. */
  219. /* * The second control byte corresponds to the second 8 items in the group */
  220. /* with bit 0 corresponding to the ninth item in the group and bit 7 to */
  221. /* the sixteenth item in the group. */
  222. /* * A zero bit in a control word means that the corresponding item is a */
  223. /* literal item. A one bit corresponds to a copy item. */
  224. /* * A literal item consists of a single byte which represents itself. */
  225. /* * A copy item consists of two bytes that represent from 3 to 18 bytes. */
  226. /* * The first byte in a copy item will be denoted C1. */
  227. /* * The second byte in a copy item will be denoted C2. */
  228. /* * Bits will be selected using square brackets. */
  229. /* For example: C1[0..3] is the low nibble of the first control byte. */
  230. /* of copy item C1. */
  231. /* * The LENGTH of a copy item is defined to be C1[0..3]+3 which is a number */
  232. /* in the range [3,18]. */
  233. /* * The INDEX of a copy item is defined to be C1[4..7]*256+C2[0..8] which */
  234. /* is a number in the range [0,4095]. */
  235. /* * A copy item represents the sequence of bytes */
  236. /* text[POS-OFFSET..POS-OFFSET+LENGTH-1] where */
  237. /* text is the entire text of the uncompressed string. */
  238. /* POS is the index in the text of the character following the */
  239. /* string represented by all the items preceeding the item */
  240. /* being defined. */
  241. /* OFFSET is obtained from INDEX by looking up the hash table. */
  242. /* */
  243. /******************************************************************************/
  244. /* The following #define defines the length of the copy flag that appears at */
  245. /* the start of the compressed file. The value of four bytes was chosen */
  246. /* because the fast_copy routine on my Macintosh runs faster if the source */
  247. /* and destination blocks are relatively longword aligned. */
  248. /* The actual flag data appears in the first byte. The rest are zeroed so as */
  249. /* to normalize the compressed representation (i.e. not non-deterministic). */
  250. #define FLAG_BYTES 4
  251. /* The following #defines define the meaning of the values of the copy */
  252. /* flag at the start of the compressed file. */
  253. #define FLAG_COMPRESS 0 /* Signals that output was result of compression. */
  254. #define FLAG_COPY 1 /* Signals that output was simply copied over. */
  255. /* The 68000 microprocessor (on which this algorithm was originally developed */
  256. /* is fussy about non-aligned arrays of words. To avoid these problems the */
  257. /* following macro can be used to "waste" from 0 to 3 bytes so as to align */
  258. /* the argument pointer. */
  259. #define ULONG_ALIGN_UP(X) ((((ULONG)X)+sizeof(ULONG)-1)&~(sizeof(ULONG)-1))
  260. /* The following constant defines the maximum length of an uncompressed item. */
  261. /* This definition must not be changed; its value is hardwired into the code. */
  262. /* The longest number of bytes that can be spanned by a single item is 18 */
  263. /* for the longest copy item. */
  264. #define MAX_RAW_ITEM (18)
  265. /* The following constant defines the maximum length of an uncompressed group.*/
  266. /* This definition must not be changed; its value is hardwired into the code. */
  267. /* A group contains at most 16 items which explains this definition. */
  268. #define MAX_RAW_GROUP (16*MAX_RAW_ITEM)
  269. /* The following constant defines the maximum length of a compressed group. */
  270. /* This definition must not be changed; its value is hardwired into the code. */
  271. /* A compressed group consists of two control bytes followed by up to 16 */
  272. /* compressed items each of which can have a maximum length of two bytes. */
  273. #define MAX_CMP_GROUP (2+16*2)
  274. /* The following constant defines the number of entries in the hash table. */
  275. /* This definition must not be changed; its value is hardwired into the code. */
  276. #define HASH_TABLE_LENGTH (4096)
  277. /* LZRW3, unlike LZRW1(-A), must initialize its hash table so as to enable */
  278. /* the compressor and decompressor to stay in step maintaining identical hash */
  279. /* tables. In an early version of the algorithm, the tables were simply */
  280. /* initialized to zero and a check for zero was included just before the */
  281. /* matching code. However, this test costs time. A better solution is to */
  282. /* initialize all the entries in the hash table to point to a constant */
  283. /* string. The decompressor does the same. This solution requires no extra */
  284. /* test. The contents of the string do not matter so long as the string is */
  285. /* the same for the compressor and decompressor and contains at least */
  286. /* MAX_RAW_ITEM bytes. I chose consecutive decimal digits because they do not */
  287. /* have white space problems (e.g. there is no chance that the compiler will */
  288. /* replace more than one space by a TAB) and because they make the length of */
  289. /* the string obvious by inspection. */
  290. #define START_STRING_18 ((UBYTE *) "123456789012345678")
  291. /* In this algorithm, hash values have to be calculated at more than one */
  292. /* point. The following macro neatens the code up for this. */
  293. #define HASH(PTR) \
  294. (((40543*(((*(PTR))<<8)^((*((PTR)+1))<<4)^(*((PTR)+2))))>>4) & 0xFFF)
  295. /******************************************************************************/
  296. /* Input : Hand over the required amount of working memory in p_wrk_mem. */
  297. /* Input : Specify input block using p_src_first and src_len. */
  298. /* Input : Point p_dst_first to the start of the output zone (OZ). */
  299. /* Input : Point p_dst_len to a ULONG to receive the output length. */
  300. /* Input : Input block and output zone must not overlap. */
  301. /* Output : Length of output block written to *p_dst_len. */
  302. /* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. May */
  303. /* Output : write in OZ=Mem[p_dst_first..p_dst_first+src_len+MAX_CMP_GROUP-1].*/
  304. /* Output : Upon completion guaranteed *p_dst_len<=src_len+FLAG_BYTES. */
  305. LOCAL void compress_compress(UBYTE *p_wrk_mem,
  306. UBYTE *p_src_first, ULONG src_len,
  307. UBYTE *p_dst_first, LONG *p_dst_len)
  308. {
  309. /* p_src and p_dst step through the source and destination blocks. */
  310. register UBYTE *p_src = p_src_first;
  311. register UBYTE *p_dst = p_dst_first;
  312. /* The following variables are never modified and are used in the */
  313. /* calculations that determine when the main loop terminates. */
  314. UBYTE *p_src_post = p_src_first+src_len;
  315. UBYTE *p_dst_post = p_dst_first+src_len;
  316. UBYTE *p_src_max1 = p_src_first+src_len-MAX_RAW_ITEM;
  317. UBYTE *p_src_max16 = p_src_first+src_len-MAX_RAW_ITEM*16;
  318. /* The variables 'p_control' and 'control' are used to buffer control bits. */
  319. /* Before each group is processed, the next two bytes of the output block */
  320. /* are set aside for the control word for the group about to be processed. */
  321. /* 'p_control' is set to point to the first byte of that word. Meanwhile, */
  322. /* 'control' buffers the control bits being generated during the processing */
  323. /* of the group. Instead of having a counter to keep track of how many items */
  324. /* have been processed (=the number of bits in the control word), at the */
  325. /* start of each group, the top word of 'control' is filled with 1 bits. */
  326. /* As 'control' is shifted for each item, the 1 bits in the top word are */
  327. /* absorbed or destroyed. When they all run out (i.e. when the top word is */
  328. /* all zero bits, we know that we are at the end of a group. */
  329. # define TOPWORD 0xFFFF0000
  330. UBYTE *p_control;
  331. register ULONG control=TOPWORD;
  332. /* THe variable 'hash' always points to the first element of the hash table. */
  333. UBYTE **hash= (UBYTE **) ULONG_ALIGN_UP(p_wrk_mem);
  334. /* The following two variables represent the literal buffer. p_h1 points to */
  335. /* the hash table entry corresponding to the youngest literal. p_h2 points */
  336. /* to the hash table entry corresponding to the second youngest literal. */
  337. /* Note: p_h1=0=>p_h2=0 because zero values denote absence of a pending */
  338. /* literal. The variables are initialized to zero meaning an empty "buffer". */
  339. UBYTE **p_h1=NULL;
  340. UBYTE **p_h2=NULL;
  341. /* To start, we write the flag bytes. Being optimistic, we set the flag to */
  342. /* FLAG_COMPRESS. The remaining flag bytes are zeroed so as to keep the */
  343. /* algorithm deterministic. */
  344. *p_dst++=FLAG_COMPRESS;
  345. {UWORD i; for (i=2;i<=FLAG_BYTES;i++) *p_dst++=0;}
  346. /* Reserve the first word of output as the control word for the first group. */
  347. /* Note: This is undone at the end if the input block is empty. */
  348. p_control=p_dst; p_dst+=2;
  349. /* Initialize all elements of the hash table to point to a constant string. */
  350. /* Use of an unrolled loop speeds this up considerably. */
  351. {UWORD i; UBYTE **p_h=hash;
  352. # define ZH *p_h++=START_STRING_18
  353. for (i=0;i<256;i++) /* 256=HASH_TABLE_LENGTH/16. */
  354. {ZH;ZH;ZH;ZH;
  355. ZH;ZH;ZH;ZH;
  356. ZH;ZH;ZH;ZH;
  357. ZH;ZH;ZH;ZH;}
  358. }
  359. /* The main loop processes either 1 or 16 items per iteration. As its */
  360. /* termination logic is complicated, I have opted for an infinite loop */
  361. /* structure containing 'break' and 'goto' statements. */
  362. while (TRUE)
  363. {/* Begin main processing loop. */
  364. /* Note: All the variables here except unroll should be defined within */
  365. /* the inner loop. Unfortunately the loop hasn't got a block. */
  366. register UBYTE *p; /* Scans through targ phrase during matching. */
  367. register UBYTE *p_ziv= NULL ; /* Points to first byte of current Ziv. */
  368. register UWORD unroll; /* Loop counter for unrolled inner loop. */
  369. register UWORD index; /* Index of current hash table entry. */
  370. register UBYTE **p_h0 = NULL ; /* Pointer to current hash table entry. */
  371. /* Test for overrun and jump to overrun code if necessary. */
  372. if (p_dst>p_dst_post)
  373. goto overrun;
  374. /* The following cascade of if statements efficiently catches and deals */
  375. /* with varying degrees of closeness to the end of the input block. */
  376. /* When we get very close to the end, we stop updating the table and */
  377. /* code the remaining bytes as literals. This makes the code simpler. */
  378. unroll=16;
  379. if (p_src>p_src_max16)
  380. {
  381. unroll=1;
  382. if (p_src>p_src_max1)
  383. {
  384. if (p_src==p_src_post)
  385. break;
  386. else
  387. goto literal;
  388. }
  389. }
  390. /* This inner unrolled loop processes 'unroll' (whose value is either 1 */
  391. /* or 16) items. I have chosen to implement this loop with labels and */
  392. /* gotos to heighten the ease with which the loop may be implemented with */
  393. /* a single decrement and branch instruction in assembly language and */
  394. /* also because the labels act as highly readable place markers. */
  395. /* (Also because we jump into the loop for endgame literals (see above)). */
  396. begin_unrolled_loop:
  397. /* To process the next phrase, we hash the next three bytes and use */
  398. /* the resultant hash table index to look up the hash table. A pointer */
  399. /* to the entry is stored in p_h0 so as to avoid an array lookup. The */
  400. /* hash table entry *p_h0 is looked up yielding a pointer p to a */
  401. /* potential match of the Ziv in the history. */
  402. index=HASH(p_src);
  403. p_h0=&hash[index];
  404. p=*p_h0;
  405. /* Having looked up the candidate position, we are in a position to */
  406. /* attempt a match. The match loop has been unrolled using the PS */
  407. /* macro so that failure within the first three bytes automatically */
  408. /* results in the literal branch being taken. The coding is simple. */
  409. /* p_ziv saves p_src so we can let p_src wander. */
  410. # define PS *p++!=*p_src++
  411. p_ziv=p_src;
  412. if (PS || PS || PS)
  413. {
  414. /* Literal. */
  415. /* Code the literal byte as itself and a zero control bit. */
  416. p_src=p_ziv; literal: *p_dst++=*p_src++; control&=0xFFFEFFFF;
  417. /* We have just coded a literal. If we had two pending ones, that */
  418. /* makes three and we can update the hash table. */
  419. if (p_h2!=0)
  420. {*p_h2=p_ziv-2;}
  421. /* In any case, rotate the hash table pointers for next time. */
  422. p_h2=p_h1; p_h1=p_h0;
  423. }
  424. else
  425. {
  426. /* Copy */
  427. /* Match up to 15 remaining bytes using an unrolled loop and code. */
  428. #if 0
  429. PS || PS || PS || PS || PS || PS || PS || PS ||
  430. PS || PS || PS || PS || PS || PS || PS || p_src++;
  431. #else
  432. if (
  433. !( PS || PS || PS || PS || PS || PS || PS || PS ||
  434. PS || PS || PS || PS || PS || PS || PS )
  435. ) p_src++;
  436. #endif
  437. *p_dst++=((index&0xF00)>>4)|(--p_src-p_ziv-3);
  438. *p_dst++=index&0xFF;
  439. /* As we have just coded three bytes, we are now in a position to */
  440. /* update the hash table with the literal bytes that were pending */
  441. /* upon the arrival of extra context bytes. */
  442. if (p_h1!=0)
  443. {
  444. if (p_h2)
  445. {*p_h2=p_ziv-2; p_h2=NULL;}
  446. *p_h1=p_ziv-1; p_h1=NULL;
  447. }
  448. /* In any case, we can update the hash table based on the current */
  449. /* position as we just coded at least three bytes in a copy items. */
  450. *p_h0=p_ziv;
  451. }
  452. control>>=1;
  453. /* This loop is all set up for a decrement and jump instruction! */
  454. #ifndef linux
  455. ` end_unrolled_loop: if (--unroll) goto begin_unrolled_loop;
  456. #else
  457. /* end_unrolled_loop: */ if (--unroll) goto begin_unrolled_loop;
  458. #endif
  459. /* At this point it will nearly always be the end of a group in which */
  460. /* case, we have to do some control-word processing. However, near the */
  461. /* end of the input block, the inner unrolled loop is only executed once. */
  462. /* This necessitates the 'if' test. */
  463. if ((control&TOPWORD)==0)
  464. {
  465. /* Write the control word to the place we saved for it in the output. */
  466. *p_control++= control &0xFF;
  467. *p_control = (control>>8) &0xFF;
  468. /* Reserve the next word in the output block for the control word */
  469. /* for the group about to be processed. */
  470. p_control=p_dst; p_dst+=2;
  471. /* Reset the control bits buffer. */
  472. control=TOPWORD;
  473. }
  474. } /* End main processing loop. */
  475. /* After the main processing loop has executed, all the input bytes have */
  476. /* been processed. However, the control word has still to be written to the */
  477. /* word reserved for it in the output at the start of the most recent group. */
  478. /* Before writing, the control word has to be shifted so that all the bits */
  479. /* are in the right place. The "empty" bit positions are filled with 1s */
  480. /* which partially fill the top word. */
  481. while(control&TOPWORD) control>>=1;
  482. *p_control++= control &0xFF;
  483. *p_control++=(control>>8) &0xFF;
  484. /* If the last group contained no items, delete the control word too. */
  485. if (p_control==p_dst) p_dst-=2;
  486. /* Write the length of the output block to the dst_len parameter and return. */
  487. *p_dst_len=p_dst-p_dst_first;
  488. return;
  489. /* Jump here as soon as an overrun is detected. An overrun is defined to */
  490. /* have occurred if p_dst>p_dst_first+src_len. That is, the moment the */
  491. /* length of the output written so far exceeds the length of the input block.*/
  492. /* The algorithm checks for overruns at least at the end of each group */
  493. /* which means that the maximum overrun is MAX_CMP_GROUP bytes. */
  494. /* Once an overrun occurs, the only thing to do is to set the copy flag and */
  495. /* copy the input over. */
  496. overrun:
  497. #if 0
  498. *p_dst_first=FLAG_COPY;
  499. fast_copy(p_src_first,p_dst_first+FLAG_BYTES,src_len);
  500. *p_dst_len=src_len+FLAG_BYTES;
  501. #else
  502. fast_copy(p_src_first,p_dst_first,src_len);
  503. *p_dst_len= -src_len; /* return a negative number to indicate uncompressed data */
  504. #endif
  505. }
  506. /******************************************************************************/
  507. /* Input : Hand over the required amount of working memory in p_wrk_mem. */
  508. /* Input : Specify input block using p_src_first and src_len. */
  509. /* Input : Point p_dst_first to the start of the output zone. */
  510. /* Input : Point p_dst_len to a ULONG to receive the output length. */
  511. /* Input : Input block and output zone must not overlap. User knows */
  512. /* Input : upperbound on output block length from earlier compression. */
  513. /* Input : In any case, maximum expansion possible is nine times. */
  514. /* Output : Length of output block written to *p_dst_len. */
  515. /* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. */
  516. /* Output : Writes only in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. */
  517. LOCAL void compress_decompress( UBYTE *p_wrk_mem,
  518. UBYTE *p_src_first, LONG src_len,
  519. UBYTE *p_dst_first, ULONG *p_dst_len)
  520. {
  521. /* Byte pointers p_src and p_dst scan through the input and output blocks. */
  522. register UBYTE *p_src = p_src_first+FLAG_BYTES;
  523. register UBYTE *p_dst = p_dst_first;
  524. /* we need to avoid a SEGV when trying to uncompress corrupt data */
  525. register UBYTE *p_dst_post = p_dst_first + *p_dst_len;
  526. /* The following two variables are never modified and are used to control */
  527. /* the main loop. */
  528. UBYTE *p_src_post = p_src_first+src_len;
  529. UBYTE *p_src_max16 = p_src_first+src_len-(MAX_CMP_GROUP-2);
  530. /* The hash table is the only resident of the working memory. The hash table */
  531. /* contains HASH_TABLE_LENGTH=4096 pointers to positions in the history. To */
  532. /* keep Macintoshes happy, it is longword aligned. */
  533. UBYTE **hash = (UBYTE **) ULONG_ALIGN_UP(p_wrk_mem);
  534. /* The variable 'control' is used to buffer the control bits which appear in */
  535. /* groups of 16 bits (control words) at the start of each compressed group. */
  536. /* When each group is read, bit 16 of the register is set to one. Whenever */
  537. /* a new bit is needed, the register is shifted right. When the value of the */
  538. /* register becomes 1, we know that we have reached the end of a group. */
  539. /* Initializing the register to 1 thus instructs the code to follow that it */
  540. /* should read a new control word immediately. */
  541. register ULONG control=1;
  542. /* The value of 'literals' is always in the range 0..3. It is the number of */
  543. /* consecutive literal items just seen. We have to record this number so as */
  544. /* to know when to update the hash table. When literals gets to 3, there */
  545. /* have been three consecutive literals and we can update at the position of */
  546. /* the oldest of the three. */
  547. register UWORD literals=0;
  548. /* Check the leading copy flag to see if the compressor chose to use a copy */
  549. /* operation instead of a compression operation. If a copy operation was */
  550. /* used, then all we need to do is copy the data over, set the output length */
  551. /* and return. */
  552. #if 0
  553. if (*p_src_first==FLAG_COPY)
  554. {
  555. fast_copy(p_src_first+FLAG_BYTES,p_dst_first,src_len-FLAG_BYTES);
  556. *p_dst_len=src_len-FLAG_BYTES;
  557. return;
  558. }
  559. #else
  560. if ( src_len < 0 )
  561. {
  562. fast_copy(p_src_first,p_dst_first,-src_len );
  563. *p_dst_len = (ULONG)-src_len;
  564. return;
  565. }
  566. #endif
  567. /* Initialize all elements of the hash table to point to a constant string. */
  568. /* Use of an unrolled loop speeds this up considerably. */
  569. {UWORD i; UBYTE **p_h=hash;
  570. # define ZJ *p_h++=START_STRING_18
  571. for (i=0;i<256;i++) /* 256=HASH_TABLE_LENGTH/16. */
  572. {ZJ;ZJ;ZJ;ZJ;
  573. ZJ;ZJ;ZJ;ZJ;
  574. ZJ;ZJ;ZJ;ZJ;
  575. ZJ;ZJ;ZJ;ZJ;}
  576. }
  577. /* The outer loop processes either 1 or 16 items per iteration depending on */
  578. /* how close p_src is to the end of the input block. */
  579. while (p_src!=p_src_post)
  580. {/* Start of outer loop */
  581. register UWORD unroll; /* Counts unrolled loop executions. */
  582. /* When 'control' has the value 1, it means that the 16 buffered control */
  583. /* bits that were read in at the start of the current group have all been */
  584. /* shifted out and that all that is left is the 1 bit that was injected */
  585. /* into bit 16 at the start of the current group. When we reach the end */
  586. /* of a group, we have to load a new control word and inject a new 1 bit. */
  587. if (control==1)
  588. {
  589. control=0x10000|*p_src++;
  590. control|=(*p_src++)<<8;
  591. }
  592. /* If it is possible that we are within 16 groups from the end of the */
  593. /* input, execute the unrolled loop only once, else process a whole group */
  594. /* of 16 items by looping 16 times. */
  595. unroll= p_src<=p_src_max16 ? 16 : 1;
  596. /* This inner loop processes one phrase (item) per iteration. */
  597. while (unroll--)
  598. { /* Begin unrolled inner loop. */
  599. /* Process a literal or copy item depending on the next control bit. */
  600. if (control&1)
  601. {
  602. /* Copy item. */
  603. register UBYTE *p; /* Points to place from which to copy. */
  604. register UWORD lenmt; /* Length of copy item minus three. */
  605. register UBYTE **p_hte; /* Pointer to current hash table entry.*/
  606. register UBYTE *p_ziv=p_dst; /* Pointer to start of current Ziv. */
  607. /* Read and dismantle the copy word. Work out from where to copy. */
  608. lenmt=*p_src++;
  609. p_hte=&hash[((lenmt&0xF0)<<4)|*p_src++];
  610. p=*p_hte;
  611. lenmt&=0xF;
  612. /* Now perform the copy using a half unrolled loop. */
  613. *p_dst++=*p++;
  614. *p_dst++=*p++;
  615. *p_dst++=*p++;
  616. while (lenmt--)
  617. *p_dst++=*p++;
  618. /* Because we have just received 3 or more bytes in a copy item */
  619. /* (whose bytes we have just installed in the output), we are now */
  620. /* in a position to flush all the pending literal hashings that had */
  621. /* been postponed for lack of bytes. */
  622. if (literals>0)
  623. {
  624. register UBYTE *r=p_ziv-literals;
  625. hash[HASH(r)]=r;
  626. if (literals==2)
  627. {r++; hash[HASH(r)]=r;}
  628. literals=0;
  629. }
  630. /* In any case, we can immediately update the hash table with the */
  631. /* current position. We don't need to do a HASH(...) to work out */
  632. /* where to put the pointer, as the compressor just told us!!! */
  633. *p_hte=p_ziv;
  634. }
  635. else
  636. {
  637. /* Literal item. */
  638. /* Copy over the literal byte. */
  639. *p_dst++=*p_src++;
  640. /* If we now have three literals waiting to be hashed into the hash */
  641. /* table, we can do one of them now (because there are three). */
  642. if (++literals == 3)
  643. {register UBYTE *p=p_dst-3; hash[HASH(p)]=p; literals=2;}
  644. }
  645. /* Shift the control buffer so the next control bit is in bit 0. */
  646. control>>=1;
  647. #if 1
  648. if (p_dst > p_dst_post)
  649. {
  650. /* Shit: we tried to decompress corrupt data */
  651. *p_dst_len = 0;
  652. return;
  653. }
  654. #endif
  655. } /* End unrolled inner loop. */
  656. } /* End of outer loop */
  657. /* Write the length of the decompressed data before returning. */
  658. *p_dst_len=p_dst-p_dst_first;
  659. }
  660. /******************************************************************************/
  661. /* End of LZRW3.C */
  662. /******************************************************************************/