/*
 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, May 2003.
 *
 * Derived from unzip 5.40.
 *
 * Sccsid @(#)explode.c	1.6 (gritter) 9/30/03
 */
/* explode.c -- put in the public domain by Mark Adler
   version c15, 6 July 1996 */

/* You can do whatever you like with this source file, though I would
   prefer that if you modify it and redistribute it that you include
   comments to that effect with your name and the date.  Thank you.

   History:
   vers  date       who            what
   ----  ---------  -------------- ------------------------------------
   c1    30 Mar 92  M. Adler       explode that uses huft_build from inflate
                                   (this gives over a 70% speed improvement
                                   over the original unimplode.c, which
                                   decoded a bit at a time)
   c2     4 Apr 92  M. Adler       fixed bug for file sizes a multiple of 32k.
   c3    10 Apr 92  M. Adler       added a little memory tracking if DEBUG
   c4    11 Apr 92  M. Adler       added NOMEMCPY to kill use of memcpy()
   c5    21 Apr 92  M. Adler       added the WSIZE #define to allow reducing
                                   the 32K window size for specialized
                                   applications.
   c6    31 May 92  M. Adler       added typecasts to eliminate some warnings
   c7    27 Jun 92  G. Roelofs     added more typecasts.
   c8    17 Oct 92  G. Roelofs     changed ULONG/UWORD/byte to ulg/ush/uch.
   c9    19 Jul 93  J. Bush        added more typecasts (to return values);
                                   made l[256] array static for Amiga.
   c10    8 Oct 93  G. Roelofs     added used_csize for diagnostics; added
                                   buf and unshrink arguments to flush();
                                   undef'd various macros at end for Turbo C;
                                   removed NEXTBYTE macro (now in unzip.h)
                                   and bytebuf variable (not used); changed
                                   memset() to memzero().
   c11    9 Jan 94  M. Adler       fixed incorrect used_csize calculation.
   c12    9 Apr 94  G. Roelofs     fixed split comments on preprocessor lines
                                   to avoid bug in Encore compiler.
   c13   25 Aug 94  M. Adler       fixed distance-length comment (orig c9 fix)
   c14   22 Nov 95  S. Maxwell     removed unnecessary "static" on auto array
   c15    6 Jul 96  W. Haidinger   added ulg typecasts to flush() calls.
   c16    8 Feb 98  C. Spieler     added ZCONST modifiers to const tables
                                   and #ifdef DEBUG around debugging code.
   c16b  25 Mar 98  C. Spieler     modified DLL code for slide redirection.
         23 May 03  Gunnar Ritter  use cpio structures; C99 conversion.
 */
/*
   Explode imploded (PKZIP method 6 compressed) data.  This compression
   method searches for as much of the current string of bytes (up to a length
   of ~320) in the previous 4K or 8K bytes.  If it doesn't find any matches
   (of at least length 2 or 3), it codes the next byte.  Otherwise, it codes
   the length of the matched string and its distance backwards from the
   current position.  Single bytes ("literals") are preceded by a one (a
   single bit) and are either uncoded (the eight bits go directly into the
   compressed stream for a total of nine bits) or Huffman coded with a
   supplied literal code tree.  If literals are coded, then the minimum match
   length is three, otherwise it is two.

   There are therefore four kinds of imploded streams: 8K search with coded
   literals (min match = 3), 4K search with coded literals (min match = 3),
   8K with uncoded literals (min match = 2), and 4K with uncoded literals
   (min match = 2).  The kind of stream is identified in two bits of a
   general purpose bit flag that is outside of the compressed stream.

   Distance-length pairs for matched strings are preceded by a zero bit (to
   distinguish them from literals) and are always coded.  The distance comes
   first and is either the low six (4K) or low seven (8K) bits of the
   distance (uncoded), followed by the high six bits of the distance coded.
   Then the length is six bits coded (0..63 + min match length), and if the
   maximum such length is coded, then it's followed by another eight bits
   (uncoded) to be added to the coded length.  This gives a match length
   range of 2..320 or 3..321 bytes.

   The literal, length, and distance codes are all represented in a slightly
   compressed form themselves.  What is sent are the lengths of the codes for
   each value, which is sufficient to construct the codes.  Each byte of the
   code representation is the code length (the low four bits representing
   1..16), and the number of values sequentially with that length (the high
   four bits also representing 1..16).  There are 256 literal code values (if
   literals are coded), 64 length code values, and 64 distance code values,
   in that order at the beginning of the compressed stream.  Each set of code
   values is preceded (redundantly) with a byte indicating how many bytes are
   in the code description that follows, in the range 1..256.

   The codes themselves are decoded using tables made by huft_build() from
   the bit lengths.  That routine and its comments are in the inflate.c
   module.
 */
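
/*
 * Illustrative sketch only (not part of the original source): how the
 * length/count byte pairs described above expand into a list of code bit
 * lengths.  It mirrors the inner loop of get_tree() below; the input
 * bytes here are hypothetical.
 */
#if 0
static void
expand_example(void)
{
	/* Suppose the stream supplies the pair bytes 0x24 and 0x01.
	   0x24: low nibble 4 -> code length 4+1 = 5 bits;
	         high nibble 2 -> 2+1 = 3 consecutive values of that length.
	   0x01: low nibble 1 -> code length 1+1 = 2 bits;
	         high nibble 0 -> 0+1 = 1 value of that length. */
	unsigned char pairs[] = { 0x24, 0x01 };
	unsigned lengths[4], k = 0;
	unsigned i;
	for (i = 0; i < sizeof pairs; i++) {
		unsigned b = (pairs[i] & 0xf) + 1;	/* bits in code (1..16) */
		unsigned j = ((pairs[i] & 0xf0) >> 4) + 1; /* repeat count (1..16) */
		while (j--)
			lengths[k++] = b;
	}
	/* lengths[] is now { 5, 5, 5, 2 }. */
}
#endif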
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include "cpio.h"
#include "unzip.h"	/* must supply slide[] (uint8_t) array and NEXTBYTE macro */

/* routines here */
static int	get_tree(struct globals *, unsigned *l, unsigned n);
static int	explode_lit8(struct globals *, struct huft *tb, struct huft *tl,
			struct huft *td, int bb, int bl, int bd);
static int	explode_lit4(struct globals *, struct huft *tb, struct huft *tl,
			struct huft *td, int bb, int bl, int bd);
static int	explode_nolit8(struct globals *, struct huft *tl,
			struct huft *td, int bl, int bd);
static int	explode_nolit4(struct globals *, struct huft *tl,
			struct huft *td, int bl, int bd);

/* The implode algorithm uses a sliding 4K or 8K byte window on the
   uncompressed stream to find repeated byte strings.  This is implemented
   here as a circular buffer.  The index is updated simply by incrementing
   and then and'ing with 0x0fff (4K-1) or 0x1fff (8K-1).  Here, the 32K
   buffer of inflate is used, and it works just as well to always have
   a 32K circular buffer, so the index is anded with 0x7fff.  This is
   done to allow the window to also be used as the output buffer. */
/* This must be supplied in an external module useable like
   "uint8_t slide[8192];" or "uint8_t *slide;", where the latter would
   be malloc'ed.  In unzip, slide[] is actually a 32K area for use by
   inflate, which uses a 32K sliding window.
 */
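
/*
 * A minimal sketch (an assumption, not the actual external module) of
 * what such a definition could look like.  In this code base the window
 * is reached through redirSlide and WSIZE from unzip.h.
 */
#if 0
#define WSIZE	0x8000		/* 32K window; must be a power of two so
				   the index can be wrapped with & (WSIZE-1) */
uint8_t slide[WSIZE];		/* window shared by inflate and explode */
#endif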
/* Tables for length and distance */
static const uint16_t cplen2[] =
	{2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
	18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
	35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
	52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65};
static const uint16_t cplen3[] =
	{3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
	19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
	36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
	53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66};
static const uint8_t extra[] =
	{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	8};
static const uint16_t cpdist4[] =
	{1, 65, 129, 193, 257, 321, 385, 449, 513, 577, 641, 705,
	769, 833, 897, 961, 1025, 1089, 1153, 1217, 1281, 1345, 1409, 1473,
	1537, 1601, 1665, 1729, 1793, 1857, 1921, 1985, 2049, 2113, 2177,
	2241, 2305, 2369, 2433, 2497, 2561, 2625, 2689, 2753, 2817, 2881,
	2945, 3009, 3073, 3137, 3201, 3265, 3329, 3393, 3457, 3521, 3585,
	3649, 3713, 3777, 3841, 3905, 3969, 4033};
static const uint16_t cpdist8[] =
	{1, 129, 257, 385, 513, 641, 769, 897, 1025, 1153, 1281,
	1409, 1537, 1665, 1793, 1921, 2049, 2177, 2305, 2433, 2561, 2689,
	2817, 2945, 3073, 3201, 3329, 3457, 3585, 3713, 3841, 3969, 4097,
	4225, 4353, 4481, 4609, 4737, 4865, 4993, 5121, 5249, 5377, 5505,
	5633, 5761, 5889, 6017, 6145, 6273, 6401, 6529, 6657, 6785, 6913,
	7041, 7169, 7297, 7425, 7553, 7681, 7809, 7937, 8065};

/* Macros for inflate() bit peeking and grabbing.
   The usage is:

	NEEDBITS(j)
	x = b & mask_bits[j];
	DUMPBITS(j)

   where NEEDBITS makes sure that b has at least j bits in it, and
   DUMPBITS removes the bits from b.  The macros use the variable k
   for the number of bits in b.  Normally, b and k are register
   variables for speed.
 */
#define	NEEDBITS(n)	{while(k<(n)){b|=((uint32_t)NEXTBYTE)<<k;k+=8;}}
#define	DUMPBITS(n)	{b>>=(n);k-=(n);}
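
/*
 * Exposition only (assumed decoder state): pulling the seven uncoded low
 * distance bits with the macros above, exactly as the 8K decoders below
 * do.  b and k are the bit buffer and bit count; the bytes behind
 * NEXTBYTE come from the compressed stream.
 */
#if 0
	NEEDBITS(7)			/* ensure at least 7 bits in b */
	d = (unsigned)b & mask_bits[7];	/* peek at the low 7 bits (0x7f) */
	DUMPBITS(7)			/* consume them */
#endif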
#define	Bits	16
#define	Nob	16
#define	Eob	15

#define	G	(*Gp)

static int
get_tree(struct globals *Gp, unsigned *l, unsigned n)
	/*unsigned *l;*/	/* bit lengths */
	/*unsigned n;*/		/* number expected */
/* Get the bit lengths for a code representation from the compressed
   stream.  If get_tree() returns 4, then there is an error in the data.
   Otherwise zero is returned. */
{
	unsigned i;	/* bytes remaining in list */
	unsigned k;	/* lengths entered */
	unsigned j;	/* number of codes */
	unsigned b;	/* bit length for those codes */

	/* get bit lengths */
	i = NEXTBYTE + 1;	/* length/count pairs to read */
	k = 0;			/* next code */
	do {
		b = ((j = NEXTBYTE) & 0xf) + 1;	/* bits in code (1..16) */
		j = ((j & 0xf0) >> 4) + 1;	/* codes with those bits (1..16) */
		if (k + j > n)
			return 4;		/* don't overflow l[] */
		do {
			l[k++] = b;
		} while (--j);
	} while (--i);
	return k != n ? 4 : 0;	/* should have read n of them */
}
static int
explode_lit8(struct globals *Gp,
		struct huft *tb, struct huft *tl, struct huft *td,
		int bb, int bl, int bd)
	/*struct huft *tb, *tl, *td;*/	/* literal, length, and distance tables */
	/*int bb, bl, bd;*/		/* number of bits decoded by those */
/* Decompress the imploded data using coded literals and an 8K sliding
   window. */
{
	long s;			/* bytes to decompress */
	register unsigned e;	/* table entry flag/number of extra bits */
	unsigned n, d;		/* length and index for copy */
	unsigned w;		/* current window position */
	struct huft *t;		/* pointer to table entry */
	unsigned mb, ml, md;	/* masks for bb, bl, and bd bits */
	register uint32_t b;	/* bit buffer */
	register unsigned k;	/* number of bits in bit buffer */
	unsigned u;		/* true if unflushed */

	/* explode the coded data */
	b = k = w = 0;		/* initialize bit buffer, window */
	u = 1;			/* buffer unflushed */
	mb = mask_bits[bb];	/* precompute masks for speed */
	ml = mask_bits[bl];
	md = mask_bits[bd];
	s = G.ucsize;
	while (s > 0)		/* do until ucsize bytes uncompressed */
	{
		NEEDBITS(1)
		if (b & 1)	/* then literal--decode it */
		{
			DUMPBITS(1)
			s--;
			NEEDBITS((unsigned)bb)	/* get coded literal */
			if ((e = (t = tb + ((~(unsigned)b) & mb))->e) > 16)
				do {
					if (e == 99)
						return 1;
					DUMPBITS(t->b)
					e -= 16;
					NEEDBITS(e)
				} while ((e = (t = t->v.t + ((~(unsigned)b) & mask_bits[e]))->e) > 16);
			DUMPBITS(t->b)
			redirSlide[w++] = (uint8_t)t->v.n;
			if (w == WSIZE)
			{
				flush(&G, redirSlide, (uint32_t)w);
				w = u = 0;
			}
		}
		else		/* else distance/length */
		{
			DUMPBITS(1)
			NEEDBITS(7)		/* get distance low bits */
			d = (unsigned)b & 0x7f;
			DUMPBITS(7)
			NEEDBITS((unsigned)bd)	/* get coded distance high bits */
			if ((e = (t = td + ((~(unsigned)b) & md))->e) > 16)
				do {
					if (e == 99)
						return 1;
					DUMPBITS(t->b)
					e -= 16;
					NEEDBITS(e)
				} while ((e = (t = t->v.t + ((~(unsigned)b) & mask_bits[e]))->e) > 16);
			DUMPBITS(t->b)
			d = w - d - t->v.n;	/* construct offset */
			NEEDBITS((unsigned)bl)	/* get coded length */
			if ((e = (t = tl + ((~(unsigned)b) & ml))->e) > 16)
				do {
					if (e == 99)
						return 1;
					DUMPBITS(t->b)
					e -= 16;
					NEEDBITS(e)
				} while ((e = (t = t->v.t + ((~(unsigned)b) & mask_bits[e]))->e) > 16);
			DUMPBITS(t->b)
			n = t->v.n;
			if (e)			/* get length extra bits */
			{
				NEEDBITS(8)
				n += (unsigned)b & 0xff;
				DUMPBITS(8)
			}

			/* do the copy */
			s -= n;
			do {
				n -= (e = (e = WSIZE - ((d &= WSIZE-1) > w ? d : w)) > n ? n : e);
				if (u && w <= d)
				{
					memset(redirSlide + w, 0, e);
					w += e;
					d += e;
				}
				else
#ifndef NOMEMCPY
				if (w - d >= e)	/* (this test assumes unsigned comparison) */
				{
					memcpy(redirSlide + w, redirSlide + d, e);
					w += e;
					d += e;
				}
				else		/* do it slow to avoid memcpy() overlap */
#endif /* !NOMEMCPY */
				do {
					redirSlide[w++] = redirSlide[d++];
				} while (--e);
				if (w == WSIZE)
				{
					flush(&G, redirSlide, (uint32_t)w);
					w = u = 0;
				}
			} while (n);
		}
	}

	/* flush out redirSlide */
	flush(&G, redirSlide, (uint32_t)w);
	if (G.csize + G.incnt + (k >> 3))	/* should have read csize bytes, but */
	{					/* sometimes read one too many: k>>3 compensates */
		/*G.used_csize = G.zsize - G.csize - G.incnt - (k >> 3);*/
		return 5;
	}
	return 0;
}
static int
explode_lit4(struct globals *Gp,
		struct huft *tb, struct huft *tl, struct huft *td,
		int bb, int bl, int bd)
	/*struct huft *tb, *tl, *td;*/	/* literal, length, and distance tables */
	/*int bb, bl, bd;*/		/* number of bits decoded by those */
/* Decompress the imploded data using coded literals and a 4K sliding
   window. */
{
	long s;			/* bytes to decompress */
	register unsigned e;	/* table entry flag/number of extra bits */
	unsigned n, d;		/* length and index for copy */
	unsigned w;		/* current window position */
	struct huft *t;		/* pointer to table entry */
	unsigned mb, ml, md;	/* masks for bb, bl, and bd bits */
	register uint32_t b;	/* bit buffer */
	register unsigned k;	/* number of bits in bit buffer */
	unsigned u;		/* true if unflushed */

	/* explode the coded data */
	b = k = w = 0;		/* initialize bit buffer, window */
	u = 1;			/* buffer unflushed */
	mb = mask_bits[bb];	/* precompute masks for speed */
	ml = mask_bits[bl];
	md = mask_bits[bd];
	s = G.ucsize;
	while (s > 0)		/* do until ucsize bytes uncompressed */
	{
		NEEDBITS(1)
		if (b & 1)	/* then literal--decode it */
		{
			DUMPBITS(1)
			s--;
			NEEDBITS((unsigned)bb)	/* get coded literal */
			if ((e = (t = tb + ((~(unsigned)b) & mb))->e) > 16)
				do {
					if (e == 99)
						return 1;
					DUMPBITS(t->b)
					e -= 16;
					NEEDBITS(e)
				} while ((e = (t = t->v.t + ((~(unsigned)b) & mask_bits[e]))->e) > 16);
			DUMPBITS(t->b)
			redirSlide[w++] = (uint8_t)t->v.n;
			if (w == WSIZE)
			{
				flush(&G, redirSlide, (uint32_t)w);
				w = u = 0;
			}
		}
		else		/* else distance/length */
		{
			DUMPBITS(1)
			NEEDBITS(6)		/* get distance low bits */
			d = (unsigned)b & 0x3f;
			DUMPBITS(6)
			NEEDBITS((unsigned)bd)	/* get coded distance high bits */
			if ((e = (t = td + ((~(unsigned)b) & md))->e) > 16)
				do {
					if (e == 99)
						return 1;
					DUMPBITS(t->b)
					e -= 16;
					NEEDBITS(e)
				} while ((e = (t = t->v.t + ((~(unsigned)b) & mask_bits[e]))->e) > 16);
			DUMPBITS(t->b)
			d = w - d - t->v.n;	/* construct offset */
			NEEDBITS((unsigned)bl)	/* get coded length */
			if ((e = (t = tl + ((~(unsigned)b) & ml))->e) > 16)
				do {
					if (e == 99)
						return 1;
					DUMPBITS(t->b)
					e -= 16;
					NEEDBITS(e)
				} while ((e = (t = t->v.t + ((~(unsigned)b) & mask_bits[e]))->e) > 16);
			DUMPBITS(t->b)
			n = t->v.n;
			if (e)			/* get length extra bits */
			{
				NEEDBITS(8)
				n += (unsigned)b & 0xff;
				DUMPBITS(8)
			}

			/* do the copy */
			s -= n;
			do {
				n -= (e = (e = WSIZE - ((d &= WSIZE-1) > w ? d : w)) > n ? n : e);
				if (u && w <= d)
				{
					memset(redirSlide + w, 0, e);
					w += e;
					d += e;
				}
				else
#ifndef NOMEMCPY
				if (w - d >= e)	/* (this test assumes unsigned comparison) */
				{
					memcpy(redirSlide + w, redirSlide + d, e);
					w += e;
					d += e;
				}
				else		/* do it slow to avoid memcpy() overlap */
#endif /* !NOMEMCPY */
				do {
					redirSlide[w++] = redirSlide[d++];
				} while (--e);
				if (w == WSIZE)
				{
					flush(&G, redirSlide, (uint32_t)w);
					w = u = 0;
				}
			} while (n);
		}
	}

	/* flush out redirSlide */
	flush(&G, redirSlide, (uint32_t)w);
	if (G.csize + G.incnt + (k >> 3))	/* should have read csize bytes, but */
	{					/* sometimes read one too many: k>>3 compensates */
		/*G.used_csize = G.zsize - G.csize - G.incnt - (k >> 3);*/
		return 5;
	}
	return 0;
}
static int
explode_nolit8(struct globals *Gp,
		struct huft *tl, struct huft *td, int bl, int bd)
	/*struct huft *tl, *td;*/	/* length and distance decoder tables */
	/*int bl, bd;*/			/* number of bits decoded by tl[] and td[] */
/* Decompress the imploded data using uncoded literals and an 8K sliding
   window. */
{
	long s;			/* bytes to decompress */
	register unsigned e;	/* table entry flag/number of extra bits */
	unsigned n, d;		/* length and index for copy */
	unsigned w;		/* current window position */
	struct huft *t;		/* pointer to table entry */
	unsigned ml, md;	/* masks for bl and bd bits */
	register uint32_t b;	/* bit buffer */
	register unsigned k;	/* number of bits in bit buffer */
	unsigned u;		/* true if unflushed */

	/* explode the coded data */
	b = k = w = 0;		/* initialize bit buffer, window */
	u = 1;			/* buffer unflushed */
	ml = mask_bits[bl];	/* precompute masks for speed */
	md = mask_bits[bd];
	s = G.ucsize;
	while (s > 0)		/* do until ucsize bytes uncompressed */
	{
		NEEDBITS(1)
		if (b & 1)	/* then literal--get eight bits */
		{
			DUMPBITS(1)
			s--;
			NEEDBITS(8)
			redirSlide[w++] = (uint8_t)b;
			if (w == WSIZE)
			{
				flush(&G, redirSlide, (uint32_t)w);
				w = u = 0;
			}
			DUMPBITS(8)
		}
		else		/* else distance/length */
		{
			DUMPBITS(1)
			NEEDBITS(7)		/* get distance low bits */
			d = (unsigned)b & 0x7f;
			DUMPBITS(7)
			NEEDBITS((unsigned)bd)	/* get coded distance high bits */
			if ((e = (t = td + ((~(unsigned)b) & md))->e) > 16)
				do {
					if (e == 99)
						return 1;
					DUMPBITS(t->b)
					e -= 16;
					NEEDBITS(e)
				} while ((e = (t = t->v.t + ((~(unsigned)b) & mask_bits[e]))->e) > 16);
			DUMPBITS(t->b)
			d = w - d - t->v.n;	/* construct offset */
			NEEDBITS((unsigned)bl)	/* get coded length */
			if ((e = (t = tl + ((~(unsigned)b) & ml))->e) > 16)
				do {
					if (e == 99)
						return 1;
					DUMPBITS(t->b)
					e -= 16;
					NEEDBITS(e)
				} while ((e = (t = t->v.t + ((~(unsigned)b) & mask_bits[e]))->e) > 16);
			DUMPBITS(t->b)
			n = t->v.n;
			if (e)			/* get length extra bits */
			{
				NEEDBITS(8)
				n += (unsigned)b & 0xff;
				DUMPBITS(8)
			}

			/* do the copy */
			s -= n;
			do {
				n -= (e = (e = WSIZE - ((d &= WSIZE-1) > w ? d : w)) > n ? n : e);
				if (u && w <= d)
				{
					memset(redirSlide + w, 0, e);
					w += e;
					d += e;
				}
				else
#ifndef NOMEMCPY
				if (w - d >= e)	/* (this test assumes unsigned comparison) */
				{
					memcpy(redirSlide + w, redirSlide + d, e);
					w += e;
					d += e;
				}
				else		/* do it slow to avoid memcpy() overlap */
#endif /* !NOMEMCPY */
				do {
					redirSlide[w++] = redirSlide[d++];
				} while (--e);
				if (w == WSIZE)
				{
					flush(&G, redirSlide, (uint32_t)w);
					w = u = 0;
				}
			} while (n);
		}
	}

	/* flush out redirSlide */
	flush(&G, redirSlide, (uint32_t)w);
	if (G.csize + G.incnt + (k >> 3))	/* should have read csize bytes, but */
	{					/* sometimes read one too many: k>>3 compensates */
		/*G.used_csize = G.zsize - G.csize - G.incnt - (k >> 3);*/
		return 5;
	}
	return 0;
}
static int
explode_nolit4(struct globals *Gp,
		struct huft *tl, struct huft *td, int bl, int bd)
	/*struct huft *tl, *td;*/	/* length and distance decoder tables */
	/*int bl, bd;*/			/* number of bits decoded by tl[] and td[] */
/* Decompress the imploded data using uncoded literals and a 4K sliding
   window. */
{
	long s;			/* bytes to decompress */
	register unsigned e;	/* table entry flag/number of extra bits */
	unsigned n, d;		/* length and index for copy */
	unsigned w;		/* current window position */
	struct huft *t;		/* pointer to table entry */
	unsigned ml, md;	/* masks for bl and bd bits */
	register uint32_t b;	/* bit buffer */
	register unsigned k;	/* number of bits in bit buffer */
	unsigned u;		/* true if unflushed */

	/* explode the coded data */
	b = k = w = 0;		/* initialize bit buffer, window */
	u = 1;			/* buffer unflushed */
	ml = mask_bits[bl];	/* precompute masks for speed */
	md = mask_bits[bd];
	s = G.ucsize;
	while (s > 0)		/* do until ucsize bytes uncompressed */
	{
		NEEDBITS(1)
		if (b & 1)	/* then literal--get eight bits */
		{
			DUMPBITS(1)
			s--;
			NEEDBITS(8)
			redirSlide[w++] = (uint8_t)b;
			if (w == WSIZE)
			{
				flush(&G, redirSlide, (uint32_t)w);
				w = u = 0;
			}
			DUMPBITS(8)
		}
		else		/* else distance/length */
		{
			DUMPBITS(1)
			NEEDBITS(6)		/* get distance low bits */
			d = (unsigned)b & 0x3f;
			DUMPBITS(6)
			NEEDBITS((unsigned)bd)	/* get coded distance high bits */
			if ((e = (t = td + ((~(unsigned)b) & md))->e) > 16)
				do {
					if (e == 99)
						return 1;
					DUMPBITS(t->b)
					e -= 16;
					NEEDBITS(e)
				} while ((e = (t = t->v.t + ((~(unsigned)b) & mask_bits[e]))->e) > 16);
			DUMPBITS(t->b)
			d = w - d - t->v.n;	/* construct offset */
			NEEDBITS((unsigned)bl)	/* get coded length */
			if ((e = (t = tl + ((~(unsigned)b) & ml))->e) > 16)
				do {
					if (e == 99)
						return 1;
					DUMPBITS(t->b)
					e -= 16;
					NEEDBITS(e)
				} while ((e = (t = t->v.t + ((~(unsigned)b) & mask_bits[e]))->e) > 16);
			DUMPBITS(t->b)
			n = t->v.n;
			if (e)			/* get length extra bits */
			{
				NEEDBITS(8)
				n += (unsigned)b & 0xff;
				DUMPBITS(8)
			}

			/* do the copy */
			s -= n;
			do {
				n -= (e = (e = WSIZE - ((d &= WSIZE-1) > w ? d : w)) > n ? n : e);
				if (u && w <= d)
				{
					memset(redirSlide + w, 0, e);
					w += e;
					d += e;
				}
				else
#ifndef NOMEMCPY
				if (w - d >= e)	/* (this test assumes unsigned comparison) */
				{
					memcpy(redirSlide + w, redirSlide + d, e);
					w += e;
					d += e;
				}
				else		/* do it slow to avoid memcpy() overlap */
#endif /* !NOMEMCPY */
				do {
					redirSlide[w++] = redirSlide[d++];
				} while (--e);
				if (w == WSIZE)
				{
					flush(&G, redirSlide, (uint32_t)w);
					w = u = 0;
				}
			} while (n);
		}
	}

	/* flush out redirSlide */
	flush(&G, redirSlide, (uint32_t)w);
	if (G.csize + G.incnt + (k >> 3))	/* should have read csize bytes, but */
	{					/* sometimes read one too many: k>>3 compensates */
		/*G.used_csize = G.zsize - G.csize - G.incnt - (k >> 3);*/
		return 5;
	}
	return 0;
}
#undef	G

int
zipexplode(struct file *f, const char *tgt, int tfd, int doswap, uint32_t *crc)
/* Explode an imploded compressed stream.  Based on the general purpose
   bit flag, decide on coded or uncoded literals, and an 8K or 4K sliding
   window.  Construct the literal (if any), length, and distance codes and
   the tables needed to decode them (using huft_build() from inflate.c),
   and call the appropriate routine for the type of data in the remainder
   of the stream.  The four routines are nearly identical, differing only
   in whether the literal is decoded or simply read in, and in how many
   bits are read in, uncoded, for the low distance bits. */
{
	struct globals G;
	unsigned r;		/* return codes */
	struct huft *tb;	/* literal code table */
	struct huft *tl;	/* length code table */
	struct huft *td;	/* distance code table */
	int bb;			/* bits for tb */
	int bl;			/* bits for tl */
	int bd;			/* bits for td */
	unsigned l[256];	/* bit lengths for codes */

	memset(&G, 0, sizeof G);
	G.tgt = tgt;
	G.tfd = tfd;
	G.doswap = doswap;
	G.crc = crc;
	G.zsize = G.uzsize = f->f_csize;
	G.ucsize = f->f_st.st_size;

	/* Tune base table sizes.  Note: I thought that to truly optimize
	   speed, I would have to select different bl, bd, and bb values for
	   different compressed file sizes.  I was surprised to find out that
	   the values of 7, 7, and 9 worked best over a very wide range of
	   sizes, except that bd = 8 worked marginally better for large
	   compressed sizes. */
	bl = 7;
	bd = (G.csize + G.incnt) > 200000L ? 8 : 7;

	/* With literal tree--minimum match length is 3 */
#ifdef	DEBUG
	G.hufts = 0;	/* initialize huft's malloc'ed */
#endif
	if (f->f_gflag & FG_BIT2)
	{
		bb = 9;	/* base table size for literals */
		if ((r = get_tree(&G, l, 256)) != 0)
			goto err;
		if ((r = huft_build(l, 256, 256, NULL, NULL, &tb, &bb,
				Bits, Nob, Eob)) != 0)
		{
			if (r == 1)
				huft_free(tb);
			goto err;
		}
		if ((r = get_tree(&G, l, 64)) != 0)
			goto err;
		if ((r = huft_build(l, 64, 0, cplen3, extra, &tl, &bl,
				Bits, Nob, Eob)) != 0)
		{
			if (r == 1)
				huft_free(tl);
			huft_free(tb);
			goto err;
		}
		if ((r = get_tree(&G, l, 64)) != 0)
			goto err;
		if (f->f_gflag & FG_BIT1)	/* true if 8K */
		{
			if ((r = huft_build(l, 64, 0, cpdist8, extra, &td, &bd,
					Bits, Nob, Eob)) != 0)
			{
				if (r == 1)
					huft_free(td);
				huft_free(tl);
				huft_free(tb);
				goto err;
			}
			r = explode_lit8(&G, tb, tl, td, bb, bl, bd);
		}
		else				/* else 4K */
		{
			if ((r = huft_build(l, 64, 0, cpdist4, extra, &td, &bd,
					Bits, Nob, Eob)) != 0)
			{
				if (r == 1)
					huft_free(td);
				huft_free(tl);
				huft_free(tb);
				goto err;
			}
			r = explode_lit4(&G, tb, tl, td, bb, bl, bd);
		}
		huft_free(td);
		huft_free(tl);
		huft_free(tb);
	}
	else
	/* No literal tree--minimum match length is 2 */
	{
		if ((r = get_tree(&G, l, 64)) != 0)
			goto err;
		if ((r = huft_build(l, 64, 0, cplen2, extra, &tl, &bl,
				Bits, Nob, Eob)) != 0)
		{
			if (r == 1)
				huft_free(tl);
			goto err;
		}
		if ((r = get_tree(&G, l, 64)) != 0)
			goto err;
		if (f->f_gflag & FG_BIT1)	/* true if 8K */
		{
			if ((r = huft_build(l, 64, 0, cpdist8, extra, &td, &bd,
					Bits, Nob, Eob)) != 0)
			{
				if (r == 1)
					huft_free(td);
				huft_free(tl);
				goto err;
			}
			r = explode_nolit8(&G, tl, td, bl, bd);
		}
		else				/* else 4K */
		{
			if ((r = huft_build(l, 64, 0, cpdist4, extra, &td, &bd,
					Bits, Nob, Eob)) != 0)
			{
				if (r == 1)
					huft_free(td);
				huft_free(tl);
				goto err;
			}
			r = explode_nolit4(&G, tl, td, bl, bd);
		}
		huft_free(td);
		huft_free(tl);
	}
	Trace((stderr, "<%u > ", G.hufts));
err:
	switch (r) {
	case 0:
		break;
	case 5:
		while (G.uzsize > 0)
			NEXTBYTE;
		/*FALLTHRU*/
	default:
		msg(3, 0, "compression error on \"%s\"\n", f->f_name);
	}
	return r || G.status ? -1 : 0;
}
/* The following code is derived from: */
/* inflate.c -- put in the public domain by Mark Adler
   version c16b, 29 March 1998 */

/* If BMAX needs to be larger than 16, then h and x[] should be uint32_t. */
#define	BMAX	16	/* maximum bit length of any code (16 for explode) */
#define	N_MAX	288	/* maximum number of codes in any set */

int
huft_build(const unsigned *b, unsigned n, unsigned s,
		const uint16_t *d, const uint8_t *e,
		struct huft **t, int *m,
		int bits, int nob, int eob)
	/*const unsigned *b;*/	/* code lengths in bits (all assumed <= BMAX) */
	/*unsigned n;*/		/* number of codes (assumed <= N_MAX) */
	/*unsigned s;*/		/* number of simple-valued codes (0..s-1) */
	/*const uint16_t *d;*/	/* list of base values for non-simple codes */
	/*const uint8_t *e;*/	/* list of extra bits for non-simple codes */
	/*struct huft **t;*/	/* result: starting table */
	/*int *m;*/		/* maximum lookup bits, returns actual */
/* Given a list of code lengths and a maximum table size, make a set of
   tables to decode that set of codes.  Return zero on success, one if
   the given code set is incomplete (the tables are still built in this
   case), two if the input is invalid (all zero length codes or an
   oversubscribed set of lengths), and three if not enough memory.
   The code with value 256 is special, and the tables are constructed
   so that no bits beyond that code are fetched when that code is
   decoded. */
{
	unsigned a;		/* counter for codes of length k */
	unsigned c[BMAX+1];	/* bit length count table */
	unsigned el;		/* length of EOB code (value 256) */
	unsigned f;		/* i repeats in table every f entries */
	int g;			/* maximum code length */
	int h;			/* table level */
	register unsigned i;	/* counter, current code */
	register unsigned j;	/* counter */
	register int k;		/* number of bits in current code */
	int lx[BMAX+1];		/* memory for l[-1..BMAX-1] */
	int *l = lx+1;		/* stack of bits per table */
	register unsigned *p;	/* pointer into c[], b[], or v[] */
	register struct huft *q;	/* points to current table */
	struct huft r;		/* table entry for structure assignment */
	struct huft *u[BMAX];	/* table stack */
	unsigned v[N_MAX];	/* values in order of bit length */
	register int w;		/* bits before this table == (l * h) */
	unsigned x[BMAX+1];	/* bit offsets, then code stack */
	unsigned *xp;		/* pointer into x */
	int y;			/* number of dummy codes added */
	unsigned z;		/* number of entries in current table */

	/* Generate counts for each bit length */
	el = n > 256 ? b[256] : BMAX;	/* set length of EOB code, if any */
	memset(c, 0, sizeof c);
	p = (unsigned *)b; i = n;
	do {
		c[*p]++; p++;	/* assume all entries <= BMAX */
	} while (--i);
	if (c[0] == n)		/* null input--all zero length codes */
	{
		*t = NULL;
		*m = 0;
		return 0;
	}

	/* Find minimum and maximum length, bound *m by those */
	for (j = 1; j <= BMAX; j++)
		if (c[j])
			break;
	k = j;			/* minimum code length */
	if ((unsigned)*m < j)
		*m = j;
	for (i = BMAX; i; i--)
		if (c[i])
			break;
	g = i;			/* maximum code length */
	if ((unsigned)*m > i)
		*m = i;

	/* Adjust last length count to fill out codes, if needed */
	for (y = 1 << j; j < i; j++, y <<= 1)
		if ((y -= c[j]) < 0)
			return 2;	/* bad input: more codes than bits */
	if ((y -= c[i]) < 0)
		return 2;
	c[i] += y;

	/* Generate starting offsets into the value table for each length */
	x[1] = j = 0;
	p = c + 1; xp = x + 2;
	while (--i) {		/* note that i == g from above */
		*xp++ = (j += *p++);
	}

	/* Make a table of values in order of bit lengths */
	memset(v, 0, sizeof v);
	p = (unsigned *)b; i = 0;
	do {
		if ((j = *p++) != 0)
			v[x[j]++] = i;
	} while (++i < n);
	n = x[g];		/* set n to length of v */

	/* Generate the Huffman codes and for each, make the table entries */
	x[0] = i = 0;		/* first Huffman code is zero */
	p = v;			/* grab values in bit order */
	h = -1;			/* no tables yet--level -1 */
	w = l[-1] = 0;		/* no bits decoded yet */
	u[0] = NULL;		/* just to keep compilers happy */
	q = NULL;		/* ditto */
	z = 0;			/* ditto */

	/* go through the bit lengths (k already is bits in shortest code) */
	for (; k <= g; k++)
	{
		a = c[k];
		while (a--)
		{
			/* here i is the Huffman code of length k bits for value *p */
			/* make tables up to required level */
			while (k > w + l[h])
			{
				w += l[h++];	/* add bits already decoded */

				/* compute minimum size table less than or equal to *m bits */
				z = (z = g - w) > (unsigned)*m ? *m : z;	/* upper limit */
				if ((f = 1 << (j = k - w)) > a + 1)	/* try a k-w bit table */
				{	/* too few codes for k-w bit table */
					f -= a + 1;	/* deduct codes from patterns left */
					xp = c + k;
					while (++j < z)	/* try smaller tables up to z bits */
					{
						if ((f <<= 1) <= *++xp)
							break;	/* enough codes to use up j bits */
						f -= *xp;	/* else deduct codes from patterns */
					}
				}
				if ((unsigned)w + j > el && (unsigned)w < el)
					j = el - w;	/* make EOB code end at table */
				z = 1 << j;	/* table entries for j-bit table */
				l[h] = j;	/* set table size in stack */

				/* allocate and link in new table */
				if ((q = malloc((z + 1)*sizeof(struct huft))) == NULL)
				{
					if (h)
						huft_free(u[0]);
					return 3;	/* not enough memory */
				}
#ifdef	DEBUG
				G.hufts += z + 1;	/* track memory usage */
#endif
				*t = q + 1;	/* link to list for huft_free() */
				*(t = &(q->v.t)) = NULL;
				u[h] = ++q;	/* table starts after link */

				/* connect to last table, if there is one */
				if (h)
				{
					x[h] = i;	/* save pattern for backing up */
					r.b = (uint8_t)l[h-1];	/* bits to dump before this table */
					r.e = (uint8_t)(bits + j);	/* bits in this table */
					r.v.t = q;	/* pointer to this table */
					j = (i & ((1 << w) - 1)) >> (w - l[h-1]);
					u[h-1][j] = r;	/* connect to last table */
				}
			}

			/* set up table entry in r */
			r.b = (uint8_t)(k - w);
			if (p >= v + n)
				r.e = 99;	/* out of values--invalid code */
			else if (*p < s)
			{
				r.e = (uint8_t)(*p < 256 ? nob : eob);	/* 256 is end-of-block code */
				r.v.n = (uint16_t)*p++;	/* simple code is just the value */
			}
			else
			{
				r.e = (uint8_t)e[*p - s];	/* non-simple--look up in lists */
				r.v.n = d[*p++ - s];
			}

			/* fill code-like entries with r */
			f = 1 << (k - w);
			for (j = i >> w; j < z; j += f)
				q[j] = r;

			/* backwards increment the k-bit code i */
			for (j = 1 << (k - 1); i & j; j >>= 1)
				i ^= j;
			i ^= j;

			/* backup over finished tables */
			while ((i & ((1 << w) - 1)) != x[h])
				w -= l[--h];	/* don't need to update q */
		}
	}

	/* return actual size of base table */
	*m = l[0];

	/* Return true (1) if we were given an incomplete table */
	return y != 0 && g != 1;
}
void
huft_free(struct huft *t)
	/*struct huft *t;*/	/* table to free */
/* Free the malloc'ed tables built by huft_build(), which makes a linked
   list of the tables it made, with the links in a dummy first entry of
   each table. */
{
	register struct huft *p, *q;

	/* Go through linked list, freeing from the malloced (t[-1]) address. */
	p = t;
	while (p != NULL)
	{
		q = (--p)->v.t;
		free(p);
		p = q;
	}
}
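
/*
 * Call-pattern sketch, exposition only (lengths[] is hypothetical input):
 * huft_build() returns 0 on success and 1 for an incomplete code set, in
 * which case partial tables were still allocated and must be freed with
 * huft_free(), as zipexplode() above does before bailing out.
 */
#if 0
	struct huft *tl;
	int bl = 7;			/* in: max lookup bits; out: actual */
	unsigned lengths[64];		/* filled in by get_tree() */
	int r = huft_build(lengths, 64, 0, cplen2, extra, &tl, &bl,
			Bits, Nob, Eob);
	if (r == 0) {
		/* ... decode using tl with bl-bit lookups, then ... */
		huft_free(tl);
	} else if (r == 1)
		huft_free(tl);		/* incomplete set: tables exist */
#endif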
const uint16_t	mask_bits[] = {
	0x0000,
	0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
	0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
};

void
flush(struct globals *Gp, const void *data, size_t size)
{
	if (Gp->tfd >= 0 && write(Gp->tfd, data, size) != (ssize_t)size) {
		emsg(3, "Cannot write \"%s\"", Gp->tgt);
		Gp->tfd = -1;
		Gp->status = -1;
	}
	*Gp->crc = zipcrc(*Gp->crc, data, size);
}
int
readbyte(struct globals *Gp)
{
	if (Gp->uzsize <= 0)
		return EOF;
	Gp->incnt = bread((char *)Gp->inbuf,
			Gp->uzsize > sizeof Gp->inbuf ?
			sizeof Gp->inbuf : Gp->uzsize);
	if (Gp->incnt <= 0)
		unexeoa();
	if (Gp->doswap)
		swap((char *)Gp->inbuf, Gp->incnt, bflag||sflag, bflag||Sflag);
	Gp->uzsize -= Gp->incnt;
	Gp->incnt--;
	Gp->inptr = Gp->inbuf;
	return (int)(*Gp->inptr++);
}