Line data Source code
1 : /* SPDX-License-Identifier: Zlib */
2 :
3 : /*
4 : * tinflate - tiny inflate
5 : *
6 : * Copyright (c) 2003-2019 Joergen Ibsen
7 : *
8 : * This software is provided 'as-is', without any express or implied
9 : * warranty. In no event will the authors be held liable for any damages
10 : * arising from the use of this software.
11 : *
12 : * Permission is granted to anyone to use this software for any purpose,
13 : * including commercial applications, and to alter it and redistribute it
14 : * freely, subject to the following restrictions:
15 : *
16 : * 1. The origin of this software must not be misrepresented; you must
17 : * not claim that you wrote the original software. If you use this
18 : * software in a product, an acknowledgment in the product
19 : * documentation would be appreciated but is not required.
20 : *
21 : * 2. Altered source versions must be plainly marked as such, and must
22 : * not be misrepresented as being the original software.
23 : *
24 : * 3. This notice may not be removed or altered from any source
25 : * distribution.
26 : */
27 :
28 : #include "tinf.h"
29 :
30 : #include <assert.h>
31 : #include <limits.h>
32 :
33 : #if defined(UINT_MAX) && (UINT_MAX) < 0xFFFFFFFFUL
34 : # error "tinf requires unsigned int to be at least 32-bit"
35 : #endif
36 :
37 : /* -- Internal data structures -- */
38 :
39 : struct tinf_tree {
40 : unsigned short counts[16]; /* Number of codes with a given length */
41 : unsigned short symbols[288]; /* Symbols sorted by code */
42 : int max_sym;
43 : };
44 :
45 : struct tinf_data {
46 : const unsigned char *source;
47 : const unsigned char *source_end;
48 : unsigned int tag;
49 : int bitcount;
50 : int overflow;
51 :
52 : unsigned char *dest_start;
53 : unsigned char *dest;
54 : unsigned char *dest_end;
55 :
56 : struct tinf_tree ltree; /* Literal/length tree */
57 : struct tinf_tree dtree; /* Distance tree */
58 : };
59 :
60 : /* -- Utility functions -- */
61 :
62 8 : static unsigned int read_le16(const unsigned char *p)
63 : {
64 8 : return ((unsigned int) p[0])
65 8 : | ((unsigned int) p[1] << 8);
66 : }
67 :
68 : /* Build fixed Huffman trees */
69 2 : static void tinf_build_fixed_trees(struct tinf_tree *lt, struct tinf_tree *dt)
70 : {
71 : int i;
72 :
73 : /* Build fixed literal/length tree */
74 34 : for (i = 0; i < 16; ++i) {
75 32 : lt->counts[i] = 0;
76 : }
77 :
78 2 : lt->counts[7] = 24;
79 2 : lt->counts[8] = 152;
80 2 : lt->counts[9] = 112;
81 :
82 50 : for (i = 0; i < 24; ++i) {
83 48 : lt->symbols[i] = 256 + i;
84 : }
85 290 : for (i = 0; i < 144; ++i) {
86 288 : lt->symbols[24 + i] = i;
87 : }
88 18 : for (i = 0; i < 8; ++i) {
89 16 : lt->symbols[24 + 144 + i] = 280 + i;
90 : }
91 226 : for (i = 0; i < 112; ++i) {
92 224 : lt->symbols[24 + 144 + 8 + i] = 144 + i;
93 : }
94 :
95 2 : lt->max_sym = 285;
96 :
97 : /* Build fixed distance tree */
98 34 : for (i = 0; i < 16; ++i) {
99 32 : dt->counts[i] = 0;
100 : }
101 :
102 2 : dt->counts[5] = 32;
103 :
104 66 : for (i = 0; i < 32; ++i) {
105 64 : dt->symbols[i] = i;
106 : }
107 :
108 2 : dt->max_sym = 29;
109 2 : }
110 :
111 : /* Given an array of code lengths, build a tree */
112 0 : static int tinf_build_tree(struct tinf_tree *t, const unsigned char *lengths,
113 : unsigned int num)
114 : {
115 : unsigned short offs[16];
116 : unsigned int i, num_codes, available;
117 :
118 0 : assert(num <= 288);
119 :
120 0 : for (i = 0; i < 16; ++i) {
121 0 : t->counts[i] = 0;
122 : }
123 :
124 0 : t->max_sym = -1;
125 :
126 : /* Count number of codes for each non-zero length */
127 0 : for (i = 0; i < num; ++i) {
128 0 : assert(lengths[i] <= 15);
129 :
130 0 : if (lengths[i]) {
131 0 : t->max_sym = i;
132 0 : t->counts[lengths[i]]++;
133 : }
134 : }
135 :
136 : /* Compute offset table for distribution sort */
137 0 : for (available = 1, num_codes = 0, i = 0; i < 16; ++i) {
138 0 : unsigned int used = t->counts[i];
139 :
140 : /* Check length contains no more codes than available */
141 0 : if (used > available) {
142 0 : return TINF_DATA_ERROR;
143 : }
144 0 : available = 2 * (available - used);
145 :
146 0 : offs[i] = num_codes;
147 0 : num_codes += used;
148 : }
149 :
150 : /*
151 : * Check all codes were used, or for the special case of only one
152 : * code that it has length 1
153 : */
154 0 : if ((num_codes > 1 && available > 0)
155 0 : || (num_codes == 1 && t->counts[1] != 1)) {
156 0 : return TINF_DATA_ERROR;
157 : }
158 :
159 : /* Fill in symbols sorted by code */
160 0 : for (i = 0; i < num; ++i) {
161 0 : if (lengths[i]) {
162 0 : t->symbols[offs[lengths[i]]++] = i;
163 : }
164 : }
165 :
166 : /*
167 : * For the special case of only one code (which will be 0) add a
168 : * code 1 which results in a symbol that is too large
169 : */
170 0 : if (num_codes == 1) {
171 0 : t->counts[1] = 2;
172 0 : t->symbols[1] = t->max_sym + 1;
173 : }
174 :
175 0 : return TINF_OK;
176 : }
177 :
178 : /* -- Decode functions -- */
179 :
180 266 : static void tinf_refill(struct tinf_data *d, int num)
181 : {
182 266 : assert(num >= 0 && num <= 32);
183 :
184 : /* Read bytes until at least num bits available */
185 304 : while (d->bitcount < num) {
186 38 : if (d->source != d->source_end) {
187 38 : d->tag |= (unsigned int) *d->source++ << d->bitcount;
188 : }
189 : else {
190 0 : d->overflow = 1;
191 : }
192 38 : d->bitcount += 8;
193 : }
194 :
195 266 : assert(d->bitcount <= 32);
196 266 : }
197 :
198 266 : static unsigned int tinf_getbits_no_refill(struct tinf_data *d, int num)
199 : {
200 : unsigned int bits;
201 :
202 266 : assert(num >= 0 && num <= d->bitcount);
203 :
204 : /* Get bits from tag */
205 266 : bits = d->tag & ((1UL << num) - 1);
206 :
207 : /* Remove bits from tag */
208 266 : d->tag >>= num;
209 266 : d->bitcount -= num;
210 :
211 266 : return bits;
212 : }
213 :
214 : /* Get num bits from source stream */
215 266 : static unsigned int tinf_getbits(struct tinf_data *d, int num)
216 : {
217 266 : tinf_refill(d, num);
218 266 : return tinf_getbits_no_refill(d, num);
219 : }
220 :
221 : /* Read a num bit value from stream and add base */
222 0 : static unsigned int tinf_getbits_base(struct tinf_data *d, int num, int base)
223 : {
224 0 : return base + (num ? tinf_getbits(d, num) : 0);
225 : }
226 :
227 : /* Given a data stream and a tree, decode a symbol */
228 32 : static int tinf_decode_symbol(struct tinf_data *d, const struct tinf_tree *t)
229 : {
230 32 : int base = 0, offs = 0;
231 : int len;
232 :
233 : /*
234 : * Get more bits while code index is above number of codes
235 : *
236 : * Rather than the actual code, we are computing the position of the
237 : * code in the sorted order of codes, which is the index of the
238 : * corresponding symbol.
239 : *
240 : * Conceptually, for each code length (level in the tree), there are
241 : * counts[len] leaves on the left and internal nodes on the right.
242 : * The index we have decoded so far is base + offs, and if that
243 : * falls within the leaves we are done. Otherwise we adjust the range
244 : * of offs and add one more bit to it.
245 : */
246 32 : for (len = 1; ; ++len) {
247 254 : offs = 2 * offs + tinf_getbits(d, 1);
248 :
249 254 : assert(len <= 15);
250 :
251 254 : if (offs < t->counts[len]) {
252 32 : break;
253 : }
254 :
255 222 : base += t->counts[len];
256 222 : offs -= t->counts[len];
257 : }
258 :
259 32 : assert(base + offs >= 0 && base + offs < 288);
260 :
261 32 : return t->symbols[base + offs];
262 : }
263 :
264 : /* Given a data stream, decode dynamic trees from it */
265 0 : static int tinf_decode_trees(struct tinf_data *d, struct tinf_tree *lt,
266 : struct tinf_tree *dt)
267 : {
268 : unsigned char lengths[288 + 32];
269 :
270 : /* Special ordering of code length codes */
271 : static const unsigned char clcidx[19] = {
272 : 16, 17, 18, 0, 8, 7, 9, 6, 10, 5,
273 : 11, 4, 12, 3, 13, 2, 14, 1, 15
274 : };
275 :
276 : unsigned int hlit, hdist, hclen;
277 : unsigned int i, num, length;
278 : int res;
279 :
280 : /* Get 5 bits HLIT (257-286) */
281 0 : hlit = tinf_getbits_base(d, 5, 257);
282 :
283 : /* Get 5 bits HDIST (1-32) */
284 0 : hdist = tinf_getbits_base(d, 5, 1);
285 :
286 : /* Get 4 bits HCLEN (4-19) */
287 0 : hclen = tinf_getbits_base(d, 4, 4);
288 :
289 : /*
290 : * The RFC limits the range of HLIT to 286, but lists HDIST as range
291 : * 1-32, even though distance codes 30 and 31 have no meaning. While
292 : * we could allow the full range of HLIT and HDIST to make it possible
293 : * to decode the fixed trees with this function, we consider it an
294 : * error here.
295 : *
296 : * See also: https://github.com/madler/zlib/issues/82
297 : */
298 0 : if (hlit > 286 || hdist > 30) {
299 0 : return TINF_DATA_ERROR;
300 : }
301 :
302 0 : for (i = 0; i < 19; ++i) {
303 0 : lengths[i] = 0;
304 : }
305 :
306 : /* Read code lengths for code length alphabet */
307 0 : for (i = 0; i < hclen; ++i) {
308 : /* Get 3 bits code length (0-7) */
309 0 : unsigned int clen = tinf_getbits(d, 3);
310 :
311 0 : lengths[clcidx[i]] = clen;
312 : }
313 :
314 : /* Build code length tree (in literal/length tree to save space) */
315 0 : res = tinf_build_tree(lt, lengths, 19);
316 :
317 0 : if (res != TINF_OK) {
318 0 : return res;
319 : }
320 :
321 : /* Check code length tree is not empty */
322 0 : if (lt->max_sym == -1) {
323 0 : return TINF_DATA_ERROR;
324 : }
325 :
326 : /* Decode code lengths for the dynamic trees */
327 0 : for (num = 0; num < hlit + hdist; ) {
328 0 : int sym = tinf_decode_symbol(d, lt);
329 :
330 0 : if (sym > lt->max_sym) {
331 0 : return TINF_DATA_ERROR;
332 : }
333 :
334 0 : switch (sym) {
335 0 : case 16:
336 : /* Copy previous code length 3-6 times (read 2 bits) */
337 0 : if (num == 0) {
338 0 : return TINF_DATA_ERROR;
339 : }
340 0 : sym = lengths[num - 1];
341 0 : length = tinf_getbits_base(d, 2, 3);
342 0 : break;
343 0 : case 17:
344 : /* Repeat code length 0 for 3-10 times (read 3 bits) */
345 0 : sym = 0;
346 0 : length = tinf_getbits_base(d, 3, 3);
347 0 : break;
348 0 : case 18:
349 : /* Repeat code length 0 for 11-138 times (read 7 bits) */
350 0 : sym = 0;
351 0 : length = tinf_getbits_base(d, 7, 11);
352 0 : break;
353 0 : default:
354 : /* Values 0-15 represent the actual code lengths */
355 0 : length = 1;
356 0 : break;
357 : }
358 :
359 0 : if (length > hlit + hdist - num) {
360 0 : return TINF_DATA_ERROR;
361 : }
362 :
363 0 : while (length--) {
364 0 : lengths[num++] = sym;
365 : }
366 : }
367 :
368 : /* Check EOB symbol is present */
369 0 : if (lengths[256] == 0) {
370 0 : return TINF_DATA_ERROR;
371 : }
372 :
373 : /* Build dynamic trees */
374 0 : res = tinf_build_tree(lt, lengths, hlit);
375 :
376 0 : if (res != TINF_OK) {
377 0 : return res;
378 : }
379 :
380 0 : res = tinf_build_tree(dt, lengths + hlit, hdist);
381 :
382 0 : if (res != TINF_OK) {
383 0 : return res;
384 : }
385 :
386 0 : return TINF_OK;
387 : }
388 :
389 : /* -- Block inflate functions -- */
390 :
391 : /* Given a stream and two trees, inflate a block of data */
392 2 : static int tinf_inflate_block_data(struct tinf_data *d, struct tinf_tree *lt,
393 : struct tinf_tree *dt)
394 : {
395 : /* Extra bits and base tables for length codes */
396 : static const unsigned char length_bits[30] = {
397 : 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
398 : 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
399 : 4, 4, 4, 4, 5, 5, 5, 5, 0, 127
400 : };
401 :
402 : static const unsigned short length_base[30] = {
403 : 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
404 : 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
405 : 67, 83, 99, 115, 131, 163, 195, 227, 258, 0
406 : };
407 :
408 : /* Extra bits and base tables for distance codes */
409 : static const unsigned char dist_bits[30] = {
410 : 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
411 : 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
412 : 9, 9, 10, 10, 11, 11, 12, 12, 13, 13
413 : };
414 :
415 : static const unsigned short dist_base[30] = {
416 : 1, 2, 3, 4, 5, 7, 9, 13, 17, 25,
417 : 33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
418 : 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
419 : };
420 :
421 30 : for (;;) {
422 32 : int sym = tinf_decode_symbol(d, lt);
423 :
424 : /* Check for overflow in bit reader */
425 32 : if (d->overflow) {
426 0 : return TINF_DATA_ERROR;
427 : }
428 :
429 32 : if (sym < 256) {
430 30 : if (d->dest == d->dest_end) {
431 0 : return TINF_BUF_ERROR;
432 : }
433 30 : *d->dest++ = sym;
434 : }
435 : else {
436 : int length, dist, offs;
437 : int i;
438 :
439 : /* Check for end of block */
440 2 : if (sym == 256) {
441 2 : return TINF_OK;
442 : }
443 :
444 : /* Check sym is within range and distance tree is not empty */
445 0 : if (sym > lt->max_sym || sym - 257 > 28 || dt->max_sym == -1) {
446 0 : return TINF_DATA_ERROR;
447 : }
448 :
449 0 : sym -= 257;
450 :
451 : /* Possibly get more bits from length code */
452 0 : length = tinf_getbits_base(d, length_bits[sym],
453 0 : length_base[sym]);
454 :
455 0 : dist = tinf_decode_symbol(d, dt);
456 :
457 : /* Check dist is within range */
458 0 : if (dist > dt->max_sym || dist > 29) {
459 0 : return TINF_DATA_ERROR;
460 : }
461 :
462 : /* Possibly get more bits from distance code */
463 0 : offs = tinf_getbits_base(d, dist_bits[dist],
464 0 : dist_base[dist]);
465 :
466 0 : if (offs > d->dest - d->dest_start) {
467 0 : return TINF_DATA_ERROR;
468 : }
469 :
470 0 : if (d->dest_end - d->dest < length) {
471 0 : return TINF_BUF_ERROR;
472 : }
473 :
474 : /* Copy match */
475 0 : for (i = 0; i < length; ++i) {
476 0 : d->dest[i] = d->dest[i - offs];
477 : }
478 :
479 0 : d->dest += length;
480 : }
481 : }
482 : }
483 :
484 : /* Inflate an uncompressed block of data */
485 4 : static int tinf_inflate_uncompressed_block(struct tinf_data *d)
486 : {
487 : unsigned int length, invlength;
488 :
489 4 : if (d->source_end - d->source < 4) {
490 0 : return TINF_DATA_ERROR;
491 : }
492 :
493 : /* Get length */
494 4 : length = read_le16(d->source);
495 :
496 : /* Get one's complement of length */
497 4 : invlength = read_le16(d->source + 2);
498 :
499 : /* Check length */
500 4 : if (length != (~invlength & 0x0000FFFF)) {
501 0 : return TINF_DATA_ERROR;
502 : }
503 :
504 4 : d->source += 4;
505 :
506 4 : if (d->source_end - d->source < length) {
507 0 : return TINF_DATA_ERROR;
508 : }
509 :
510 4 : if (d->dest_end - d->dest < length) {
511 0 : return TINF_BUF_ERROR;
512 : }
513 :
514 : /* Copy block */
515 54916 : while (length--) {
516 54912 : *d->dest++ = *d->source++;
517 : }
518 :
519 : /* Make sure we start next block on a byte boundary */
520 4 : d->tag = 0;
521 4 : d->bitcount = 0;
522 :
523 4 : return TINF_OK;
524 : }
525 :
526 : /* Inflate a block of data compressed with fixed Huffman trees */
527 2 : static int tinf_inflate_fixed_block(struct tinf_data *d)
528 : {
529 : /* Build fixed Huffman trees */
530 2 : tinf_build_fixed_trees(&d->ltree, &d->dtree);
531 :
532 : /* Decode block using fixed trees */
533 2 : return tinf_inflate_block_data(d, &d->ltree, &d->dtree);
534 : }
535 :
536 : /* Inflate a block of data compressed with dynamic Huffman trees */
537 0 : static int tinf_inflate_dynamic_block(struct tinf_data *d)
538 : {
539 : /* Decode trees from stream */
540 0 : int res = tinf_decode_trees(d, &d->ltree, &d->dtree);
541 :
542 0 : if (res != TINF_OK) {
543 0 : return res;
544 : }
545 :
546 : /* Decode block using decoded trees */
547 0 : return tinf_inflate_block_data(d, &d->ltree, &d->dtree);
548 : }
549 :
550 : /* -- Public functions -- */
551 :
552 : /* Initialize global (static) data */
553 0 : void tinf_init(void)
554 : {
555 0 : return;
556 : }
557 :
558 : /* Inflate stream from source to dest */
559 6 : int tinf_uncompress(void *dest, unsigned int *destLen,
560 : const void *source, unsigned int sourceLen)
561 : {
562 : struct tinf_data d;
563 : int bfinal;
564 :
565 : /* Initialise data */
566 6 : d.source = (const unsigned char *) source;
567 6 : d.source_end = d.source + sourceLen;
568 6 : d.tag = 0;
569 6 : d.bitcount = 0;
570 6 : d.overflow = 0;
571 :
572 6 : d.dest = (unsigned char *) dest;
573 6 : d.dest_start = d.dest;
574 6 : d.dest_end = d.dest + *destLen;
575 :
576 : do {
577 : unsigned int btype;
578 : int res;
579 :
580 : /* Read final block flag */
581 6 : bfinal = tinf_getbits(&d, 1);
582 :
583 : /* Read block type (2 bits) */
584 6 : btype = tinf_getbits(&d, 2);
585 :
586 : /* Decompress block */
587 6 : switch (btype) {
588 4 : case 0:
589 : /* Decompress uncompressed block */
590 4 : res = tinf_inflate_uncompressed_block(&d);
591 4 : break;
592 2 : case 1:
593 : /* Decompress block with fixed Huffman trees */
594 2 : res = tinf_inflate_fixed_block(&d);
595 2 : break;
596 0 : case 2:
597 : /* Decompress block with dynamic Huffman trees */
598 0 : res = tinf_inflate_dynamic_block(&d);
599 0 : break;
600 0 : default:
601 0 : res = TINF_DATA_ERROR;
602 0 : break;
603 : }
604 :
605 6 : if (res != TINF_OK) {
606 0 : return res;
607 : }
608 6 : } while (!bfinal);
609 :
610 : /* Check for overflow in bit reader */
611 6 : if (d.overflow) {
612 0 : return TINF_DATA_ERROR;
613 : }
614 :
615 6 : *destLen = d.dest - d.dest_start;
616 :
617 6 : return TINF_OK;
618 : }
619 :
620 : /* clang -g -O1 -fsanitize=fuzzer,address -DTINF_FUZZING tinflate.c */
621 : #if defined(TINF_FUZZING)
622 : #include <limits.h>
623 : #include <stddef.h>
624 : #include <stdint.h>
625 : #include <stdlib.h>
626 : #include <string.h>
627 :
628 : unsigned char depacked[64 * 1024];
629 :
630 : extern int
631 : LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
632 : {
633 : if (size > UINT_MAX / 2) { return 0; }
634 : unsigned int destLen = sizeof(depacked);
635 : tinf_uncompress(depacked, &destLen, data, size);
636 : return 0;
637 : }
638 : #endif
|