bzip2/decompress.c
Mark Wielaard 812a898b76 Accept as many selectors as the file format allows.
But ignore any larger than the theoretical maximum, BZ_MAX_SELECTORS.

The theoretical maximum number of selectors depends on the maximum
blocksize (900000 bytes) and the number of symbols (50) that can be
encoded with a different Huffman tree. BZ_MAX_SELECTORS is 18002.

But the bzip2 file format allows the number of selectors to be encoded
with 15 bits (because 18002 isn't a factor of 2 and doesn't fit in
14 bits). So the file format maximum is 32767 selectors.

Some bzip2 encoders might actually have written out more selectors
than the theoretical maximum because they rounded up the number of
selectors to some convenient factor of 8.

The extra 14766 selectors can never be validly used by the decompression
algorithm. So we can read them, but then discard them.

This is effectively what was done (by accident) before we added a
check for nSelectors to be at most BZ_MAX_SELECTORS to mitigate
CVE-2019-12900.

The extra selectors were written out after the array inside the
EState struct. But the struct has extra space allocated after the
selector arrays of 18060 bytes (which is larger than 14766).
All of which will be initialized later (so the overwrite of that
space with extra selector values would have been harmless).
2019-07-10 14:39:32 -05:00

653 lines
21 KiB
C

/*-------------------------------------------------------------*/
/*--- Decompression machinery ---*/
/*--- decompress.c ---*/
/*-------------------------------------------------------------*/
/* ------------------------------------------------------------------
This file is part of bzip2/libbzip2, a program and library for
lossless, block-sorting data compression.
bzip2/libbzip2 version 1.0.6 of 6 September 2010
Copyright (C) 1996-2010 Julian Seward <jseward@acm.org>
Please read the WARNING, DISCLAIMER and PATENTS sections in the
README file.
This program is released under the terms of the license contained
in the file LICENSE.
------------------------------------------------------------------ */
#include "bzlib_private.h"
/*---------------------------------------------------*/
static
void makeMaps_d ( DState* s )
{
Int32 i;
s->nInUse = 0;
for (i = 0; i < 256; i++)
if (s->inUse[i]) {
s->seqToUnseq[s->nInUse] = i;
s->nInUse++;
}
}
/*---------------------------------------------------*/
#define RETURN(rrr) \
{ retVal = rrr; goto save_state_and_return; };
#define GET_BITS(lll,vvv,nnn) \
case lll: s->state = lll; \
while (True) { \
if (s->bsLive >= nnn) { \
UInt32 v; \
v = (s->bsBuff >> \
(s->bsLive-nnn)) & ((1 << nnn)-1); \
s->bsLive -= nnn; \
vvv = v; \
break; \
} \
if (s->strm->avail_in == 0) RETURN(BZ_OK); \
s->bsBuff \
= (s->bsBuff << 8) | \
((UInt32) \
(*((UChar*)(s->strm->next_in)))); \
s->bsLive += 8; \
s->strm->next_in++; \
s->strm->avail_in--; \
s->strm->total_in_lo32++; \
if (s->strm->total_in_lo32 == 0) \
s->strm->total_in_hi32++; \
}
#define GET_UCHAR(lll,uuu) \
GET_BITS(lll,uuu,8)
#define GET_BIT(lll,uuu) \
GET_BITS(lll,uuu,1)
/*---------------------------------------------------*/
#define GET_MTF_VAL(label1,label2,lval) \
{ \
if (groupPos == 0) { \
groupNo++; \
if (groupNo >= nSelectors) \
RETURN(BZ_DATA_ERROR); \
groupPos = BZ_G_SIZE; \
gSel = s->selector[groupNo]; \
gMinlen = s->minLens[gSel]; \
gLimit = &(s->limit[gSel][0]); \
gPerm = &(s->perm[gSel][0]); \
gBase = &(s->base[gSel][0]); \
} \
groupPos--; \
zn = gMinlen; \
GET_BITS(label1, zvec, zn); \
while (1) { \
if (zn > 20 /* the longest code */) \
RETURN(BZ_DATA_ERROR); \
if (zvec <= gLimit[zn]) break; \
zn++; \
GET_BIT(label2, zj); \
zvec = (zvec << 1) | zj; \
}; \
if (zvec - gBase[zn] < 0 \
|| zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
RETURN(BZ_DATA_ERROR); \
lval = gPerm[zvec - gBase[zn]]; \
}
/*---------------------------------------------------*/
Int32 BZ2_decompress ( DState* s )
{
UChar uc;
Int32 retVal;
Int32 minLen, maxLen;
bz_stream* strm = s->strm;
/* stuff that needs to be saved/restored */
Int32 i;
Int32 j;
Int32 t;
Int32 alphaSize;
Int32 nGroups;
Int32 nSelectors;
Int32 EOB;
Int32 groupNo;
Int32 groupPos;
Int32 nextSym;
Int32 nblockMAX;
Int32 nblock;
Int32 es;
Int32 N;
Int32 curr;
Int32 zt;
Int32 zn;
Int32 zvec;
Int32 zj;
Int32 gSel;
Int32 gMinlen;
Int32* gLimit;
Int32* gBase;
Int32* gPerm;
if (s->state == BZ_X_MAGIC_1) {
/*initialise the save area*/
s->save_i = 0;
s->save_j = 0;
s->save_t = 0;
s->save_alphaSize = 0;
s->save_nGroups = 0;
s->save_nSelectors = 0;
s->save_EOB = 0;
s->save_groupNo = 0;
s->save_groupPos = 0;
s->save_nextSym = 0;
s->save_nblockMAX = 0;
s->save_nblock = 0;
s->save_es = 0;
s->save_N = 0;
s->save_curr = 0;
s->save_zt = 0;
s->save_zn = 0;
s->save_zvec = 0;
s->save_zj = 0;
s->save_gSel = 0;
s->save_gMinlen = 0;
s->save_gLimit = NULL;
s->save_gBase = NULL;
s->save_gPerm = NULL;
}
/*restore from the save area*/
i = s->save_i;
j = s->save_j;
t = s->save_t;
alphaSize = s->save_alphaSize;
nGroups = s->save_nGroups;
nSelectors = s->save_nSelectors;
EOB = s->save_EOB;
groupNo = s->save_groupNo;
groupPos = s->save_groupPos;
nextSym = s->save_nextSym;
nblockMAX = s->save_nblockMAX;
nblock = s->save_nblock;
es = s->save_es;
N = s->save_N;
curr = s->save_curr;
zt = s->save_zt;
zn = s->save_zn;
zvec = s->save_zvec;
zj = s->save_zj;
gSel = s->save_gSel;
gMinlen = s->save_gMinlen;
gLimit = s->save_gLimit;
gBase = s->save_gBase;
gPerm = s->save_gPerm;
retVal = BZ_OK;
switch (s->state) {
GET_UCHAR(BZ_X_MAGIC_1, uc);
if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC);
GET_UCHAR(BZ_X_MAGIC_2, uc);
if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC);
GET_UCHAR(BZ_X_MAGIC_3, uc)
if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC);
GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8)
if (s->blockSize100k < (BZ_HDR_0 + 1) ||
s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC);
s->blockSize100k -= BZ_HDR_0;
if (s->smallDecompress) {
s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) );
s->ll4 = BZALLOC(
((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar)
);
if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR);
} else {
s->tt = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) );
if (s->tt == NULL) RETURN(BZ_MEM_ERROR);
}
GET_UCHAR(BZ_X_BLKHDR_1, uc);
if (uc == 0x17) goto endhdr_2;
if (uc != 0x31) RETURN(BZ_DATA_ERROR);
GET_UCHAR(BZ_X_BLKHDR_2, uc);
if (uc != 0x41) RETURN(BZ_DATA_ERROR);
GET_UCHAR(BZ_X_BLKHDR_3, uc);
if (uc != 0x59) RETURN(BZ_DATA_ERROR);
GET_UCHAR(BZ_X_BLKHDR_4, uc);
if (uc != 0x26) RETURN(BZ_DATA_ERROR);
GET_UCHAR(BZ_X_BLKHDR_5, uc);
if (uc != 0x53) RETURN(BZ_DATA_ERROR);
GET_UCHAR(BZ_X_BLKHDR_6, uc);
if (uc != 0x59) RETURN(BZ_DATA_ERROR);
s->currBlockNo++;
if (s->verbosity >= 2)
VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo );
s->storedBlockCRC = 0;
GET_UCHAR(BZ_X_BCRC_1, uc);
s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
GET_UCHAR(BZ_X_BCRC_2, uc);
s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
GET_UCHAR(BZ_X_BCRC_3, uc);
s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
GET_UCHAR(BZ_X_BCRC_4, uc);
s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1);
s->origPtr = 0;
GET_UCHAR(BZ_X_ORIGPTR_1, uc);
s->origPtr = (s->origPtr << 8) | ((Int32)uc);
GET_UCHAR(BZ_X_ORIGPTR_2, uc);
s->origPtr = (s->origPtr << 8) | ((Int32)uc);
GET_UCHAR(BZ_X_ORIGPTR_3, uc);
s->origPtr = (s->origPtr << 8) | ((Int32)uc);
if (s->origPtr < 0)
RETURN(BZ_DATA_ERROR);
if (s->origPtr > 10 + 100000*s->blockSize100k)
RETURN(BZ_DATA_ERROR);
/*--- Receive the mapping table ---*/
for (i = 0; i < 16; i++) {
GET_BIT(BZ_X_MAPPING_1, uc);
if (uc == 1)
s->inUse16[i] = True; else
s->inUse16[i] = False;
}
for (i = 0; i < 256; i++) s->inUse[i] = False;
for (i = 0; i < 16; i++)
if (s->inUse16[i])
for (j = 0; j < 16; j++) {
GET_BIT(BZ_X_MAPPING_2, uc);
if (uc == 1) s->inUse[i * 16 + j] = True;
}
makeMaps_d ( s );
if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
alphaSize = s->nInUse+2;
/*--- Now the selectors ---*/
GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
if (nGroups < 2 || nGroups > BZ_N_GROUPS) RETURN(BZ_DATA_ERROR);
GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
for (i = 0; i < nSelectors; i++) {
j = 0;
while (True) {
GET_BIT(BZ_X_SELECTOR_3, uc);
if (uc == 0) break;
j++;
if (j >= nGroups) RETURN(BZ_DATA_ERROR);
}
/* Having more than BZ_MAX_SELECTORS doesn't make much sense
since they will never be used, but some implementations might
"round up" the number of selectors, so just ignore those. */
if (i < BZ_MAX_SELECTORS)
s->selectorMtf[i] = j;
}
if (nSelectors > BZ_MAX_SELECTORS)
nSelectors = BZ_MAX_SELECTORS;
/*--- Undo the MTF values for the selectors. ---*/
{
UChar pos[BZ_N_GROUPS], tmp, v;
for (v = 0; v < nGroups; v++) pos[v] = v;
for (i = 0; i < nSelectors; i++) {
v = s->selectorMtf[i];
tmp = pos[v];
while (v > 0) { pos[v] = pos[v-1]; v--; }
pos[0] = tmp;
s->selector[i] = tmp;
}
}
/*--- Now the coding tables ---*/
for (t = 0; t < nGroups; t++) {
GET_BITS(BZ_X_CODING_1, curr, 5);
for (i = 0; i < alphaSize; i++) {
while (True) {
if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
GET_BIT(BZ_X_CODING_2, uc);
if (uc == 0) break;
GET_BIT(BZ_X_CODING_3, uc);
if (uc == 0) curr++; else curr--;
}
s->len[t][i] = curr;
}
}
/*--- Create the Huffman decoding tables ---*/
for (t = 0; t < nGroups; t++) {
minLen = 32;
maxLen = 0;
for (i = 0; i < alphaSize; i++) {
if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
if (s->len[t][i] < minLen) minLen = s->len[t][i];
}
BZ2_hbCreateDecodeTables (
&(s->limit[t][0]),
&(s->base[t][0]),
&(s->perm[t][0]),
&(s->len[t][0]),
minLen, maxLen, alphaSize
);
s->minLens[t] = minLen;
}
/*--- Now the MTF values ---*/
EOB = s->nInUse+1;
nblockMAX = 100000 * s->blockSize100k;
groupNo = -1;
groupPos = 0;
for (i = 0; i <= 255; i++) s->unzftab[i] = 0;
/*-- MTF init --*/
{
Int32 ii, jj, kk;
kk = MTFA_SIZE-1;
for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj);
kk--;
}
s->mtfbase[ii] = kk + 1;
}
}
/*-- end MTF init --*/
nblock = 0;
GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
while (True) {
if (nextSym == EOB) break;
if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
es = -1;
N = 1;
do {
/* Check that N doesn't get too big, so that es doesn't
go negative. The maximum value that can be
RUNA/RUNB encoded is equal to the block size (post
the initial RLE), viz, 900k, so bounding N at 2
million should guard against overflow without
rejecting any legitimate inputs. */
if (N >= 2*1024*1024) RETURN(BZ_DATA_ERROR);
if (nextSym == BZ_RUNA) es = es + (0+1) * N; else
if (nextSym == BZ_RUNB) es = es + (1+1) * N;
N = N * 2;
GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym);
}
while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
es++;
uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
s->unzftab[uc] += es;
if (s->smallDecompress)
while (es > 0) {
if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
s->ll16[nblock] = (UInt16)uc;
nblock++;
es--;
}
else
while (es > 0) {
if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
s->tt[nblock] = (UInt32)uc;
nblock++;
es--;
};
continue;
} else {
if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
/*-- uc = MTF ( nextSym-1 ) --*/
{
Int32 ii, jj, kk, pp, lno, off;
UInt32 nn;
nn = (UInt32)(nextSym - 1);
if (nn < MTFL_SIZE) {
/* avoid general-case expense */
pp = s->mtfbase[0];
uc = s->mtfa[pp+nn];
while (nn > 3) {
Int32 z = pp+nn;
s->mtfa[(z) ] = s->mtfa[(z)-1];
s->mtfa[(z)-1] = s->mtfa[(z)-2];
s->mtfa[(z)-2] = s->mtfa[(z)-3];
s->mtfa[(z)-3] = s->mtfa[(z)-4];
nn -= 4;
}
while (nn > 0) {
s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--;
};
s->mtfa[pp] = uc;
} else {
/* general case */
lno = nn / MTFL_SIZE;
off = nn % MTFL_SIZE;
pp = s->mtfbase[lno] + off;
uc = s->mtfa[pp];
while (pp > s->mtfbase[lno]) {
s->mtfa[pp] = s->mtfa[pp-1]; pp--;
};
s->mtfbase[lno]++;
while (lno > 0) {
s->mtfbase[lno]--;
s->mtfa[s->mtfbase[lno]]
= s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
lno--;
}
s->mtfbase[0]--;
s->mtfa[s->mtfbase[0]] = uc;
if (s->mtfbase[0] == 0) {
kk = MTFA_SIZE-1;
for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
kk--;
}
s->mtfbase[ii] = kk + 1;
}
}
}
}
/*-- end uc = MTF ( nextSym-1 ) --*/
s->unzftab[s->seqToUnseq[uc]]++;
if (s->smallDecompress)
s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]);
nblock++;
GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
continue;
}
}
/* Now we know what nblock is, we can do a better sanity
check on s->origPtr.
*/
if (s->origPtr < 0 || s->origPtr >= nblock)
RETURN(BZ_DATA_ERROR);
/*-- Set up cftab to facilitate generation of T^(-1) --*/
/* Check: unzftab entries in range. */
for (i = 0; i <= 255; i++) {
if (s->unzftab[i] < 0 || s->unzftab[i] > nblock)
RETURN(BZ_DATA_ERROR);
}
/* Actually generate cftab. */
s->cftab[0] = 0;
for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1];
for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1];
/* Check: cftab entries in range. */
for (i = 0; i <= 256; i++) {
if (s->cftab[i] < 0 || s->cftab[i] > nblock) {
/* s->cftab[i] can legitimately be == nblock */
RETURN(BZ_DATA_ERROR);
}
}
/* Check: cftab entries non-descending. */
for (i = 1; i <= 256; i++) {
if (s->cftab[i-1] > s->cftab[i]) {
RETURN(BZ_DATA_ERROR);
}
}
s->state_out_len = 0;
s->state_out_ch = 0;
BZ_INITIALISE_CRC ( s->calculatedBlockCRC );
s->state = BZ_X_OUTPUT;
if (s->verbosity >= 2) VPrintf0 ( "rt+rld" );
if (s->smallDecompress) {
/*-- Make a copy of cftab, used in generation of T --*/
for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i];
/*-- compute the T vector --*/
for (i = 0; i < nblock; i++) {
uc = (UChar)(s->ll16[i]);
SET_LL(i, s->cftabCopy[uc]);
s->cftabCopy[uc]++;
}
/*-- Compute T^(-1) by pointer reversal on T --*/
i = s->origPtr;
j = GET_LL(i);
do {
Int32 tmp = GET_LL(j);
SET_LL(j, i);
i = j;
j = tmp;
}
while (i != s->origPtr);
s->tPos = s->origPtr;
s->nblock_used = 0;
if (s->blockRandomised) {
BZ_RAND_INIT_MASK;
BZ_GET_SMALL(s->k0); s->nblock_used++;
BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
} else {
BZ_GET_SMALL(s->k0); s->nblock_used++;
}
} else {
/*-- compute the T^(-1) vector --*/
for (i = 0; i < nblock; i++) {
uc = (UChar)(s->tt[i] & 0xff);
s->tt[s->cftab[uc]] |= (i << 8);
s->cftab[uc]++;
}
s->tPos = s->tt[s->origPtr] >> 8;
s->nblock_used = 0;
if (s->blockRandomised) {
BZ_RAND_INIT_MASK;
BZ_GET_FAST(s->k0); s->nblock_used++;
BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
} else {
BZ_GET_FAST(s->k0); s->nblock_used++;
}
}
RETURN(BZ_OK);
endhdr_2:
GET_UCHAR(BZ_X_ENDHDR_2, uc);
if (uc != 0x72) RETURN(BZ_DATA_ERROR);
GET_UCHAR(BZ_X_ENDHDR_3, uc);
if (uc != 0x45) RETURN(BZ_DATA_ERROR);
GET_UCHAR(BZ_X_ENDHDR_4, uc);
if (uc != 0x38) RETURN(BZ_DATA_ERROR);
GET_UCHAR(BZ_X_ENDHDR_5, uc);
if (uc != 0x50) RETURN(BZ_DATA_ERROR);
GET_UCHAR(BZ_X_ENDHDR_6, uc);
if (uc != 0x90) RETURN(BZ_DATA_ERROR);
s->storedCombinedCRC = 0;
GET_UCHAR(BZ_X_CCRC_1, uc);
s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
GET_UCHAR(BZ_X_CCRC_2, uc);
s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
GET_UCHAR(BZ_X_CCRC_3, uc);
s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
GET_UCHAR(BZ_X_CCRC_4, uc);
s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
s->state = BZ_X_IDLE;
RETURN(BZ_STREAM_END);
default: AssertH ( False, 4001 );
}
AssertH ( False, 4002 );
save_state_and_return:
s->save_i = i;
s->save_j = j;
s->save_t = t;
s->save_alphaSize = alphaSize;
s->save_nGroups = nGroups;
s->save_nSelectors = nSelectors;
s->save_EOB = EOB;
s->save_groupNo = groupNo;
s->save_groupPos = groupPos;
s->save_nextSym = nextSym;
s->save_nblockMAX = nblockMAX;
s->save_nblock = nblock;
s->save_es = es;
s->save_N = N;
s->save_curr = curr;
s->save_zt = zt;
s->save_zn = zn;
s->save_zvec = zvec;
s->save_zj = zj;
s->save_gSel = gSel;
s->save_gMinlen = gMinlen;
s->save_gLimit = gLimit;
s->save_gBase = gBase;
s->save_gPerm = gPerm;
return retVal;
}
/*-------------------------------------------------------------*/
/*--- end decompress.c ---*/
/*-------------------------------------------------------------*/