Skip to content

Commit

Permalink
Fix Snappy decoding of large 2-byte literal lengths and copy offsets
Browse files Browse the repository at this point in the history
Motivation:

The Snappy decoder was failing on valid inputs containing literals
with 2-byte lengths > 0x8000 or copies with 2-byte offsets >= 0x8000.

The decoder was also enforcing an artificially low offset limit of
0x7FFF, something the Snappy format description advises against,
and which prevents decoding valid inputs generated by other encoders.

Modifications:

Interpret 2-byte literal lengths and 2-byte copy offsets as unsigned
shorts, in accordance with the format description and reference
implementation.

Allow any positive offset value. Throw an appropriate exception
for negative values (which can theoretically occur due to arithmetic
overflow on 4-byte offsets, but are unlikely to occur in the wild).

Result:

The Snappy decoder can handle valid inputs that previously caused
it to throw exceptions.
  • Loading branch information
dnault authored and normanmaurer committed Feb 20, 2018
1 parent ce241bd commit f40ecc3
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ static int decodeLiteral(byte tag, ByteBuf in, ByteBuf out) {
if (in.readableBytes() < 2) {
return NOT_ENOUGH_INPUT;
}
length = in.readShortLE();
length = in.readUnsignedShortLE();
break;
case 62:
if (in.readableBytes() < 3) {
Expand Down Expand Up @@ -495,7 +495,7 @@ private static int decodeCopyWith2ByteOffset(byte tag, ByteBuf in, ByteBuf out,

int initialIndex = out.writerIndex();
int length = 1 + (tag >> 2 & 0x03f);
int offset = in.readShortLE();
int offset = in.readUnsignedShortLE();

validateOffset(offset, writtenSoFar);

Expand Down Expand Up @@ -565,20 +565,21 @@ private static int decodeCopyWith4ByteOffset(byte tag, ByteBuf in, ByteBuf out,

/**
* Validates that the offset extracted from a compressed reference is within
* the permissible bounds of an offset (4 <= offset <= 32768), and does not
* the permissible bounds of an offset (0 < offset < Integer.MAX_VALUE), and does not
* exceed the length of the chunk currently read so far.
*
* @param offset The offset extracted from the compressed reference
* @param chunkSizeSoFar The number of bytes read so far from this chunk
* @throws DecompressionException if the offset is invalid
*/
private static void validateOffset(int offset, int chunkSizeSoFar) {
if (offset > Short.MAX_VALUE) {
throw new DecompressionException("Offset exceeds maximum permissible value");
if (offset == 0) {
throw new DecompressionException("Offset is less than minimum permissible value");
}

if (offset <= 0) {
throw new DecompressionException("Offset is less than minimum permissible value");
if (offset < 0) {
// Due to arithmetic overflow
throw new DecompressionException("Offset is greater than maximum value supported by this implementation");
}

if (offset > chunkSizeSoFar) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,4 +284,39 @@ public void testEncodeLiteralAndDecodeLiteral() {
}
}
}

@Test
public void testLarge2ByteLiteralLengthAndCopyOffset() {
ByteBuf compressed = Unpooled.buffer();
ByteBuf actualDecompressed = Unpooled.buffer();
ByteBuf expectedDecompressed = Unpooled.buffer().writeByte(0x01).writeZero(0x8000).writeByte(0x01);
try {
// Generate a Snappy-encoded buffer that can only be decompressed correctly if
// the decoder treats 2-byte literal lengths and 2-byte copy offsets as unsigned values.

// Write preamble, uncompressed content length (0x8002) encoded as varint.
compressed.writeByte(0x82).writeByte(0x80).writeByte(0x02);

// Write a literal consisting of 0x01 followed by 0x8000 zeroes.
// The total length of this literal is 0x8001, which gets encoded as 0x8000 (length - 1).
// This length was selected because the encoded form is one larger than the maximum value
// representable using a signed 16-bit integer, and we want to assert the decoder is reading
// the length as an unsigned value.
compressed.writeByte(61 << 2); // tag for LITERAL with a 2-byte length
compressed.writeShortLE(0x8000); // length - 1
compressed.writeByte(0x01).writeZero(0x8000); // literal content

// Similarly, for a 2-byte copy operation we want to ensure the offset is treated as unsigned.
// Copy the initial 0x01 which was written 0x8001 bytes back in the stream.
compressed.writeByte(0x02); // tag for COPY with 2-byte offset, length = 1
compressed.writeShortLE(0x8001); // offset

snappy.decode(compressed, actualDecompressed);
assertEquals(expectedDecompressed, actualDecompressed);
} finally {
compressed.release();
actualDecompressed.release();
expectedDecompressed.release();
}
}
}

0 comments on commit f40ecc3

Please sign in to comment.