Skip to content

Commit

Permalink
LibCompress: Add an LZW compressor
Browse files Browse the repository at this point in the history
  • Loading branch information
LucasChollet authored and ADKaster committed May 14, 2024
1 parent ff33fa7 commit 54f33b4
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 0 deletions.
1 change: 1 addition & 0 deletions Tests/LibCompress/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ set(TEST_SOURCES
TestDeflate.cpp
TestGzip.cpp
TestLzma.cpp
TestLzw.cpp
TestPackBits.cpp
TestXz.cpp
TestZlib.cpp
Expand Down
33 changes: 33 additions & 0 deletions Tests/LibCompress/TestLzw.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright (c) 2024, Lucas Chollet <[email protected]>
*
* SPDX-License-Identifier: BSD-2-Clause
*/

#include <LibTest/TestCase.h>

#include <AK/Array.h>
#include <LibCompress/Lzw.h>

namespace {

// Compresses `input` with Compress::LzwCompressor, decompresses the result with the
// little-endian Compress::LzwDecompressor (both using a code size of 8), and reports
// whether the decompressed bytes are equal to the bytes of `input`.
// Any error from either step is propagated to the caller.
ErrorOr<bool> test_roundtrip_string(StringView input)
{
    auto const original_bytes = input.bytes();

    auto const encoded = TRY(Compress::LzwCompressor::compress_all(original_bytes, 8));
    auto const decoded = TRY(Compress::LzwDecompressor<LittleEndianInputBitStream>::decompress_all(encoded, 8));

    return decoded == original_bytes;
}

}

// Round-trips a short string through test_roundtrip_string() and expects the
// decompressed output to be equal to the input.
TEST_CASE(roundtrip_lzw_little_endian_short)
{
EXPECT(TRY_OR_FAIL(test_roundtrip_string("WeWellll"sv)));
}

// Round-trips a long string through test_roundtrip_string() and expects the
// decompressed output to be equal to the input.
TEST_CASE(roundtrip_lzw_little_endian_long)
{
// LZW changes the code size after ~512 new symbols; this test case is long enough to trigger that.
constexpr auto input = "WellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,gWellWellWellWellaqwertyuiop[]sdfghjkl;'zxcvbnm,./uipnaspchu9epqrjepncdp9ruew-r8thvnufsipdonvjcx zvlrz[iu0q-348urfjsd;fjmvxc.nnnmvcxzvmc c,m;l'/,l4532[5i904tmorew;lgkrmopds['kg,l;'s,g"sv;
EXPECT(TRY_OR_FAIL(test_roundtrip_string(input)));
}
71 changes: 71 additions & 0 deletions Userland/Libraries/LibCompress/Lzw.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,4 +173,75 @@ class LzwDecompressor : private Details::LzwState {
Vector<u8> m_output {};
};

// One-shot LZW encoder. The code table and the current code size come from the
// privately inherited Details::LzwState.
class LzwCompressor : private Details::LzwState {
public:
    // Compresses `bytes` and returns the encoded data.
    //
    // The output is produced through a LittleEndianOutputBitStream and consists of:
    // a clear code, one code per longest symbol already present in the code table,
    // and finally the end-of-data code, after which the stream is aligned to a byte
    // boundary. `initial_code_size` is forwarded to Details::LzwState.
    //
    // Returns an error if writing to the stream fails, or if `bytes` contains a byte
    // that has no entry in the code table. Without that check the encoder would emit
    // code 0 for such a byte and would not consume it.
    static ErrorOr<ByteBuffer> compress_all(ReadonlyBytes bytes, u8 initial_code_size)
    {
        LzwCompressor compressor { initial_code_size };
        AllocatingMemoryStream buffer;
        LittleEndianOutputBitStream output_stream { MaybeOwned<Stream>(buffer) };

        u16 const clear_code = compressor.add_control_code();
        u16 const end_of_data_code = compressor.add_control_code();

        TRY(output_stream.write_bits(clear_code, compressor.m_code_size));

        // size_t (the type of bytes.size()) so that offsets cannot wrap around on very large inputs.
        size_t last_offset = 0;

        while (last_offset < bytes.size()) {
            // Once the table holds max_table_size - 2 entries, emit a clear code and start over.
            if (compressor.m_code_table.size() == max_table_size - 2) {
                TRY(output_stream.write_bits(clear_code, compressor.m_code_size));
                compressor.reset();
            }

            ReadonlyBytes current_symbol {};
            Optional<u16> current_code {};
            // Set when the search stopped on a symbol that is not in the table yet.
            bool found_new_symbol = false;

            // Grow the candidate symbol one byte at a time until it is no longer in the table
            // (or the input is exhausted), remembering the code of the longest match.
            for (size_t symbol_size = 1; last_offset + symbol_size <= bytes.size(); ++symbol_size) {
                current_symbol = bytes.slice(last_offset, symbol_size);
                auto const new_code = compressor.code_for_symbol(current_symbol);

                if (!new_code.has_value()) {
                    found_new_symbol = true;
                    break;
                }
                current_code = *new_code;
            }

            // The loop above runs at least once, so a missing code means that not even the
            // single byte at `last_offset` is in the table: there is nothing valid to emit.
            if (!current_code.has_value())
                return Error::from_string_literal("LzwCompressor: Input contains a byte that is not in the code table");

            TRY(output_stream.write_bits(current_code.value(), compressor.m_code_size));

            if (found_new_symbol) {
                // Register the new symbol, then drop its last byte: only the known prefix
                // was encoded, so the last byte has to start the next symbol.
                compressor.extend_code_table(Vector(current_symbol));
                current_symbol = current_symbol.trim(current_symbol.size() - 1);
            }
            last_offset += current_symbol.size();
        }

        TRY(output_stream.write_bits(end_of_data_code, compressor.m_code_size));
        TRY(output_stream.align_to_byte_boundary());
        TRY(output_stream.flush_buffer_to_stream());

        return TRY(buffer.read_until_eof());
    }

private:
    explicit LzwCompressor(u8 initial_code_size)
        : Details::LzwState(initial_code_size, 1)
    {
    }

    // Linearly searches the code table for an entry equal to `bytes` and returns its
    // index, or an empty Optional if there is none.
    Optional<u16> code_for_symbol(ReadonlyBytes bytes)
    {
        for (u16 i = 0; i < m_code_table.size(); ++i) {
            if (m_code_table[i].span() == bytes)
                return i;
        }

        return OptionalNone {};
    }
};

}

0 comments on commit 54f33b4

Please sign in to comment.