Skip to content

Commit

Permalink
LibAudio: Add spec comments to the FlacLoader
Browse files Browse the repository at this point in the history
This way the FlacLoader can be more easily understood by someone who
doesn't already know the format inside out.
  • Loading branch information
kleinesfilmroellchen authored and linusg committed Jun 23, 2022
1 parent c03a0e7 commit cb8e37d
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 13 deletions.
56 changes: 50 additions & 6 deletions Userland/Libraries/LibAudio/FlacLoader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ MaybeLoaderError FlacLoaderPlugin::initialize()
return {};
}

// 11.5 STREAM
MaybeLoaderError FlacLoaderPlugin::parse_header()
{
auto bit_input = LOADER_TRY(BigEndianInputBitStream::construct(*m_stream));
Expand All @@ -82,7 +83,7 @@ MaybeLoaderError FlacLoaderPlugin::parse_header()
auto streaminfo_data_memory = LOADER_TRY(Core::Stream::MemoryStream::construct(streaminfo.data.bytes()));
auto streaminfo_data = LOADER_TRY(BigEndianInputBitStream::construct(*streaminfo_data_memory));

// STREAMINFO block
// 11.10 METADATA_BLOCK_STREAMINFO
m_min_block_size = LOADER_TRY(streaminfo_data->read_bits<u16>(16));
FLAC_VERIFY(m_min_block_size >= 16, LoaderError::Category::Format, "Minimum block size must be 16");
m_max_block_size = LOADER_TRY(streaminfo_data->read_bits<u16>(16));
Expand Down Expand Up @@ -139,11 +140,13 @@ MaybeLoaderError FlacLoaderPlugin::parse_header()
return {};
}

// 11.13. METADATA_BLOCK_SEEKTABLE
MaybeLoaderError FlacLoaderPlugin::load_seektable(FlacRawMetadataBlock& block)
{
auto memory_stream = LOADER_TRY(Core::Stream::MemoryStream::construct(block.data.bytes()));
auto seektable_bytes = LOADER_TRY(BigEndianInputBitStream::construct(*memory_stream));
for (size_t i = 0; i < block.length / 18; ++i) {
// 11.14. SEEKPOINT
FlacSeekPoint seekpoint {
.sample_index = LOADER_TRY(seektable_bytes->read_bits<u64>(64)),
.byte_offset = LOADER_TRY(seektable_bytes->read_bits<u64>(64)),
Expand All @@ -155,9 +158,10 @@ MaybeLoaderError FlacLoaderPlugin::load_seektable(FlacRawMetadataBlock& block)
return {};
}

// 11.6 METADATA_BLOCK
ErrorOr<FlacRawMetadataBlock, LoaderError> FlacLoaderPlugin::next_meta_block(BigEndianInputBitStream& bit_input)
{

// 11.7 METADATA_BLOCK_HEADER
bool is_last_block = LOADER_TRY(bit_input.read_bit());
// The block type enum constants agree with the specification
FlacMetadataBlockType type = (FlacMetadataBlockType)LOADER_TRY(bit_input.read_bits<u8>(7));
Expand Down Expand Up @@ -270,6 +274,7 @@ LoaderSamples FlacLoaderPlugin::get_more_samples(size_t max_bytes_to_read_from_i
return samples;
}

// 11.21. FRAME
MaybeLoaderError FlacLoaderPlugin::next_frame(Span<Sample> target_vector)
{
#define FLAC_VERIFY(check, category, msg) \
Expand All @@ -283,11 +288,12 @@ MaybeLoaderError FlacLoaderPlugin::next_frame(Span<Sample> target_vector)

// TODO: Check the CRC-16 checksum (and others) by keeping track of read data

// FLAC frame sync code starts header
// 11.22. FRAME_HEADER
u16 sync_code = LOADER_TRY(bit_stream->read_bits<u16>(14));
FLAC_VERIFY(sync_code == 0b11111111111110, LoaderError::Category::Format, "Sync code");
bool reserved_bit = LOADER_TRY(bit_stream->read_bit());
FLAC_VERIFY(reserved_bit == 0, LoaderError::Category::Format, "Reserved frame header bit");
// 11.22.2. BLOCKING STRATEGY
[[maybe_unused]] bool blocking_strategy = LOADER_TRY(bit_stream->read_bit());

u32 sample_count = TRY(convert_sample_count_code(LOADER_TRY(bit_stream->read_bits<u8>(4))));
Expand All @@ -303,16 +309,19 @@ MaybeLoaderError FlacLoaderPlugin::next_frame(Span<Sample> target_vector)
reserved_bit = LOADER_TRY(bit_stream->read_bit());
FLAC_VERIFY(reserved_bit == 0, LoaderError::Category::Format, "Reserved frame header end bit");

// 11.22.8. CODED NUMBER
// FIXME: sample number can be 8-56 bits, frame number can be 8-48 bits
m_current_sample_or_frame = LOADER_TRY(read_utf8_char(*bit_stream));

// Conditional header variables
// 11.22.9. BLOCK SIZE INT
if (sample_count == FLAC_BLOCKSIZE_AT_END_OF_HEADER_8) {
sample_count = LOADER_TRY(bit_stream->read_bits<u32>(8)) + 1;
} else if (sample_count == FLAC_BLOCKSIZE_AT_END_OF_HEADER_16) {
sample_count = LOADER_TRY(bit_stream->read_bits<u32>(16)) + 1;
}

// 11.22.10. SAMPLE RATE INT
if (frame_sample_rate == FLAC_SAMPLERATE_AT_END_OF_HEADER_8) {
frame_sample_rate = LOADER_TRY(bit_stream->read_bits<u32>(8)) * 1000;
} else if (frame_sample_rate == FLAC_SAMPLERATE_AT_END_OF_HEADER_16) {
Expand All @@ -321,6 +330,7 @@ MaybeLoaderError FlacLoaderPlugin::next_frame(Span<Sample> target_vector)
frame_sample_rate = LOADER_TRY(bit_stream->read_bits<u32>(16)) * 10;
}

// 11.22.11. FRAME CRC
// TODO: check header checksum, see above
[[maybe_unused]] u8 checksum = LOADER_TRY(bit_stream->read_bits<u8>(8));

Expand All @@ -343,8 +353,10 @@ MaybeLoaderError FlacLoaderPlugin::next_frame(Span<Sample> target_vector)
current_subframes.unchecked_append(move(subframe_samples));
}

// 11.2. Overview ("The audio data is composed of...")
bit_stream->align_to_byte_boundary();

// 11.23. FRAME_FOOTER
// TODO: check checksum, see above
[[maybe_unused]] u16 footer_checksum = LOADER_TRY(bit_stream->read_bits<u16>(16));
dbgln_if(AFLACLOADER_DEBUG, "Subframe footer checksum: {}", footer_checksum);
Expand Down Expand Up @@ -425,6 +437,7 @@ MaybeLoaderError FlacLoaderPlugin::next_frame(Span<Sample> target_vector)
#undef FLAC_VERIFY
}

// 11.22.3. INTERCHANNEL SAMPLE BLOCK SIZE
ErrorOr<u32, LoaderError> FlacLoaderPlugin::convert_sample_count_code(u8 sample_count_code)
{
// single codes
Expand All @@ -444,6 +457,7 @@ ErrorOr<u32, LoaderError> FlacLoaderPlugin::convert_sample_count_code(u8 sample_
return 256 * AK::exp2(sample_count_code - 8);
}

// 11.22.4. SAMPLE RATE
ErrorOr<u32, LoaderError> FlacLoaderPlugin::convert_sample_rate_code(u8 sample_rate_code)
{
switch (sample_rate_code) {
Expand Down Expand Up @@ -482,6 +496,7 @@ ErrorOr<u32, LoaderError> FlacLoaderPlugin::convert_sample_rate_code(u8 sample_r
}
}

// 11.22.6. SAMPLE SIZE
ErrorOr<PcmSampleFormat, LoaderError> FlacLoaderPlugin::convert_bit_depth_code(u8 bit_depth_code)
{
switch (bit_depth_code) {
Expand All @@ -501,13 +516,15 @@ ErrorOr<PcmSampleFormat, LoaderError> FlacLoaderPlugin::convert_bit_depth_code(u
}
}

// 11.22.5. CHANNEL ASSIGNMENT
// Translates a frame's channel assignment into the number of channels it carries.
// Assignments up to and including Surround7p1 encode "channel count - 1" in their
// underlying value; every assignment past that yields two channels.
u8 frame_channel_type_to_channel_count(FlacFrameChannelType channel_type)
{
    bool const is_beyond_surround = channel_type > FlacFrameChannelType::Surround7p1;
    return is_beyond_surround ? 2 : to_underlying(channel_type) + 1;
}

// 11.25. SUBFRAME_HEADER
ErrorOr<FlacSubframeHeader, LoaderError> FlacLoaderPlugin::next_subframe_header(BigEndianInputBitStream& bit_stream, u8 channel_index)
{
u8 bits_per_sample = static_cast<u16>(pcm_bits_per_sample(m_current_frame->bit_depth));
Expand All @@ -534,7 +551,7 @@ ErrorOr<FlacSubframeHeader, LoaderError> FlacLoaderPlugin::next_subframe_header(
if (LOADER_TRY(bit_stream.read_bit()) != 0)
return LoaderError { LoaderError::Category::Format, static_cast<size_t>(m_current_sample_or_frame), "Zero bit padding" };

// subframe type (encoding)
// 11.25.1. SUBFRAME TYPE
u8 subframe_code = LOADER_TRY(bit_stream.read_bits<u8>(6));
if ((subframe_code >= 0b000010 && subframe_code <= 0b000111) || (subframe_code > 0b001100 && subframe_code < 0b100000))
return LoaderError { LoaderError::Category::Format, static_cast<size_t>(m_current_sample_or_frame), "Subframe type" };
Expand All @@ -553,7 +570,7 @@ ErrorOr<FlacSubframeHeader, LoaderError> FlacLoaderPlugin::next_subframe_header(
subframe_type = (FlacSubframeType)subframe_code;
}

// wasted bits per sample (unary encoding)
// 11.25.2. WASTED BITS PER SAMPLE FLAG
bool has_wasted_bits = LOADER_TRY(bit_stream.read_bit());
u8 k = 0;
if (has_wasted_bits) {
Expand All @@ -578,6 +595,7 @@ ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::parse_subframe(FlacSubframeH

switch (subframe_header.type) {
case FlacSubframeType::Constant: {
// 11.26. SUBFRAME_CONSTANT
u64 constant_value = LOADER_TRY(bit_input.read_bits<u64>(subframe_header.bits_per_sample - subframe_header.wasted_bits_per_sample));
dbgln_if(AFLACLOADER_DEBUG, "Constant subframe: {}", constant_value);

Expand Down Expand Up @@ -616,6 +634,7 @@ ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::parse_subframe(FlacSubframeH
return resampler.resample(samples);
}

// 11.29. SUBFRAME_VERBATIM
// Decode a subframe that isn't actually encoded, usually seen in random data
ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_verbatim(FlacSubframeHeader& subframe, BigEndianInputBitStream& bit_input)
{
Expand All @@ -632,6 +651,7 @@ ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_verbatim(FlacSubframe
return decoded;
}

// 11.28. SUBFRAME_LPC
// Decode a subframe encoded with a custom linear predictor coding, i.e. the subframe provides the polynomial order and coefficients
ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_custom_lpc(FlacSubframeHeader& subframe, BigEndianInputBitStream& bit_input)
{
Expand Down Expand Up @@ -677,6 +697,7 @@ ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_custom_lpc(FlacSubfra
// Even though FLAC operates at a maximum bit depth of 32 bits, modern encoders use super-large coefficients for maximum compression.
// These will easily overflow 32 bits and cause strange white noise that abruptly stops intermittently (at the end of a frame).
// The simple fix of course is to do intermediate computations in 64 bits.
// These considerations are not in the original FLAC spec, but have been added to the IETF standard: https://datatracker.ietf.org/doc/html/draft-ietf-cellar-flac-03#appendix-A.3
sample += static_cast<i64>(coefficients[t]) * static_cast<i64>(decoded[i - t - 1]);
}
decoded[i] += sample >> lpc_shift;
Expand All @@ -685,6 +706,7 @@ ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_custom_lpc(FlacSubfra
return decoded;
}

// 11.27. SUBFRAME_FIXED
// Decode a subframe encoded with one of the fixed linear predictor codings
ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_fixed_lpc(FlacSubframeHeader& subframe, BigEndianInputBitStream& bit_input)
{
Expand All @@ -703,6 +725,23 @@ ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_fixed_lpc(FlacSubfram

dbgln_if(AFLACLOADER_DEBUG, "decoded length {}, {} order predictor", decoded.size(), subframe.order);

// Skip these comments if you don't care about the neat math behind fixed LPC :^)
// These coefficients for the recursive prediction formula are the only ones that can be resolved to polynomial predictor functions.
// The degree of the underlying polynomial equals the order - 1, so the second-order predictor has an underlying polynomial of degree 1, a straight line.
// More specifically, the closest approximation to a polynomial is used, and the degree depends on how many previous values are available.
// This makes use of a very neat property of polynomials, which is that they are entirely characterized by their finitely many derivatives.
// (Mathematically speaking, the infinite Taylor series of any polynomial equals the polynomial itself.)
// Now remember that the derivative is just the slope of the function, which is the same as the difference of two close-by values.
// Therefore, with two samples we can calculate the first derivative at a sample via the difference, which gives us a polynomial of degree 1.
// With three samples, we can do the same but also calculate the second derivative via the difference in the first derivatives.
// This gives us a polynomial of degree 2, as it has two "proper" (non-constant) derivatives.
// This can be continued for higher-order derivatives when we have more coefficients, giving us higher-order polynomials.
// In essence, it's akin to a Lagrangian polynomial interpolation for every sample (but already pre-solved).

// The coefficients for orders 0-3 originate from the SHORTEN codec:
// https://mi.eng.cam.ac.uk/reports/svr-ftp/auto-pdf/robinson_tr156.pdf page 4
// The coefficients for order 4 are undocumented in the original FLAC specification(s), but can now be found in
// https://datatracker.ietf.org/doc/html/draft-ietf-cellar-flac-03#section-10.2.5
switch (subframe.order) {
case 0:
// s_0(t) = 0
Expand Down Expand Up @@ -735,20 +774,24 @@ ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_fixed_lpc(FlacSubfram
return decoded;
}

// 11.30. RESIDUAL
// Decode the residual, the "error" between the function approximation and the actual audio data
MaybeLoaderError FlacLoaderPlugin::decode_residual(Vector<i32>& decoded, FlacSubframeHeader& subframe, BigEndianInputBitStream& bit_input)
{
// 11.30.1. RESIDUAL_CODING_METHOD
auto residual_mode = static_cast<FlacResidualMode>(LOADER_TRY(bit_input.read_bits<u8>(2)));
u8 partition_order = LOADER_TRY(bit_input.read_bits<u8>(4));
size_t partitions = 1 << partition_order;

if (residual_mode == FlacResidualMode::Rice4Bit) {
// 11.30.2. RESIDUAL_CODING_METHOD_PARTITIONED_EXP_GOLOMB
// decode a single Rice partition with four bits for the order k
for (size_t i = 0; i < partitions; ++i) {
auto rice_partition = TRY(decode_rice_partition(4, partitions, i, subframe, bit_input));
decoded.extend(move(rice_partition));
}
} else if (residual_mode == FlacResidualMode::Rice5Bit) {
// 11.30.3. RESIDUAL_CODING_METHOD_PARTITIONED_EXP_GOLOMB2
// five bits equivalent
for (size_t i = 0; i < partitions; ++i) {
auto rice_partition = TRY(decode_rice_partition(5, partitions, i, subframe, bit_input));
Expand All @@ -760,10 +803,11 @@ MaybeLoaderError FlacLoaderPlugin::decode_residual(Vector<i32>& decoded, FlacSub
return {};
}

// 11.30.2.1. EXP_GOLOMB_PARTITION and 11.30.3.1. EXP_GOLOMB2_PARTITION
// Decode a single Rice partition as part of the residual, every partition can have its own Rice parameter k
ALWAYS_INLINE ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_rice_partition(u8 partition_type, u32 partitions, u32 partition_index, FlacSubframeHeader& subframe, BigEndianInputBitStream& bit_input)
{
// Rice parameter / Exp-Golomb order
// 11.30.2.2. EXP GOLOMB PARTITION ENCODING PARAMETER and 11.30.3.2. EXP-GOLOMB2 PARTITION ENCODING PARAMETER
u8 k = LOADER_TRY(bit_input.read_bits<u8>(partition_type));

u32 residual_sample_count;
Expand Down
7 changes: 7 additions & 0 deletions Userland/Libraries/LibAudio/FlacLoader.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ ALWAYS_INLINE ErrorOr<u64> read_utf8_char(BigEndianInputBitStream& input);
// decode a single number encoded with exponential golomb encoding of the specified order
ALWAYS_INLINE ErrorOr<i32> decode_unsigned_exp_golomb(u8 order, BigEndianInputBitStream& bit_input);

// Loader for the Free Lossless Audio Codec (FLAC)
// This loader supports all audio features of FLAC, although audio from more than two channels is discarded.
// The loader currently supports the STREAMINFO, PADDING, and SEEKTABLE metadata blocks.
// See: https://xiph.org/flac/documentation_format_overview.html
// https://xiph.org/flac/format.html (identical to IETF draft version 2)
// https://datatracker.ietf.org/doc/html/draft-ietf-cellar-flac-02 (all section numbers refer to this specification)
// https://datatracker.ietf.org/doc/html/draft-ietf-cellar-flac-03 (newer IETF draft that uses incompatible numberings and names)
class FlacLoaderPlugin : public LoaderPlugin {
public:
explicit FlacLoaderPlugin(StringView path);
Expand Down
18 changes: 11 additions & 7 deletions Userland/Libraries/LibAudio/FlacTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,16 @@

namespace Audio {

// Temporary constants for header blocksize/sample rate spec
// These are not the actual values stored in the file! They are marker constants instead, only used temporarily in the decoder.
// A decoded block size / sample rate that equals one of these markers tells the frame header parser
// that the real value still has to be read from the end of the frame header.
// 11.22.3. INTERCHANNEL SAMPLE BLOCK SIZE
// Real block size follows as an 8-bit value (stored as block size - 1).
#define FLAC_BLOCKSIZE_AT_END_OF_HEADER_8 0xffffffff
// Real block size follows as a 16-bit value (stored as block size - 1).
#define FLAC_BLOCKSIZE_AT_END_OF_HEADER_16 0xfffffffe
// 11.22.4. SAMPLE RATE
// Real sample rate follows as an 8-bit value that the decoder multiplies by 1000.
#define FLAC_SAMPLERATE_AT_END_OF_HEADER_8 0xffffffff
// Real sample rate follows as a 16-bit value.
#define FLAC_SAMPLERATE_AT_END_OF_HEADER_16 0xfffffffe
// Real sample rate follows as a 16-bit value; presumably the one the decoder multiplies by 10 (confirm in next_frame).
#define FLAC_SAMPLERATE_AT_END_OF_HEADER_16X10 0xfffffffd

// Metadata block type, 7 bits.
// 11.8 BLOCK_TYPE (7 bits)
enum class FlacMetadataBlockType : u8 {
STREAMINFO = 0, // Important data about the audio format
PADDING = 1, // Non-data block to be ignored
Expand All @@ -33,7 +35,7 @@ enum class FlacMetadataBlockType : u8 {
INVALID = 127, // Error
};

// follows FLAC codes
// 11.22.5. CHANNEL ASSIGNMENT
enum class FlacFrameChannelType : u8 {
Mono = 0,
Stereo = 1,
Expand All @@ -49,7 +51,7 @@ enum class FlacFrameChannelType : u8 {
// others are reserved
};

// follows FLAC codes
// 11.25.1. SUBFRAME TYPE
enum class FlacSubframeType : u8 {
Constant = 0,
Verbatim = 1,
Expand All @@ -58,28 +60,29 @@ enum class FlacSubframeType : u8 {
// others are reserved
};

// follows FLAC codes
// 11.30.1. RESIDUAL_CODING_METHOD
// Coding method of a subframe's residual, as read from the 2-bit field at the start of the residual.
// The enumerator values are the codes used in the bitstream, so the raw field can be cast directly.
enum class FlacResidualMode : u8 {
// Partitions whose Rice parameter is stored in 4 bits
Rice4Bit = 0,
// Partitions whose Rice parameter is stored in 5 bits
Rice5Bit = 1,
};

// Simple wrapper around any kind of metadata block
// 11.6. METADATA_BLOCK
// A metadata block whose header has been parsed but whose payload is still raw bytes.
// Block-specific parsers (e.g. for STREAMINFO or the seek table) interpret `data` afterwards.
struct FlacRawMetadataBlock {
// Header flag read before the block type
bool is_last_block;
// Block type from the 7-bit header field
FlacMetadataBlockType type;
u32 length; // 24 bits
// Payload of the block, not yet interpreted
ByteBuffer data;
};

// An abstract, parsed and validated FLAC frame
// 11.22. FRAME_HEADER
// Decoded properties of a frame header; the coded header fields are already converted to usable values.
struct FlacFrameHeader {
// Block size of the frame, in samples
u32 sample_count;
// Sample rate of the frame (presumably in Hz -- confirm against the sample rate decoding)
u32 sample_rate;
// Channel assignment of the frame
FlacFrameChannelType channels;
// Sample format derived from the frame's sample size code
PcmSampleFormat bit_depth;
};

// 11.25. SUBFRAME_HEADER
struct FlacSubframeHeader {
FlacSubframeType type;
// order for fixed and LPC subframes
Expand All @@ -88,6 +91,7 @@ struct FlacSubframeHeader {
u8 bits_per_sample;
};

// 11.14. SEEKPOINT
struct FlacSeekPoint {
u64 sample_index;
u64 byte_offset;
Expand Down

0 comments on commit cb8e37d

Please sign in to comment.