Skip to content

Commit

Permalink
Patch MP3 parser to ignore files that contain Lyrics3 data. (spotify#164)
Browse files Browse the repository at this point in the history

* Patch MP3 parser to ignore files that contain Lyrics3 data.

* More tests.

* Bump clang-format version.

* Bump version.
  • Loading branch information
psobot committed Nov 18, 2022
1 parent cd31b23 commit 6c7f7af
Show file tree
Hide file tree
Showing 8 changed files with 3,743 additions and 60 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/all.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
- name: Check C++ Formatting
uses: jidicula/[email protected]
with:
clang-format-version: 11
clang-format-version: 14
fallback-style: LLVM

run-tests:
Expand Down
49 changes: 49 additions & 0 deletions pedalboard/io/AudioFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,59 @@

#pragma once

#include "../juce_overrides/juce_PatchedMP3AudioFormat.h"
#include "AudioFile.h"
#include "LameMP3AudioFormat.h"

namespace Pedalboard {

// Default audio buffer size, measured in frames (8192).
// NOTE(review): no use of this constant is visible in this chunk; presumably
// it sizes the chunked read/write buffers elsewhere -- confirm at call sites.
static constexpr const unsigned int DEFAULT_AUDIO_BUFFER_SIZE_FRAMES = 8192;

/**
 * Registers audio formats for reading and writing in a deterministic (but
 * configurable) order. On different platforms, different formats are handled
 * by different backends (i.e.: CoreAudioFormat handles MP3 on macOS, but only
 * for reading) so this method allows us to ensure reproducibility in tests.
 *
 * Registration order is: WAV (registered as the manager's default format),
 * AIFF, FLAC (if JUCE_USE_FLAC), Ogg Vorbis (if JUCE_USE_OGGVORBIS), one MP3
 * handler chosen as described below, and finally Windows Media (if
 * JUCE_USE_WINDOWS_MEDIA_FORMAT).
 *
 * This function is defined in a header, so it is declared `inline`; without
 * that, including this header from more than one translation unit would
 * produce multiple definitions of the same symbol at link time.
 *
 * @param manager            the format manager to populate. Each format is
 *                           allocated with `new` and handed to
 *                           `registerFormat`, which (per the JUCE
 *                           documentation) takes ownership of the pointer.
 * @param forWriting         if true, LameMP3AudioFormat is registered for MP3;
 *                           if false, an MP3 reader is registered instead.
 * @param crossPlatformOnly  only consulted when `forWriting` is false. If
 *                           true, juce::MP3AudioFormat is always used; if
 *                           false, juce::CoreAudioFormat is used on macOS/iOS
 *                           and juce::MP3AudioFormat everywhere else.
 */
inline void registerPedalboardAudioFormats(juce::AudioFormatManager &manager,
                                           bool forWriting,
                                           bool crossPlatformOnly) {
  // WAV is the only format registered with makeThisTheDefaultFormat == true.
  manager.registerFormat(new juce::WavAudioFormat(), true);
  manager.registerFormat(new juce::AiffAudioFormat(), false);

#if JUCE_USE_FLAC
  manager.registerFormat(new juce::FlacAudioFormat(), false);
#endif

#if JUCE_USE_OGGVORBIS
  manager.registerFormat(new juce::OggVorbisAudioFormat(), false);
#endif

  if (forWriting) {
    // Prefer our own custom MP3 format (which only writes, doesn't read) over
    // MP3AudioFormat (which only reads, doesn't write)
    manager.registerFormat(new LameMP3AudioFormat(), false);
  } else {
    // On macOS, CoreAudio can read MP3s better (more fault-tolerantly) than
    // MP3AudioFormat can. But sometimes, we still want to use the built-in MP3
    // reader so that we can get identical parsing behaviour on both macOS and
    // Linux. To do so, we use this flag:

    if (crossPlatformOnly) {
      manager.registerFormat(new juce::MP3AudioFormat(), false);
    } else {
#if JUCE_MAC || JUCE_IOS
      manager.registerFormat(new juce::CoreAudioFormat(), false);
#else
      manager.registerFormat(new juce::MP3AudioFormat(), false);
#endif
    }
  }

#if JUCE_USE_WINDOWS_MEDIA_FORMAT
  manager.registerFormat(new juce::WindowsMediaAudioFormat(), false);
#endif
}

// Empty base class with no members of its own. ReadableAudioFile derives from
// it and names it as its parent type in its pybind11 class binding.
class AudioFile {};

} // namespace Pedalboard
136 changes: 96 additions & 40 deletions pedalboard/io/ReadableAudioFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,11 @@ class ReadableAudioFile
: public AudioFile,
public std::enable_shared_from_this<ReadableAudioFile> {
public:
ReadableAudioFile(std::string filename) : filename(filename) {
formatManager.registerBasicFormats();
ReadableAudioFile(std::string filename, bool crossPlatformFormatsOnly = false)
: filename(filename) {
registerPedalboardAudioFormats(formatManager, false,
crossPlatformFormatsOnly);

juce::File file(filename);

if (!file.existsAsFile()) {
Expand Down Expand Up @@ -69,8 +72,10 @@ class ReadableAudioFile
"\" does not seem to be of a known or supported format.");
}

ReadableAudioFile(std::unique_ptr<PythonInputStream> inputStream) {
formatManager.registerBasicFormats();
ReadableAudioFile(std::unique_ptr<PythonInputStream> inputStream,
bool crossPlatformFormatsOnly = false) {
registerPedalboardAudioFormats(formatManager, false,
crossPlatformFormatsOnly);

if (!inputStream->isSeekable()) {
PythonException::raise();
Expand Down Expand Up @@ -239,7 +244,7 @@ class ReadableAudioFile
PythonException::raise();

if (!readResult) {
throw std::runtime_error("Failed to read from file.");
throwReadError(currentPosition, numSamples);
}
} else {
// If the audio is stored in an integral format, read it as integers
Expand All @@ -251,7 +256,7 @@ class ReadableAudioFile
currentPosition, numSamples);
PythonException::raise();
if (!readResult) {
throw std::runtime_error("Failed to read from file.");
throwReadError(currentPosition, numSamples);
}

// When converting 24-bit, 16-bit, or 8-bit data from int to float,
Expand Down Expand Up @@ -357,7 +362,7 @@ class ReadableAudioFile
currentPosition, numSamples);
PythonException::raise();
if (!readResult) {
throw std::runtime_error("Failed to read from file.");
throwReadError(currentPosition, numSamples);
}
} else {
// Read the file in smaller chunks, converting from int32 to the
Expand Down Expand Up @@ -465,6 +470,42 @@ class ReadableAudioFile
}

private:
/**
 * Builds a descriptive "failed to read" message and throws it. This method
 * never returns normally.
 *
 * The message names the source (the filename if one is set and non-empty,
 * otherwise the Python stream's representation if a stream is present), the
 * number of frames requested and the frame offset of the read. For
 * stream-backed files it also reports the stream's current byte position and,
 * if the stream is exhausted, a hint that the data near the end of the file
 * may be invalid.
 *
 * @param currentPosition  frame offset at which the failed read started.
 * @param numSamples       number of frames that the failed read requested.
 *
 * @throws std::runtime_error with the assembled message. If a Python
 *         exception is pending after querying the input stream,
 *         PythonException::raise() re-raises that instead.
 */
void throwReadError(long long currentPosition, long long numSamples) {
  std::ostringstream ss;

  // Format numbers using the user's preferred locale when it is available.
  // std::locale("") throws std::runtime_error if the environment names a
  // locale that is not installed (common in minimal containers); that must
  // not be allowed to replace the read error we are trying to report, so we
  // fall back to the stream's default locale in that case.
  try {
    ss.imbue(std::locale(""));
  } catch (const std::runtime_error &) {
    // Keep the default locale; the message is still correct, just unlocalized.
  }

  ss << "Failed to read audio data";

  if (getFilename() && !getFilename()->empty()) {
    ss << " from file \"" << *getFilename() << "\"";
  } else if (PythonInputStream *stream = getPythonInputStream()) {
    ss << " from " << stream->getRepresentation();
  }

  ss << "."
     << " Tried to read " << numSamples
     << " frames of audio from frame offset " << currentPosition;

  if (PythonInputStream *stream = getPythonInputStream()) {
    ss << " but encountered invalid data near byte " << stream->getPosition();
  }
  ss << ".";

  if (PythonInputStream *stream = getPythonInputStream()) {
    if (stream->isExhausted()) {
      ss << " The file may contain invalid data past or near its end. Try "
            "reading fewer audio frames from the file.";
    }
  }

  // In case any of the calls above to PythonInputStream cause an exception in
  // Python, this line will re-raise those so that the Python exception is
  // visible:
  PythonException::raise();

  throw std::runtime_error(ss.str());
}

juce::AudioFormatManager formatManager;
std::string filename;
std::unique_ptr<juce::AudioFormatReader> reader;
Expand Down Expand Up @@ -495,7 +536,9 @@ be readable depending on the operating system and installed system libraries:
``.wav``
Use :meth:`pedalboard.io.get_supported_read_formats()` to see which
formats or file extensions are supported on the current platform.
formats or file extensions are supported on the current platform. To use
only audio format parsing libraries that are consistent on all platforms, pass
``cross_platform_formats_only=True`` to this constructor.
(Note that although an audio file may have a certain file extension, its
contents may be encoded with a compression algorithm unsupported by
Expand All @@ -516,29 +559,35 @@ inline void init_readable_audio_file(
py::class_<ReadableAudioFile, AudioFile, std::shared_ptr<ReadableAudioFile>>
&pyReadableAudioFile) {
pyReadableAudioFile
.def(py::init([](std::string filename) -> ReadableAudioFile * {
.def(py::init([](std::string filename,
bool crossPlatformFormatsOnly) -> ReadableAudioFile * {
// This definition is only here to provide nice docstrings.
throw std::runtime_error(
"Internal error: __init__ should never be called, as this "
"class implements __new__.");
}),
py::arg("filename"))
.def(py::init([](py::object filelike) -> ReadableAudioFile * {
py::arg("filename"), py::arg("cross_platform_formats_only") = false)
.def(py::init([](py::object filelike,
bool crossPlatformFormatsOnly) -> ReadableAudioFile * {
// This definition is only here to provide nice docstrings.
throw std::runtime_error(
"Internal error: __init__ should never be called, as this "
"class implements __new__.");
}),
py::arg("file_like"))
py::arg("file_like"), py::arg("cross_platform_formats_only") = false)
.def_static(
"__new__",
[](const py::object *, std::string filename) {
return std::make_shared<ReadableAudioFile>(filename);
[](const py::object *, std::string filename,
bool crossPlatformFormatsOnly) {
return std::make_shared<ReadableAudioFile>(
filename, crossPlatformFormatsOnly);
},
py::arg("cls"), py::arg("filename"))
py::arg("cls"), py::arg("filename"),
py::arg("cross_platform_formats_only") = false)
.def_static(
"__new__",
[](const py::object *, py::object filelike) {
[](const py::object *, py::object filelike,
bool crossPlatformFormatsOnly) {
if (!isReadableFileLike(filelike)) {
throw py::type_error(
"Expected either a filename or a file-like object (with "
Expand All @@ -547,9 +596,11 @@ inline void init_readable_audio_file(
}

return std::make_shared<ReadableAudioFile>(
std::make_unique<PythonInputStream>(filelike));
std::make_unique<PythonInputStream>(filelike),
crossPlatformFormatsOnly);
},
py::arg("cls"), py::arg("file_like"))
py::arg("cls"), py::arg("file_like"),
py::arg("cross_platform_formats_only") = false)
.def(
"read", &ReadableAudioFile::read, py::arg("num_frames") = 0,
"Read the given number of frames (samples in each channel) from this "
Expand Down Expand Up @@ -659,31 +710,36 @@ inline void init_readable_audio_file(
"provided `target_sample_rate`, using a constant amount of "
"memory.\n\n*Introduced in v0.6.0.*");

m.def("get_supported_read_formats", []() {
juce::AudioFormatManager manager;
manager.registerBasicFormats();

std::vector<std::string> formatNames(manager.getNumKnownFormats());
juce::StringArray extensions;
for (int i = 0; i < manager.getNumKnownFormats(); i++) {
auto *format = manager.getKnownFormat(i);
extensions.addArray(format->getFileExtensions());
}
m.def(
"get_supported_read_formats",
[](bool crossPlatformFormatsOnly) {
juce::AudioFormatManager manager;
registerPedalboardAudioFormats(manager, false,
crossPlatformFormatsOnly);

std::vector<std::string> formatNames(manager.getNumKnownFormats());
juce::StringArray extensions;
for (int i = 0; i < manager.getNumKnownFormats(); i++) {
auto *format = manager.getKnownFormat(i);
extensions.addArray(format->getFileExtensions());
}

extensions.trim();
extensions.removeEmptyStrings();
extensions.removeDuplicates(true);
extensions.trim();
extensions.removeEmptyStrings();
extensions.removeDuplicates(true);

std::vector<std::string> output;
for (juce::String s : extensions) {
output.push_back(s.toStdString());
}
std::vector<std::string> output;
for (juce::String s : extensions) {
output.push_back(s.toStdString());
}

std::sort(
output.begin(), output.end(),
[](const std::string lhs, const std::string rhs) { return lhs < rhs; });
std::sort(output.begin(), output.end(),
[](const std::string lhs, const std::string rhs) {
return lhs < rhs;
});

return output;
});
return output;
},
py::arg("cross_platform_formats_only") = false);
}
} // namespace Pedalboard
28 changes: 13 additions & 15 deletions pedalboard/io/WriteableAudioFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,12 +248,7 @@ class WriteableAudioFile
"non-zero num_channels.");
}

// Don't use registerBasicFormats(), as it'll register the wrong MP3 format:
formatManager.registerFormat(new juce::WavAudioFormat(), false);
formatManager.registerFormat(new juce::AiffAudioFormat(), false);
formatManager.registerFormat(new juce::FlacAudioFormat(), false);
formatManager.registerFormat(new juce::OggVorbisAudioFormat(), false);
formatManager.registerFormat(new LameMP3AudioFormat(), false);
registerPedalboardAudioFormats(formatManager, true, false);

std::unique_ptr<juce::OutputStream> outputStream;
juce::AudioFormat *format = nullptr;
Expand Down Expand Up @@ -971,14 +966,17 @@ inline void init_writeable_audio_file(
"The strings ``\"best\"``, ``\"worst\"``, ``\"fastest\"``, and "
"``\"slowest\"`` will also work for any codec.");

m.def("get_supported_write_formats", []() {
// JUCE doesn't support writing other formats out-of-the-box on all
// platforms, and there's no easy way to tell which formats are supported
// without attempting to create an AudioFileWriter object - so this list is
// hardcoded for now.
const std::vector<std::string> formats = {".aiff", ".flac", ".ogg", ".wav",
".mp3"};
return formats;
});
m.def(
"get_supported_write_formats",
[](bool crossPlatformFormatsOnly = false) {
// JUCE doesn't support writing other formats out-of-the-box on all
// platforms, and there's no easy way to tell which formats are
// supported without attempting to create an AudioFileWriter object - so
// this list is hardcoded for now.
const std::vector<std::string> formats = {".aiff", ".flac", ".ogg",
".wav", ".mp3"};
return formats;
},
py::arg("cross_platform_formats_only") = false);
}
} // namespace Pedalboard
Loading

0 comments on commit 6c7f7af

Please sign in to comment.