Skip to content

Commit

Permalink
LibCore+Userland: Add 13 more detectable file types
Browse files Browse the repository at this point in the history
This patch adds 13 new detectable file formats, which are as follows in
alphabetical order:
.blend, .isz, ext* filesystem, Lua bytecode, Matroska container, NES
ROM, .pdf, qcow image, .rtf, WebAssembly bytecode, Windows 3.1X/95
compressed archive and raw zlib stream

Some are a tad esoteric, but the more file types we detect, the more
useful this utility becomes! :^)
  • Loading branch information
vkoskiv authored and linusg committed May 24, 2021
1 parent 9a1ac66 commit cb8d0c8
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 19 deletions.
22 changes: 21 additions & 1 deletion Userland/Libraries/LibCore/MimeData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,12 @@ String guess_mime_type_based_on_filename(const StringView& path)
}

#define ENUMERATE_HEADER_CONTENTS \
__ENUMERATE_MIME_TYPE_HEADER(blend, "extra/blender", 0, 7, 'B', 'L', 'E', 'N', 'D', 'E', 'R') \
__ENUMERATE_MIME_TYPE_HEADER(bmp, "image/bmp", 0, 2, 'B', 'M') \
__ENUMERATE_MIME_TYPE_HEADER(bzip2, "application/x-bzip2", 0, 3, 'B', 'Z', 'h') \
__ENUMERATE_MIME_TYPE_HEADER(compressed_iso, "extra/isz", 0, 4, 'I', 's', 'Z', '!') \
__ENUMERATE_MIME_TYPE_HEADER(elf, "extra/elf", 0, 4, 0x7F, 'E', 'L', 'F') \
__ENUMERATE_MIME_TYPE_HEADER(ext, "extra/ext", 0x438, 2, 0x53, 0xEF) \
__ENUMERATE_MIME_TYPE_HEADER(flac, "extra/flac", 0, 4, 0x66, 0x4C, 0x61, 0x43) \
__ENUMERATE_MIME_TYPE_HEADER(gif_87, "image/gif", 0, 6, 'G', 'I', 'F', '8', '7', 'a') \
__ENUMERATE_MIME_TYPE_HEADER(gif_89, "image/gif", 0, 6, 'G', 'I', 'F', '8', '9', 'a') \
Expand All @@ -99,17 +102,34 @@ String guess_mime_type_based_on_filename(const StringView& path)
__ENUMERATE_MIME_TYPE_HEADER(jpeg, "image/jpeg", 0, 4, 0xFF, 0xD8, 0xFF, 0xDB) \
__ENUMERATE_MIME_TYPE_HEADER(jpeg_huh, "image/jpeg", 0, 4, 0xFF, 0xD8, 0xFF, 0xEE) \
__ENUMERATE_MIME_TYPE_HEADER(jpeg_jfif, "image/jpeg", 0, 12, 0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 'J', 'F', 'I', 'F', 0x00, 0x01) \
__ENUMERATE_MIME_TYPE_HEADER(lua_bytecode, "extra/lua-bytecode", 0, 4, 0x1B, 'L', 'u', 'a') \
__ENUMERATE_MIME_TYPE_HEADER(midi, "audio/midi", 0, 4, 0x4D, 0x54, 0x68, 0x64) \
__ENUMERATE_MIME_TYPE_HEADER(mkv, "extra/matroska", 0, 4, 0x1A, 0x45, 0xDF, 0xA3) \
__ENUMERATE_MIME_TYPE_HEADER(nesrom, "extra/nes-rom", 0, 4, 'N', 'E', 'S', 0x1A) \
__ENUMERATE_MIME_TYPE_HEADER(pbm, "image/x-portable-bitmap", 0, 3, 0x50, 0x31, 0x0A) \
__ENUMERATE_MIME_TYPE_HEADER(pdf, "application/pdf", 0, 5, 0x25, 'P', 'D', 'F', 0x2D) \
__ENUMERATE_MIME_TYPE_HEADER(pgm, "image/x-portable-graymap", 0, 3, 0x50, 0x32, 0x0A) \
__ENUMERATE_MIME_TYPE_HEADER(png, "image/png", 0, 8, 0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A) \
__ENUMERATE_MIME_TYPE_HEADER(ppm, "image/x-portable-pixmap", 0, 3, 0x50, 0x33, 0x0A) \
__ENUMERATE_MIME_TYPE_HEADER(qcow, "extra/qcow", 0, 3, 'Q', 'F', 'I') \
__ENUMERATE_MIME_TYPE_HEADER(rtf, "application/rtf", 0, 6, 0x7B, 0x5C, 0x72, 0x74, 0x66, 0x31) \
__ENUMERATE_MIME_TYPE_HEADER(sevenzip, "application/x-7z-compressed", 0, 6, 0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C) \
__ENUMERATE_MIME_TYPE_HEADER(shell, "text/x-shellscript", 0, 10, '#', '!', '/', 'b', 'i', 'n', '/', 's', 'h', '\n') \
__ENUMERATE_MIME_TYPE_HEADER(sqlite, "extra/sqlite", 0, 16, 'S', 'Q', 'L', 'i', 't', 'e', ' ', 'f', 'o', 'r', 'm', 'a', 't', ' ', '3', 0x00) \
__ENUMERATE_MIME_TYPE_HEADER(tar, "application/tar", 0x101, 5, 0x75, 0x73, 0x74, 0x61, 0x72) \
__ENUMERATE_MIME_TYPE_HEADER(tiff, "image/tiff", 0, 4, 'I', 'I', '*', 0x00) \
__ENUMERATE_MIME_TYPE_HEADER(tiff_bigendian, "image/tiff", 0, 4, 'M', 'M', 0x00, '*')
__ENUMERATE_MIME_TYPE_HEADER(tiff_bigendian, "image/tiff", 0, 4, 'M', 'M', 0x00, '*') \
__ENUMERATE_MIME_TYPE_HEADER(wasm, "application/wasm", 0, 4, 0x00, 'a', 's', 'm') \
__ENUMERATE_MIME_TYPE_HEADER(win_31x_archive, "extra/win-31x-compressed", 0, 4, 'K', 'W', 'A', 'J') \
__ENUMERATE_MIME_TYPE_HEADER(win_95_archive, "extra/win-95-compressed", 0, 4, 'S', 'Z', 'D', 'D') \
__ENUMERATE_MIME_TYPE_HEADER(zlib_0, "extra/raw-zlib", 0, 2, 0x78, 0x01) \
__ENUMERATE_MIME_TYPE_HEADER(zlib_1, "extra/raw-zlib", 0, 2, 0x78, 0x5E) \
__ENUMERATE_MIME_TYPE_HEADER(zlib_2, "extra/raw-zlib", 0, 2, 0x78, 0x9C) \
__ENUMERATE_MIME_TYPE_HEADER(zlib_3, "extra/raw-zlib", 0, 2, 0x78, 0xDA) \
__ENUMERATE_MIME_TYPE_HEADER(zlib_4, "extra/raw-zlib", 0, 2, 0x78, 0x20) \
__ENUMERATE_MIME_TYPE_HEADER(zlib_5, "extra/raw-zlib", 0, 2, 0x78, 0x7D) \
__ENUMERATE_MIME_TYPE_HEADER(zlib_6, "extra/raw-zlib", 0, 2, 0x78, 0xBB) \
__ENUMERATE_MIME_TYPE_HEADER(zlib_7, "extra/raw-zlib", 0, 2, 0x78, 0xF9)

#define __ENUMERATE_MIME_TYPE_HEADER(var_name, mime_type, pattern_offset, pattern_size, ...) \
static const u8 var_name##_arr[pattern_size] = { __VA_ARGS__ }; \
Expand Down
49 changes: 31 additions & 18 deletions Userland/Utilities/file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,24 +52,37 @@ static Optional<String> gzip_details(String description, const String& path)
return String::formatted("{}, {}", description, gzip_details.value());
}

#define ENUMERATE_MIME_TYPE_DESCRIPTIONS \
__ENUMERATE_MIME_TYPE_DESCRIPTION("application/gzip", "gzip compressed data", gzip_details) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("application/javascript", "JavaScript source", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("application/json", "JSON data", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("application/tar", "tape archive", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("application/x-7z-compressed", "7-Zip archive", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("audio/midi", "MIDI sound", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("extra/flac", "FLAC audio", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("extra/iso-9660", "ISO 9660 CD/DVD image", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("extra/sqlite", "sqlite database", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("image/bmp", "BMP image data", image_details) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("image/gif", "GIF image data", image_details) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("image/jpeg", "JPEG image data", image_details) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("image/png", "PNG image data", image_details) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("image/x-portable-bitmap", "PBM image data", image_details) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("image/x-portable-graymap", "PGM image data", image_details) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("image/x-portable-pixmap", "PPM image data", image_details) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("text/markdown", "Markdown document", description_only) \
#define ENUMERATE_MIME_TYPE_DESCRIPTIONS \
__ENUMERATE_MIME_TYPE_DESCRIPTION("application/gzip", "gzip compressed data", gzip_details) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("application/javascript", "JavaScript source", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("application/json", "JSON data", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("application/pdf", "PDF document", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("application/rtf", "Rich text file", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("application/tar", "tape archive", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("application/wasm", "WebAssembly bytecode", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("application/x-7z-compressed", "7-Zip archive", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("audio/midi", "MIDI sound", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("extra/blender", "Blender project file", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("extra/ext", "ext filesystem", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("extra/flac", "FLAC audio", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("extra/iso-9660", "ISO 9660 CD/DVD image", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("extra/isz", "Compressed ISO image", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("extra/lua-bytecode", "Lua bytecode", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("extra/matroska", "Matroska container", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("extra/nes-rom", "Nintendo Entertainment System ROM", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("extra/qcow", "qcow file", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("extra/raw-zlib", "raw zlib stream", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("extra/sqlite", "sqlite database", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("extra/win-31x-compressed", "Windows 3.1X compressed file", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("extra/win-95-compressed", "Windows 95 compressed file", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("image/bmp", "BMP image data", image_details) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("image/gif", "GIF image data", image_details) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("image/jpeg", "JPEG image data", image_details) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("image/png", "PNG image data", image_details) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("image/x-portable-bitmap", "PBM image data", image_details) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("image/x-portable-graymap", "PGM image data", image_details) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("image/x-portable-pixmap", "PPM image data", image_details) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("text/markdown", "Markdown document", description_only) \
__ENUMERATE_MIME_TYPE_DESCRIPTION("text/x-shellscript", "POSIX shell script text executable", description_only)

static Optional<String> get_description_from_mime_type(const String& mime, const String& path)
Expand Down

0 comments on commit cb8d0c8

Please sign in to comment.