Skip to content

Commit

Permalink
Meta+LibUnicode: Avoid relocations for static unicode data
Browse files Browse the repository at this point in the history
Previously, each entry in s_decomposition_mappings held a Span pointing into
s_decomposition_mappings_data. Because those entries contained pointers, they
caused thousands of avoidable relocations at load time.

This saves about 128 kB of RAM in each process that uses LibUnicode.
  • Loading branch information
gunnarbeutner committed Nov 6, 2022
1 parent fb71df5 commit 2d3567e
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 12 deletions.
24 changes: 17 additions & 7 deletions Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,13 @@ struct SpecialCasing {
Condition condition { Condition::None };
};
// Pointer-free table entry describing the decomposition mapping of one code point.
// Instead of holding a Span into s_decomposition_mappings_data, it records an offset
// and a length; the lookup functions build the Span<u32 const> from these on demand,
// so the static table needs no load-time relocations.
struct CodePointDecompositionRaw {
// The code point this entry describes; the lookup table is binary-searched on it.
u32 code_point { 0 };
// Formatting tag of the mapping; defaults to Canonical.
CompatibilityFormattingTag tag { CompatibilityFormattingTag::Canonical };
// Offset of the first decomposed code point within s_decomposition_mappings_data.
size_t decomposition_index { 0 };
// Number of u32 entries, starting at decomposition_index, that form the decomposition.
size_t decomposition_count { 0 };
};
struct CodePointDecomposition {
u32 code_point { 0 };
CompatibilityFormattingTag tag { CompatibilityFormattingTag::Canonical };
Expand Down Expand Up @@ -947,7 +954,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { {
generator.set("tag", mapping->tag);
generator.set("start", String::number(mapping->decomposition_index));
generator.set("size", String::number(mapping->decomposition_size));
generator.append(", CompatibilityFormattingTag::@tag@, Span<u32 const> { s_decomposition_mappings_data.data() + @start@, @size@ } },");
generator.append(", CompatibilityFormattingTag::@tag@, @start@, @size@ },");
} else {
append_list_and_size(data.special_casing_indices, "&s_special_casing[{}]"sv);
generator.append(" },");
Expand All @@ -974,7 +981,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { {
append_code_point_mappings("special_case"sv, "SpecialCaseMapping"sv, unicode_data.code_points_with_special_casing, [](auto const& data) { return data.special_casing_indices; });
append_code_point_mappings("abbreviation"sv, "CodePointAbbreviation"sv, unicode_data.code_point_abbreviations.size(), [](auto const& data) { return data.abbreviation; });

append_code_point_mappings("decomposition"sv, "CodePointDecomposition"sv, unicode_data.code_points_with_decomposition_mapping,
append_code_point_mappings("decomposition"sv, "CodePointDecompositionRaw"sv, unicode_data.code_points_with_decomposition_mapping,
[](auto const& data) {
return data.decomposition_mapping;
});
Expand Down Expand Up @@ -1153,17 +1160,20 @@ Optional<StringView> code_point_abbreviation(u32 code_point)
return decode_string(mapping->abbreviation);
}
Optional<CodePointDecomposition const&> code_point_decomposition(u32 code_point)
Optional<CodePointDecomposition const> code_point_decomposition(u32 code_point)
{
auto const* mapping = binary_search(s_decomposition_mappings, code_point, nullptr, CodePointComparator<CodePointDecomposition> {});
auto const* mapping = binary_search(s_decomposition_mappings, code_point, nullptr, CodePointComparator<CodePointDecompositionRaw> {});
if (mapping == nullptr)
return {};
return *mapping;
return CodePointDecomposition { mapping->code_point, mapping->tag, Span<u32 const> { s_decomposition_mappings_data.data() + mapping->decomposition_index, mapping->decomposition_count } };
}
Span<CodePointDecomposition const> code_point_decompositions()
Optional<CodePointDecomposition const> code_point_decomposition_by_index(size_t index)
{
return s_decomposition_mappings;
if (index >= s_decomposition_mappings.size())
return {};
auto const& mapping = s_decomposition_mappings[index];
return CodePointDecomposition { mapping.code_point, mapping.tag, Span<u32 const> { s_decomposition_mappings_data.data() + mapping.decomposition_index, mapping.decomposition_count } };
}
)~~~");

Expand Down
10 changes: 7 additions & 3 deletions Userland/Libraries/LibUnicode/Normalize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

namespace Unicode {

Optional<CodePointDecomposition const&> __attribute__((weak)) code_point_decomposition(u32) { return {}; }
Span<CodePointDecomposition const> __attribute__((weak)) code_point_decompositions() { return {}; }
Optional<CodePointDecomposition const> __attribute__((weak)) code_point_decomposition(u32) { return {}; }
Optional<CodePointDecomposition const> __attribute__((weak)) code_point_decomposition_by_index(size_t) { return {}; }

NormalizationForm normalization_form_from_string(StringView form)
{
Expand Down Expand Up @@ -122,7 +122,11 @@ static u32 combine_code_points(u32 a, u32 b)
{
Array<u32, 2> const points { a, b };
// FIXME: Do something better than linear search to find reverse mappings.
for (auto const& mapping : Unicode::code_point_decompositions()) {
for (size_t index = 0;; ++index) {
auto mapping_maybe = Unicode::code_point_decomposition_by_index(index);
if (!mapping_maybe.has_value())
break;
auto& mapping = mapping_maybe.value();
if (mapping.tag == CompatibilityFormattingTag::Canonical && mapping.decomposition == points) {
if (code_point_has_property(mapping.code_point, Property::Full_Composition_Exclusion))
continue;
Expand Down
4 changes: 2 additions & 2 deletions Userland/Libraries/LibUnicode/Normalize.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@

namespace Unicode {

Optional<CodePointDecomposition const&> code_point_decomposition(u32 code_point);
Span<CodePointDecomposition const> code_point_decompositions();
Optional<CodePointDecomposition const> code_point_decomposition(u32 code_point);
Optional<CodePointDecomposition const> code_point_decomposition_by_index(size_t index);

enum class NormalizationForm {
NFD,
Expand Down

0 comments on commit 2d3567e

Please sign in to comment.