From 0af192ff8d3874a0bd3ee505be5336158326f587 Mon Sep 17 00:00:00 2001
From: Andrew Kaster <akaster@serenityos.org>
Date: Sun, 30 May 2021 16:12:11 -0600
Subject: [PATCH] AK: Handle LEB128 encoded values that are too large for the
 result type

Previously, we would go crazy and shift things way out of bounds.
Add tests to verify that the decoding algorithm is safe around the
limits of the result type.
---
 AK/LEB128.h             | 40 ++++++++++++++---
 Tests/AK/TestLEB128.cpp | 97 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 130 insertions(+), 7 deletions(-)
diff --git a/AK/LEB128.h b/AK/LEB128.h
index 1b3319909df687..cf195a50cf160d 100644
--- a/AK/LEB128.h
+++ b/AK/LEB128.h
@@ -6,6 +6,7 @@
 
 #pragma once
 
+#include <AK/NumericLimits.h>
 #include <AK/Stream.h>
 #include <AK/Types.h>
 
@@ -29,13 +30,18 @@ struct LEB128 {
                 input_stream.set_fatal_error();
                 return false;
             }
-
             u8 byte = 0;
             input_stream >> byte;
             if (input_stream.has_any_error())
                 return false;
 
-            result = (result) | (static_cast<ValueType>(byte & ~(1 << 7)) << (num_bytes * 7));
+            ValueType masked_byte = byte & ~(1 << 7);
+            const bool shift_too_large_for_result = (num_bytes * 7 > sizeof(ValueType) * 8) && (masked_byte != 0);
+            const bool shift_too_large_for_byte = ((masked_byte << (num_bytes * 7)) >> (num_bytes * 7)) != masked_byte;
+            if (shift_too_large_for_result || shift_too_large_for_byte)
+                return false;
+
+            result = (result) | (masked_byte << (num_bytes * 7));
             if (!(byte & (1 << 7)))
                 break;
             ++num_bytes;
@@ -47,15 +53,18 @@ struct LEB128 {
     template<typename StreamT, typename ValueType = ssize_t>
     static bool read_signed(StreamT& stream, ValueType& result)
     {
-        using UValueType = MakeUnsigned<ValueType>;
+        // Note: We read into a u64 to simplify the parsing logic;
+        //    result is range checked into ValueType after parsing.
+        static_assert(sizeof(ValueType) <= sizeof(u64), "Error checking logic assumes 64 bits or less!");
         [[maybe_unused]] size_t backup_offset = 0;
         if constexpr (requires { stream.offset(); })
             backup_offset = stream.offset();
         InputStream& input_stream { stream };
 
-        result = 0;
+        i64 temp = 0;
         size_t num_bytes = 0;
         u8 byte = 0;
+        result = 0;
 
         do {
             if (input_stream.unreliable_eof()) {
@@ -68,15 +77,32 @@ struct LEB128 {
             input_stream >> byte;
             if (input_stream.has_any_error())
                 return false;
-            result = (result) | (static_cast<UValueType>(byte & ~(1 << 7)) << (num_bytes * 7));
+
+            // note: 64 bit assumptions!
+            u64 masked_byte = byte & ~(1 << 7);
+            const bool shift_too_large_for_result = (num_bytes * 7 >= 64) && (masked_byte != ((temp < 0) ? 0x7Fu : 0u));
+            const bool shift_too_large_for_byte = (num_bytes * 7) == 63 && masked_byte != 0x00 && masked_byte != 0x7Fu;
+
+            if (shift_too_large_for_result || shift_too_large_for_byte)
+                return false;
+
+            temp = (temp) | (masked_byte << (num_bytes * 7));
             ++num_bytes;
         } while (byte & (1 << 7));
 
-        if (num_bytes * 7 < sizeof(UValueType) * 4 && (byte & 0x40)) {
+        if ((num_bytes * 7) < 64 && (byte & 0x40)) {
             // sign extend
-            result |= ((UValueType)(-1) << (num_bytes * 7));
+            temp |= ((u64)(-1) << (num_bytes * 7));
+        }
+
+        // Now that we've accumulated into an i64, make sure it fits into result
+        if constexpr (sizeof(ValueType) < sizeof(u64)) {
+            if (temp > NumericLimits<ValueType>::max() || temp < NumericLimits<ValueType>::min())
+                return false;
         }
 
+        result = static_cast<ValueType>(temp);
+
         return true;
     }
 };
diff --git a/Tests/AK/TestLEB128.cpp b/Tests/AK/TestLEB128.cpp
index 615b3498afc14c..ce5ee65cdeaeaa 100644
--- a/Tests/AK/TestLEB128.cpp
+++ b/Tests/AK/TestLEB128.cpp
@@ -6,6 +6,7 @@
 
 #include <AK/LEB128.h>
 #include <AK/MemoryStream.h>
+#include <AK/NumericLimits.h>
 #include <LibTest/TestCase.h>
 
 TEST_CASE(single_byte)
@@ -113,3 +114,99 @@ TEST_CASE(two_bytes)
         }
     }
 }
+
+TEST_CASE(overflow_sizeof_output_unsigned)
+{
+    u8 u32_max_plus_one[] = { 0x80, 0x80, 0x80, 0x80, 0x10 };
+    {
+        u32 out = 0;
+        InputMemoryStream stream({ u32_max_plus_one, sizeof(u32_max_plus_one) });
+        EXPECT(!LEB128::read_unsigned(stream, out));
+        EXPECT_EQ(out, 0u);
+        EXPECT(!stream.handle_any_error());
+
+        u64 out64 = 0;
+        stream.seek(0);
+        EXPECT(LEB128::read_unsigned(stream, out64));
+        EXPECT_EQ(out64, static_cast<u64>(NumericLimits<u32>::max()) + 1);
+        EXPECT(!stream.handle_any_error());
+    }
+
+    u8 u32_max[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0x0F };
+    {
+        u32 out = 0;
+        InputMemoryStream stream({ u32_max, sizeof(u32_max) });
+        EXPECT(LEB128::read_unsigned(stream, out));
+        EXPECT_EQ(out, NumericLimits<u32>::max());
+        EXPECT(!stream.handle_any_error());
+
+        u64 out64 = 0;
+        stream.seek(0);
+        EXPECT(LEB128::read_unsigned(stream, out64));
+        EXPECT_EQ(out64, NumericLimits<u32>::max());
+        EXPECT(!stream.handle_any_error());
+    }
+}
+
+TEST_CASE(overflow_sizeof_output_signed)
+{
+    u8 i32_max_plus_one[] = { 0x80, 0x80, 0x80, 0x80, 0x08 };
+    {
+        i32 out = 0;
+        InputMemoryStream stream({ i32_max_plus_one, sizeof(i32_max_plus_one) });
+        EXPECT(!LEB128::read_signed(stream, out));
+        EXPECT_EQ(out, 0);
+        EXPECT(!stream.handle_any_error());
+
+        i64 out64 = 0;
+        stream.seek(0);
+        EXPECT(LEB128::read_signed(stream, out64));
+        EXPECT_EQ(out64, static_cast<i64>(NumericLimits<i32>::max()) + 1);
+        EXPECT(!stream.handle_any_error());
+    }
+
+    u8 i32_max[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0x07 };
+    {
+        i32 out = 0;
+        InputMemoryStream stream({ i32_max, sizeof(i32_max) });
+        EXPECT(LEB128::read_signed(stream, out));
+        EXPECT_EQ(out, NumericLimits<i32>::max());
+        EXPECT(!stream.handle_any_error());
+
+        i64 out64 = 0;
+        stream.seek(0);
+        EXPECT(LEB128::read_signed(stream, out64));
+        EXPECT_EQ(out64, NumericLimits<i32>::max());
+        EXPECT(!stream.handle_any_error());
+    }
+
+    u8 i32_min_minus_one[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0x77 };
+    {
+        i32 out = 0;
+        InputMemoryStream stream({ i32_min_minus_one, sizeof(i32_min_minus_one) });
+        EXPECT(!LEB128::read_signed(stream, out));
+        EXPECT_EQ(out, 0);
+        EXPECT(!stream.handle_any_error());
+
+        i64 out64 = 0;
+        stream.seek(0);
+        EXPECT(LEB128::read_signed(stream, out64));
+        EXPECT_EQ(out64, static_cast<i64>(NumericLimits<i32>::min()) - 1);
+        EXPECT(!stream.handle_any_error());
+    }
+
+    u8 i32_min[] = { 0x80, 0x80, 0x80, 0x80, 0x78 };
+    {
+        i32 out = 0;
+        InputMemoryStream stream({ i32_min, sizeof(i32_min) });
+        EXPECT(LEB128::read_signed(stream, out));
+        EXPECT_EQ(out, NumericLimits<i32>::min());
+        EXPECT(!stream.handle_any_error());
+
+        i64 out64 = 0;
+        stream.seek(0);
+        EXPECT(LEB128::read_signed(stream, out64));
+        EXPECT_EQ(out64, NumericLimits<i32>::min());
+        EXPECT(!stream.handle_any_error());
+    }
+}