Skip to content

Commit

Permalink
ICU-22602 Fix stack overflow inside flattenVariables
Browse files Browse the repository at this point in the history
Limit the recursive call of flattenVariables to maximum depth 3500
since Java on my machine throw stack overflow exception around 3900.
  • Loading branch information
FrankYFTang committed Dec 14, 2023
1 parent 99f6be4 commit 19af9e7
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 10 deletions.
16 changes: 13 additions & 3 deletions icu4c/source/common/rbbinode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,17 @@ RBBINode *RBBINode::cloneTree() {
// nested references are handled by cloneTree(), not here.
//
//-------------------------------------------------------------------------
RBBINode *RBBINode::flattenVariables() {
constexpr int kRecursiveDepthLimit = 3500;
RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) {
if (U_FAILURE(status)) {
return this;
}
// If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
// to avoid stack overflow crash.
if (depth > kRecursiveDepthLimit) {
status = U_INPUT_TOO_LONG_ERROR;
return this;
}
if (fType == varRef) {
RBBINode *retNode = fLeftChild->cloneTree();
if (retNode != nullptr) {
Expand All @@ -251,11 +261,11 @@ RBBINode *RBBINode::flattenVariables() {
}

if (fLeftChild != nullptr) {
fLeftChild = fLeftChild->flattenVariables();
fLeftChild = fLeftChild->flattenVariables(status, depth+1);
fLeftChild->fParent = this;
}
if (fRightChild != nullptr) {
fRightChild = fRightChild->flattenVariables();
fRightChild = fRightChild->flattenVariables(status, depth+1);
fRightChild->fParent = this;
}
return this;
Expand Down
2 changes: 1 addition & 1 deletion icu4c/source/common/rbbinode.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class RBBINode : public UMemory {
static void NRDeleteNode(RBBINode *node);

RBBINode *cloneTree();
RBBINode *flattenVariables();
RBBINode *flattenVariables(UErrorCode &status, int depth=0);
void flattenSets();
void findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status);

Expand Down
5 changes: 4 additions & 1 deletion icu4c/source/common/rbbitblb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,10 @@ void RBBITableBuilder::buildForwardTable() {
// Walk through the tree, replacing any references to $variables with a copy of the
// parse tree for the substitution expression.
//
fTree = fTree->flattenVariables();
fTree = fTree->flattenVariables(*fStatus, 0);
if (U_FAILURE(*fStatus)) {
return;
}
#ifdef RBBI_DEBUG
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) {
RBBIDebugPuts("\nParse tree after flattening variable references.");
Expand Down
10 changes: 10 additions & 0 deletions icu4c/source/test/intltest/rbbitst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
TESTCASE_AUTO(TestBug22581);
TESTCASE_AUTO(TestBug22584);
TESTCASE_AUTO(TestBug22585);
TESTCASE_AUTO(TestBug22602);

#if U_ENABLE_TRACING
TESTCASE_AUTO(TestTraceCreateCharacter);
Expand Down Expand Up @@ -5879,6 +5880,15 @@ void RBBITest::TestBug22585() {
RuleBasedBreakIterator bi2(rule, pe, ec);
}

// Test a long string with a ; in the end will not cause stack overflow.
void RBBITest::TestBug22602() {
UnicodeString rule(25000, (UChar32)'A', 25000-1);
rule.append(u";");
UParseError pe {};
UErrorCode ec {U_ZERO_ERROR};
RuleBasedBreakIterator bi(rule, pe, ec);
}

void RBBITest::TestBug22584() {
// Creating a break iterator from a rule consisting of a very long
// literal input string caused a stack overflow when deleting the
Expand Down
1 change: 1 addition & 0 deletions icu4c/source/test/intltest/rbbitst.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ class RBBITest: public IntlTest {
void TestBug22581();
void TestBug22584();
void TestBug22585();
void TestBug22602();

#if U_ENABLE_TRACING
void TestTraceCreateCharacter();
Expand Down
11 changes: 7 additions & 4 deletions icu4j/main/core/src/main/java/com/ibm/icu/text/RBBINode.java
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,6 @@ RBBINode cloneTree() {
}



//-------------------------------------------------------------------------
//
// flattenVariables Walk a parse tree, replacing any variable
Expand All @@ -195,7 +194,11 @@ RBBINode cloneTree() {
// nested references are handled by cloneTree(), not here.
//
//-------------------------------------------------------------------------
RBBINode flattenVariables() {
static final private int kRecursiveDepthLimit = 3500;
RBBINode flattenVariables(int depth) {
if (depth > kRecursiveDepthLimit) {
throw new IllegalArgumentException("The input is too long");
}
if (fType == varRef) {
RBBINode retNode = fLeftChild.cloneTree();
retNode.fRuleRoot = this.fRuleRoot;
Expand All @@ -204,11 +207,11 @@ RBBINode flattenVariables() {
}

if (fLeftChild != null) {
fLeftChild = fLeftChild.flattenVariables();
fLeftChild = fLeftChild.flattenVariables(depth+1);
fLeftChild.fParent = this;
}
if (fRightChild != null) {
fRightChild = fRightChild.flattenVariables();
fRightChild = fRightChild.flattenVariables(depth+1);
fRightChild.fParent = this;
}
return this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ void buildForwardTable() {
// Walk through the tree, replacing any references to $variables with a copy of the
// parse tree for the substitution expression.
//
fRB.fTreeRoots[fRootIx] = fRB.fTreeRoots[fRootIx].flattenVariables();
fRB.fTreeRoots[fRootIx] = fRB.fTreeRoots[fRootIx].flattenVariables(0);
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("ftree")>=0) {
System.out.println("Parse tree after flattening variable references.");
fRB.fTreeRoots[fRootIx].printTree(true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import java.text.CharacterIterator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;

Expand Down Expand Up @@ -984,6 +985,23 @@ public void TestBug22585() {
fail("TestBug22585: Unexpected exception while new RuleBasedBreakIterator() with unpaired low surrogate: " + e);
}
}
@Test
public void TestBug22602() {
try {
char[] charArray = new char[25000];
Arrays.fill(charArray, 'A');
charArray[charArray.length-1] = ';';
String rules = new String(charArray);
RuleBasedBreakIterator bi = new RuleBasedBreakIterator(rules);
fail("TestBug22602: RuleBasedBreakIterator() failed to throw an exception with a long string followed by a ';'.");
}
catch (IllegalArgumentException e) {
// expected exception with a long string followed by a ';'.
}
catch(StackOverflowError e) {
fail("TestBug22602: Unexpected exception while new RuleBasedBreakIterator() with a long string followed by a ';': " + e);
}
}
/* Test preceding(index) and following(index), with semi-random indexes.
* The random indexes are produced in clusters that are relatively closely spaced,
* to increase the occurrences of hits to the internal break cache.
Expand Down

0 comments on commit 19af9e7

Please sign in to comment.