Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#63. Negative Lookahead Error #64

Open
wants to merge 9 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# BUG solution approaches overview:

1. Make the lookaround node apply restrictions to the surrounding nodes. The implication here is that it might not be known how many nodes the restrictions should be applied to.
1. The visitor that collects the generated value might verify that the inserted value complies with the restrictions from the lookaround nodes, though it is not clear how to re-generate values for a lookbehind pattern.


# Regex: generate matching and non-matching strings

This is a java library that, given a regex pattern, allows to:
Expand Down
7 changes: 7 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
<maven.versions.plugin.version>2.7</maven.versions.plugin.version>

<jacoco.maven.plugin.version>0.8.5</jacoco.maven.plugin.version>
<mockito-core.version>3.12.1</mockito-core.version>
</properties>

<licenses>
Expand Down Expand Up @@ -286,5 +287,11 @@
<version>${jmh.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>${mockito-core.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
42 changes: 40 additions & 2 deletions src/main/java/com/github/curiousoddman/rgxgen/RgxGen.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@

import com.github.curiousoddman.rgxgen.config.RgxGenProperties;
import com.github.curiousoddman.rgxgen.iterators.StringIterator;
import com.github.curiousoddman.rgxgen.iterators.ValidatedIterator;
import com.github.curiousoddman.rgxgen.nodes.Node;
import com.github.curiousoddman.rgxgen.parsing.NodeTreeBuilder;
import com.github.curiousoddman.rgxgen.parsing.dflt.DefaultTreeBuilder;
import com.github.curiousoddman.rgxgen.validation.Validator;
import com.github.curiousoddman.rgxgen.visitors.GenerationVisitor;
import com.github.curiousoddman.rgxgen.visitors.NotMatchingGenerationVisitor;
import com.github.curiousoddman.rgxgen.visitors.UniqueGenerationVisitor;
Expand All @@ -31,13 +33,17 @@
import java.util.Random;
import java.util.stream.Stream;

import static com.github.curiousoddman.rgxgen.config.RgxGenOption.MAX_LOOKAROUND_MATCH_RETRIES;

/**
* String values generator based on regular expression pattern
*/
public class RgxGen {
private static RgxGenProperties aGlobalProperties;

private final Node aNode;
private final Node aNode;
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private final Optional<Validator> aValidator;

private RgxGenProperties aLocalProperties = aGlobalProperties;

Expand Down Expand Up @@ -69,6 +75,7 @@ public RgxGen(CharSequence pattern) {
*/
public RgxGen(NodeTreeBuilder builder) {
aNode = builder.get();
aValidator = builder.getValidator();
}

/**
Expand Down Expand Up @@ -97,6 +104,9 @@ public void setProperties(RgxGenProperties properties) {
*/
@Deprecated
public BigInteger numUnique() {
if (aValidator.isPresent()) {
return null;
}
UniqueValuesCountingVisitor v = new UniqueValuesCountingVisitor(aLocalProperties);
aNode.visit(v);
return v.getEstimation()
Expand All @@ -111,6 +121,10 @@ public BigInteger numUnique() {
* though actual count is only 5, because right and left part of group can yield same value
*/
public Optional<BigInteger> getUniqueEstimation() {
if (aValidator.isPresent()) {
return Optional.empty();
}

UniqueValuesCountingVisitor v = new UniqueValuesCountingVisitor(aLocalProperties);
aNode.visit(v);
return v.getEstimation();
Expand All @@ -134,7 +148,11 @@ public Stream<String> stream() {
public StringIterator iterateUnique() {
    UniqueGenerationVisitor ugv = new UniqueGenerationVisitor(aLocalProperties);
    aNode.visit(ugv);
    StringIterator uniqueStrings = ugv.getUniqueStrings();
    // When the tree builder supplied a validator, wrap the raw iterator so that only
    // values accepted by the validator are yielded. The wrapping must happen before
    // any return statement, otherwise the validation is unreachable.
    if (aValidator.isPresent()) {
        return new ValidatedIterator(aValidator.get(), uniqueStrings);
    }
    return uniqueStrings;
}

/**
Expand All @@ -154,6 +172,26 @@ public String generate() {
* @return generated string.
*/
public String generate(Random random) {
    // No validator: a single generation pass is always sufficient.
    if (!aValidator.isPresent()) {
        return generateImpl(random);
    }

    final int retryLimit = MAX_LOOKAROUND_MATCH_RETRIES.getIntFromProperties(aLocalProperties);
    final Validator check = aValidator.get();
    int attempts = 0;
    while (true) {
        // The limit is enforced only when it is strictly positive; otherwise retries are unbounded.
        if (retryLimit > 0) {
            attempts++;
            if (attempts > retryLimit) {
                throw new RuntimeException("Pattern generation takes too much tries.");
            }
        }
        final String candidate = generateImpl(random);
        if (check.isValid(candidate)) {
            return candidate;
        }
    }
}

private String generateImpl(Random random) {
GenerationVisitor gv = GenerationVisitor.builder()
.withRandom(random)
.withProperties(aLocalProperties)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,16 @@ public enum RgxGenOption {
*
* @defaultValue false
*/
CASE_INSENSITIVE("matching.case.insensitive", "false");
CASE_INSENSITIVE("matching.case.insensitive", "false"),

/**
* Maximum number of attempts to generate a value when lookaround patterns are used.
* When this number of attempts is exceeded, an exception is thrown.
* Zero or a negative value means infinite retries. (WARNING! This might lead to an infinite loop)
*
* @defaultValue 1000
*/
MAX_LOOKAROUND_MATCH_RETRIES("max.lookaround.retries", "1000");

private final String aKey;
private final String aDefault;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
public interface StringIterator extends Iterator<String> {
/**
* Reset the iterator to the initial position.
* After reset it will start iterating from the first value.
* After reset, it will start iterating from the first value.
* <p>
* Can be used to restart iterator that returns {@code false} when {@code hasNext()} is called.
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package com.github.curiousoddman.rgxgen.iterators;

/* **************************************************************************
Copyright 2019 Vladislavs Varslavans

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
/* **************************************************************************/

import com.github.curiousoddman.rgxgen.validation.Validator;

import java.util.NoSuchElementException;

/**
 * {@link StringIterator} decorator that yields only those values of the wrapped iterator
 * that are accepted by the given {@link Validator}.
 * <p>
 * One valid value is always pre-fetched from the wrapped iterator into a look-ahead buffer.
 * This lets {@code hasNext()} be answered without losing a value, and makes the sequence
 * returned by {@code next()} independent of whether {@code hasNext()} was called first.
 */
public class ValidatedIterator implements StringIterator {
    private final Validator      aValidator;
    private final StringIterator aStringIterator;

    /** {@code true} once the look-ahead buffer has been filled for the first time (since construction or reset). */
    private boolean isInitialized = false;
    /** Last value returned by {@code next()}; {@code null} before the first call and after {@code reset()}. */
    private String  aCurrentValue = null;
    /** Pre-fetched valid value that the next call to {@code next()} will return; meaningful only when {@code hasNext} is set. */
    private String  aNextValue    = null;
    /** Whether {@code aNextValue} currently holds a pre-fetched valid value. */
    private boolean hasNext       = false;

    /**
     * @param validator      decides which values of the wrapped iterator are yielded
     * @param stringIterator source of candidate values
     */
    public ValidatedIterator(Validator validator, StringIterator stringIterator) {
        aValidator = validator;
        aStringIterator = stringIterator;
    }

    /**
     * Resets this iterator and the wrapped iterator; the look-ahead buffer is refilled lazily
     * on the next call to {@code hasNext()} or {@code next()}.
     */
    @Override
    public void reset() {
        isInitialized = false;
        hasNext = false;
        aCurrentValue = null;
        aNextValue = null;
        aStringIterator.reset();
    }

    /**
     * @return the value most recently returned by {@code next()}, or {@code null} if {@code next()}
     * has not been called since construction or the last reset
     */
    @Override
    public String current() {
        return aCurrentValue;
    }

    /**
     * Advances the wrapped iterator until a valid value is found and stores it in the look-ahead buffer.
     * Clears the buffer when the wrapped iterator is exhausted without producing a valid value.
     */
    private void prefetch() {
        isInitialized = true;
        while (aStringIterator.hasNext()) {
            String candidate = aStringIterator.next();
            if (aValidator.isValid(candidate)) {
                aNextValue = candidate;
                hasNext = true;
                return;
            }
        }
        aNextValue = null;
        hasNext = false;
    }

    /**
     * @return {@code true} if at least one more valid value can be returned by {@code next()}
     */
    @Override
    public boolean hasNext() {
        if (!isInitialized) {
            prefetch();
        }
        return hasNext;
    }

    /**
     * @return the next value accepted by the validator
     * @throws NoSuchElementException if no (more) valid values are available
     */
    @Override
    public String next() {
        if (!isInitialized) {
            prefetch();
        }
        if (!hasNext) {
            // Distinguish "nothing at all" from "ran past the end" to keep the original message meaningful.
            throw new NoSuchElementException(aCurrentValue == null
                                             ? "No texts can be produced by this pattern."
                                             : "No more texts can be produced by this pattern.");
        }
        aCurrentValue = aNextValue;
        // Refill the buffer immediately so that a following hasNext() needs no extra work.
        prefetch();
        return aCurrentValue;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
/* **************************************************************************/

import com.github.curiousoddman.rgxgen.nodes.Node;
import com.github.curiousoddman.rgxgen.validation.Validator;

import java.util.Optional;

/**
* Interface for the parser/nodes builder.
Expand All @@ -27,4 +30,9 @@ public interface NodeTreeBuilder {
* @return Root node for the parsed pattern
*/
Node get();

/**
* @return validator that every generated text must satisfy, or an empty {@code Optional} when no validation is required.
*/
Optional<Validator> getValidator();
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import com.github.curiousoddman.rgxgen.nodes.*;
import com.github.curiousoddman.rgxgen.parsing.NodeTreeBuilder;
import com.github.curiousoddman.rgxgen.util.Util;
import com.github.curiousoddman.rgxgen.validation.FullPatternValidator;
import com.github.curiousoddman.rgxgen.validation.Validator;

import java.util.*;

Expand All @@ -35,19 +37,22 @@ public class DefaultTreeBuilder implements NodeTreeBuilder {
private static final ConstantsProvider CONST_PROVIDER = new ConstantsProvider();

private final CharIterator aCharIterator;
private String aPattern;
private final Map<Node, Integer> aNodesStartPos = new IdentityHashMap<>();

private Node aNode;
private int aNextGroupIndex = 1;
private boolean aNeedValidation = false;
private Node aNode;
private int aNextGroupIndex = 1;

/**
* Default implementation of parser and NodeTreeBuilder.
* It reads expression and creates a hierarchy of {@code com.github.curiousoddman.rgxgen.generator.nodes.Node}.
*
* @param expr expression to parse
* @param pattern expression to parse
*/
public DefaultTreeBuilder(String expr) {
aCharIterator = new CharIterator(expr);
public DefaultTreeBuilder(String pattern) {
aCharIterator = new CharIterator(pattern);
aPattern = pattern;
}

/**
Expand Down Expand Up @@ -231,7 +236,7 @@ private Node parseGroup(int groupStartPos, GroupType currentGroupType) {
sbToFinal(sb, nodes);
int intGroupStartPos = aCharIterator.prevPos();
GroupType groupType = processGroupType();
nodes.add(parseGroup(intGroupStartPos, groupType));
handleGroupType(nodes, intGroupStartPos, groupType);
break;

case '|':
Expand Down Expand Up @@ -266,6 +271,39 @@ private Node parseGroup(int groupStartPos, GroupType currentGroupType) {
return handleGroupEndCharacter(groupStartPos, sb, nodes, isChoice, choices, captureGroupIndex, currentGroupType);
}

/**
 * Parses the group that starts at {@code intGroupStartPos} and either appends the resulting node
 * to {@code nodes} or, for negative lookarounds that cannot be generated directly, drops the node
 * and flags the whole pattern as requiring validation of generated texts.
 *
 * @param nodes            list that receives the parsed node when it is kept
 * @param intGroupStartPos start position that is passed on to {@code parseGroup}
 * @param groupType        type of the group being parsed
 */
private void handleGroupType(List<Node> nodes, int intGroupStartPos, GroupType groupType) {
    switch (groupType) {
        case NEGATIVE_LOOKAHEAD: {
            Node lookahead = parseGroup(intGroupStartPos, groupType);
            boolean patternContinues = aCharIterator.hasNext();
            if (!patternContinues) {
                // Nothing follows the lookahead: generating text that does not match it is enough.
                nodes.add(lookahead);
            } else {
                // More of the pattern follows: drop the lookahead node and validate every generated text instead.
                aNeedValidation = true;
            }
            break;
        }

        case NEGATIVE_LOOKBEHIND: {
            Node lookbehind = parseGroup(intGroupStartPos, groupType);
            // NOTE(review): the literal 3 is not explained in this file and is compared AFTER the group
            // has been parsed - confirm that it really detects a lookbehind at the very start of the pattern.
            boolean nothingBefore = aCharIterator.prevPos() == 3;
            if (!nothingBefore) {
                aNeedValidation = true;
            } else {
                // There is nothing to validate before this node, so non-matching text can simply be generated.
                nodes.add(lookbehind);
            }
            break;
        }

        case POSITIVE_LOOKBEHIND:
        case POSITIVE_LOOKAHEAD:
        case CAPTURE_GROUP:
        case NON_CAPTURE_GROUP: {
            // These group types are always kept in the node tree.
            Node group = parseGroup(intGroupStartPos, groupType);
            nodes.add(group);
            break;
        }
    }
}

private void handleAnySymbolCharacter(Collection<Node> nodes, StringBuilder sb) {
sbToFinal(sb, nodes);
SymbolSet symbolSet = new SymbolSet();
Expand Down Expand Up @@ -674,4 +712,9 @@ public Node get() {
}
return aNode;
}

/**
 * @return a {@code FullPatternValidator} built from the original pattern when parsing flagged
 * the pattern as requiring validation; an empty {@code Optional} otherwise
 */
@Override
public Optional<Validator> getValidator() {
    if (!aNeedValidation) {
        return Optional.empty();
    }
    return Optional.of(new FullPatternValidator(aPattern));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package com.github.curiousoddman.rgxgen.validation;

/* **************************************************************************
Copyright 2019 Vladislavs Varslavans

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
/* **************************************************************************/

import java.util.regex.Pattern;

/**
 * {@link Validator} backed by a compiled {@link Pattern}: a text is considered valid
 * when the pattern is found somewhere in it.
 */
public class FullPatternValidator implements Validator {
    private final Pattern aPattern;

    /**
     * @param pattern regular expression; compiled once when the validator is created
     */
    public FullPatternValidator(String pattern) {
        this.aPattern = Pattern.compile(pattern);
    }

    /**
     * @param text text to check
     * @return {@code true} if the pattern is found anywhere within {@code text}
     */
    @Override
    public boolean isValid(String text) {
        // find() rather than matches(): a match of any part of the text is sufficient.
        final boolean found = this.aPattern.matcher(text).find();
        return found;
    }
}
Loading