Skip to content

Commit

Permalink
Merge pull request #81 from ceedubs/regex-optimization
Browse files Browse the repository at this point in the history
Update droste to 0.7.0 and support more regex optimizations
  • Loading branch information
ceedubs committed Jul 8, 2019
2 parents aa6d2f8 + 17e6f80 commit 2551e2d
Show file tree
Hide file tree
Showing 19 changed files with 144 additions and 57 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package ceedubs.irrec
package bench

import cats.implicits._
import regex._, Regex._
import java.util.regex.Pattern
import org.openjdk.jmh.annotations.{Benchmark, Scope, State}
Expand Down
2 changes: 2 additions & 0 deletions benchmarks/src/main/scala/RepeatCountRegexBenchmarks.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package ceedubs.irrec
package bench

import regex._, Regex._

import cats.implicits._
import java.util.regex.Pattern
import org.openjdk.jmh.annotations.{Benchmark, Scope, State}

Expand Down
2 changes: 2 additions & 0 deletions benchmarks/src/main/scala/ZeroStarStarABenchmarks.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package ceedubs.irrec
package bench

import regex._, Regex._

import cats.implicits._
import java.util.regex.Pattern
import org.openjdk.jmh.annotations.{Benchmark, Scope, State}

Expand Down
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ val stableVersion = "0.2.1"
val catsVersion = "1.6.1"
val catsCollectionsVersion = "0.8.0"
val scalacheckVersion = "1.13.5"
val drosteVersion = "0.6.0"
val drosteVersion = "0.7.0"
val fastParseVersion = "2.1.0"
val scalaJsDomVersion = "0.9.6"

Expand Down
2 changes: 2 additions & 0 deletions docs/performance.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ inefficientRegex.pprint
```

```scala mdoc:silent
import cats.implicits._

val moreEfficientRegex: Regex[Char] = inefficientRegex.optimize
```

Expand Down
6 changes: 3 additions & 3 deletions regex-gen/src/main/scala/RegexGen.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ import DietGen._
import cats.Order
import cats.collections.{Diet, Discrete, Range}
import cats.implicits._
import qq.droste.{scheme, CoalgebraM}
import qq.droste.data.CoattrF
import qq.droste.data.prelude._
import higherkindness.droste.{scheme, CoalgebraM}
import higherkindness.droste.data.CoattrF
import higherkindness.droste.data.prelude._
import org.scalacheck.{Arbitrary, Gen}, Gen.Choose

object RegexGen {
Expand Down
6 changes: 3 additions & 3 deletions regex-gen/src/main/scala/RegexMatchGen.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ import DietGen.dietMatchingGen
import cats.implicits._
import cats.Order
import cats.collections.{Diet, Discrete, Range}
import qq.droste.{scheme, Algebra}
import qq.droste.data.CoattrF
import qq.droste.data.prelude._
import higherkindness.droste.{scheme, Algebra}
import higherkindness.droste.data.CoattrF
import higherkindness.droste.data.prelude._
import org.scalacheck.Gen, Gen.Choose

object RegexMatchGen {
Expand Down
11 changes: 7 additions & 4 deletions regex-gen/src/main/scala/RegexShrink.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ package ceedubs.irrec
package regex

import cats.collections.{Diet, Discrete, Range}
import qq.droste.{scheme, RAlgebra}
import qq.droste.data.{Coattr, CoattrF}
import qq.droste.data.prelude._
import higherkindness.droste.{scheme, RAlgebra}
import higherkindness.droste.data.{Coattr, CoattrF}
import higherkindness.droste.data.prelude._
import org.scalacheck.Shrink
import cats.{Now, Order}
import cats.implicits._
Expand Down Expand Up @@ -61,7 +61,10 @@ object RegexShrink {

def shrinkDiet[A: Discrete: Order](diet: Diet[A]): Stream[Diet[A]] = {
implicit val rangeShrink: Shrink[Range[A]] = Shrink(shrinkRange(_))
Shrink.shrink(dietRangeList(diet)).map(ranges => ranges.foldMap(Diet.fromRange _))
Shrink
.shrink(dietRangeList(diet))
.filter(_.nonEmpty)
.map(ranges => ranges.foldMap(Diet.fromRange _))
}

// TODO ceedubs add something to cats collections so we don't need this.
Expand Down
8 changes: 4 additions & 4 deletions regex/src/main/scala/Glushkov.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ package regex
import cats.{Functor, Order}
import cats.data.State
import cats.implicits._
import qq.droste.{Algebra, AlgebraM}
import qq.droste.data.prelude._
import qq.droste.data.{Coattr, CoattrF}
import qq.droste.scheme
import higherkindness.droste.{Algebra, AlgebraM}
import higherkindness.droste.data.prelude._
import higherkindness.droste.data.{Coattr, CoattrF}
import higherkindness.droste.scheme
import scala.collection.immutable.{SortedMap, SortedSet}

// a lot of the code in here is based on https://luzhuomi.blogspot.com/2012/06/extending-glushkov-nfa-with-sub.html
Expand Down
45 changes: 23 additions & 22 deletions regex/src/main/scala/KleeneOptimization.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,33 @@ package ceedubs.irrec
package regex

import Regex._
import qq.droste.{scheme, Algebra}
import qq.droste.data.{Coattr, CoattrF}, Coattr.Roll
import qq.droste.data.prelude._
import KleeneF._

import higherkindness.droste.{scheme, Algebra}
import higherkindness.droste.data.{Coattr, CoattrF}, Coattr.Roll
import CoattrF.{Roll => RollF}
import higherkindness.droste.data.prelude._

object KleeneOptimization {
/**
 * Local simplification rules for a single Kleene node whose children are already `Kleene[A]`
 * values.
 *
 * The function is partial on purpose: it is only defined for nodes matching one of the rules
 * below, so it can be combined with other rule sets via `orElse` and given a fallback with
 * `applyOrElse`.
 */
def partialOptimizeKleene[A]: PartialFunction[CoattrF[KleeneF, A, Kleene[A]], Kleene[A]] = {
  // `One` is the identity of `Times`: drop it from either side.
  case RollF(Times(Roll(One), rhs)) => rhs
  case RollF(Times(lhs, Roll(One))) => lhs

  // `Zero` is the identity of `Plus`: drop it from either side.
  case RollF(Plus(lhs, Roll(Zero))) => lhs
  case RollF(Plus(Roll(Zero), rhs)) => rhs

  // Pull a `One` alternative out of a starred alternation, keeping the operand order.
  case RollF(Star(Roll(Plus(inner, Roll(One))))) => or(star(inner), empty)
  case RollF(Star(Roll(Plus(Roll(One), inner)))) => or(empty, star(inner))

  // Starring `One`, or something that is already starred, adds nothing.
  case RollF(Star(one @ Roll(One))) => one
  case RollF(Star(starred @ Roll(Star(_)))) => starred

  // TODO should be able to use an `Eq` instance and compare for OR of any 2 things that are equal?
  case RollF(Plus(one @ Roll(One), Roll(One))) => one
}

def optimizeKleeneAlgebra[A]: Algebra[CoattrF[KleeneF, A, ?], Kleene[A]] = Algebra {
CoattrF.un(_) match {
case l @ Left(_) => Coattr(l)
case r @ Right(k) =>
k match {
case KleeneF.Times(Roll(KleeneF.One), x) => x
case KleeneF.Times(x, Roll(KleeneF.One)) => x
case KleeneF.Plus(x, Roll(KleeneF.Zero)) => x
case KleeneF.Plus(Roll(KleeneF.Zero), x) => x
case KleeneF.Star(Roll(KleeneF.Plus(l, Roll(KleeneF.One)))) =>
or(star(l), empty)
case KleeneF.Star(Roll(KleeneF.Plus(Roll(KleeneF.One), r))) =>
or(empty, star(r))
case KleeneF.Star(x @ Roll(KleeneF.One)) => x
case KleeneF.Star(x @ Roll(KleeneF.Star(_))) => x
// TODO should be able to use an `Eq` instance and compare for OR of any 2 things that are equal?
case KleeneF.Plus(x @ Roll(KleeneF.One), Roll(KleeneF.One)) => x
case _ => Coattr(r)
}
}
val pf = partialOptimizeKleene[A]
val default: CoattrF[KleeneF, A, Kleene[A]] => Kleene[A] = x => Coattr(CoattrF.un(x))
k => pf.applyOrElse(k, default)
}

def optimizeKleene[A]: Kleene[A] => Kleene[A] = scheme.cata(optimizeKleeneAlgebra[A])
Expand Down
12 changes: 12 additions & 0 deletions regex/src/main/scala/Match.scala
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,16 @@ object Match {
def lit[A](a: A): Match[A] = Literal(a)

def wildcard[A]: Match[A] = Wildcard()

// Kept private for now: more match types might be added in the future, at which point we
// could lose the ability to compute a union like this.
/**
 * Combines two matches into a single match that accepts whatever either input accepts.
 *
 * A wildcard on either side wins outright; two equal literals stay a literal; every other
 * combination is expressed as a `MatchSet`.
 */
private[irrec] def unionMatches[A: Discrete: Order](x: Match[A], y: Match[A]): Match[A] = {
  // Adds one element to an existing set; the set is always the receiver of `union`.
  def withElem(set: MatchSet[A], elem: A): Match[A] =
    set.union(MatchSet.allow(Diet.one(elem)))

  (x, y) match {
    case (wild @ Wildcard(), _) => wild
    case (_, wild @ Wildcard()) => wild
    case (first @ Literal(a), Literal(b)) =>
      if (a === b) first else MatchSet.allow(Diet.one(a) + b)
    case (Literal(a), set: MatchSet[A]) => withElem(set, a)
    case (set: MatchSet[A], Literal(a)) => withElem(set, a)
    case (left: MatchSet[A], right: MatchSet[A]) => left.union(right)
  }
}
}
2 changes: 1 addition & 1 deletion regex/src/main/scala/Regex.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ package regex
import cats.{Foldable, Order, Reducible}
import cats.collections.{Diet, Discrete, Range}
import cats.implicits._
import qq.droste.data.Coattr
import higherkindness.droste.data.Coattr
import cats.data.NonEmptyList
import ceedubs.irrec.regex.Match.MatchSet

Expand Down
8 changes: 5 additions & 3 deletions regex/src/main/scala/RegexOps.scala
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
package ceedubs.irrec
package regex

import cats.Foldable
import cats.{Foldable, Order}
import java.util.regex.Pattern
import cats.collections.Discrete

final class KleeneOps[A](private val r: Kleene[A]) extends AnyVal {

Expand All @@ -20,13 +21,14 @@ final class KleeneOps[A](private val r: Kleene[A]) extends AnyVal {

def repeat(minInclusive: Int, maxInclusive: Option[Int]): Kleene[A] =
Regex.repeat(minInclusive, maxInclusive, r)

def optimize: Kleene[A] = KleeneOptimization.optimizeKleene(r)
}

final class RegexOps[A](private val r: Regex[A]) extends AnyVal {
def matcher[F[_]](implicit orderingA: Ordering[A], foldableF: Foldable[F]): F[A] => Boolean =
Regex.matcher(r)

def optimize(implicit discreteA: Discrete[A], orderA: Order[A]): Regex[A] =
RegexOptimization.optimizeRegex[A].apply(r)
}

final class CharRegexOps(private val r: Regex[Char]) extends AnyVal {
Expand Down
41 changes: 41 additions & 0 deletions regex/src/main/scala/RegexOptimization.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package ceedubs.irrec
package regex

import Regex._
import Match._
import KleeneF._

import cats.kernel.Order
import higherkindness.droste.{scheme, Algebra}
import higherkindness.droste.data.{Coattr, CoattrF}
import Coattr.{Pure, Roll}
import CoattrF.{Pure => PureF, Roll => RollF}
import higherkindness.droste.data.prelude._
import cats.collections.Discrete

/**
 * Structural optimizations for `Regex[A]` that build on the generic rules in
 * `KleeneOptimization` and add rewrites specific to `Match` leaves.
 */
object RegexOptimization {
/**
 * Rewrite rules for a single regex node whose children are already `Regex[A]` values.
 *
 * The cases overlap, so their order is significant: the first matching case wins.
 * Partial on purpose, so that it can be chained with other rule sets via `orElse`.
 */
def partialOptimizeRegex[A: Discrete: Order]
: PartialFunction[CoattrF[KleeneF, Match[A], Regex[A]], Regex[A]] = {
// A set match over an empty diet: forbidding nothing becomes `Regex.wildcard`,
// allowing nothing becomes `Regex.impossible`.
case PureF(MatchSet.Forbid(d)) if d.isEmpty => Regex.wildcard
case PureF(MatchSet.Allow(d)) if d.isEmpty => Regex.impossible
// Two match leaves directly under a Plus are merged into a single match.
case RollF(Plus(Pure(x), Pure(y))) => matching(Match.unionMatches(x, y))
// A match leaf next to a nested Plus that has a match leaf on its left:
// merge the two leaves and keep the remaining operand as the other alternative.
case RollF(Plus(Pure(x), Roll(Plus(Pure(y), z)))) =>
matching(Match.unionMatches(x, y)) | z
case RollF(Plus(Roll(Plus(Pure(x), y)), Pure(z))) =>
matching(Match.unionMatches(x, z)) | y
// Both operands are nested Plus nodes led by a match leaf: merge the two leaves and
// combine the two remainders.
case RollF(Plus(Roll(Plus(Pure(l1), l2)), Roll(Plus(Pure(r1), r2)))) =>
matching(Match.unionMatches(l1, r1)) | (l2 | r2)
// The remaining cases only re-associate / reorder so that a match leaf ends up as the
// left operand of the outer Plus — presumably so that the merge cases above can apply
// once an enclosing Plus is processed (NOTE(review): confirm intent).
case RollF(Plus(Roll(Plus(x @ Pure(_), y)), z)) => x | (y | z)
case RollF(Plus(l @ Roll(_), Roll(Plus(r1 @ Pure(_), r2)))) => r1 | (l | r2)
case RollF(Plus(l @ Roll(_), r @ Pure(_))) => r | l
}

/**
 * Algebra that applies the generic Kleene rules first and the regex-specific rules second;
 * a node that matches neither rule set is rebuilt unchanged.
 */
def optimizeRegexAlgebra[A: Discrete: Order]: Algebra[CoattrF[KleeneF, Match[A], ?], Regex[A]] =
Algebra[CoattrF[KleeneF, Match[A], ?], Regex[A]] {
val pf = KleeneOptimization.partialOptimizeKleene[Match[A]] orElse partialOptimizeRegex[A]
// fallback: wrap the unmodified node back into a `Coattr`
val default: CoattrF[KleeneF, Match[A], Regex[A]] => Regex[A] = x => Coattr(CoattrF.un(x))
k => pf.applyOrElse(k, default)
}

/** Optimizes a whole regex by folding `optimizeRegexAlgebra` over it with `scheme.cata`. */
def optimizeRegex[A: Discrete: Order]: Regex[A] => Regex[A] = scheme.cata(optimizeRegexAlgebra[A])
}
6 changes: 3 additions & 3 deletions regex/src/main/scala/RegexPrettyPrinter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ package regex

import cats.Eq
import cats.implicits._
import qq.droste.{scheme, Algebra, Gather, RAlgebra}
import qq.droste.data.CoattrF
import qq.droste.data.prelude._
import higherkindness.droste.{scheme, Algebra, Gather, RAlgebra}
import higherkindness.droste.data.CoattrF
import higherkindness.droste.data.prelude._
import cats.collections.Diet

object RegexPrettyPrinter {
Expand Down
2 changes: 1 addition & 1 deletion regex/src/main/scala/package.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package ceedubs.irrec

import qq.droste.data.Coattr
import higherkindness.droste.data.Coattr

package object regex {
type Kleene[A] = Coattr[KleeneF, A]
Expand Down
26 changes: 17 additions & 9 deletions tests/src/test/scala/parse/ParserTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -318,18 +318,25 @@ class ParserTests extends IrrecSuite {
}

test("pretty print parser round trip") {
forAll(genCharRegexAndCandidate) {
case RegexAndCandidate(r, s) =>
val clue = s"regex: (${r.pprint}), candidate: (${s.mkString})"
parseRegex(r.pprint) match {
case Failure(label, _, _) => withClue(clue)(fail(s"parsing failure: $label"))
case Success(parsed, _) =>
sameRegex(parsed, r)
withClue(clue)(r.matcher[Stream].apply(s) should ===(parsed.matcher[Stream].apply(s)))
}
implicit val regexShrink = RegexShrink.shrinkForRegex[Char]
forAll(genStandardRegexChar) { r =>
val clue = s"regex: (${r.pprint})"
parseRegex(r.pprint) match {
case Failure(label, _, _) => withClue(clue)(fail(s"parsing failure: $label"))
case Success(parsed, _) =>
sameRegex(parsed, r)
}
}
}

test("unicode character points") {
  // U+F9D3 is written as an escape rather than as a raw glyph: the raw character is a CJK
  // compatibility ideograph that canonical normalization rewrites to U+9678, and a raw
  // non-ASCII literal also depends on the compiler's source encoding. The escape keeps the
  // literal in lock-step with the "\\uf9d3" strings asserted below.
  val r = lit('\uf9d3')
  val printed = r.pprint
  printed should ===("\\uf9d3")
  // parsing the printed form and printing it again must yield the same text
  val r2 = parse("\\uf9d3")
  r2.pprint should ===("\\uf9d3")
}

test("regex parsing handles empty strings") {
val expected = Regex.empty[Match[Char]]
val r = parse("")
Expand Down Expand Up @@ -403,6 +410,7 @@ class ParserTests extends IrrecSuite {
// equivalent. For example `Times(x, Times(y, z))` and `Times(Times(x, y), z)`. So we compare
// them by their pretty-printed equivalence. It's not perfect, but in practice it works pretty
// well.
//actual.optimize.pprint should ===(expected.optimize.pprint)
actual.optimize.pprint should ===(expected.optimize.pprint)
}
}
Expand Down
1 change: 0 additions & 1 deletion tests/src/test/scala/regex-gen/RegexShrinkTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ class RegexShrinkTests extends CatsSuite {
test("shrink negative MatchSet") {
val r: Regex[Char] = notInSet(Diet.one('a') + 'b' + 'c')
val expected = List(
Diet.empty[Char],
Diet.one('a') + 'b',
Diet.one('a'),
Diet.one('b') + 'c',
Expand Down
18 changes: 16 additions & 2 deletions tests/src/test/scala/regex/KleeneOptimizationTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package regex

import Regex._
import RegexAndCandidate._
import cats.collections.{Diet, Range}

class KleeneOptimizationTests extends IrrecSuite {
test("an optimized regular expression still produces the same result") {
Expand All @@ -13,15 +14,28 @@ class KleeneOptimizationTests extends IrrecSuite {
}
}

test("optimization changes (empty|r)* to empty|r*") {
test("optimization changes (empty|r)* to r*|empty") {
val r = (Regex.empty | lit('b')).star
val optimizedR = r.optimize
optimizedR should be(Regex.empty | lit('b').star)
optimizedR should be(lit('b').star | Regex.empty)
}

test("optimization changes (r|empty)* to r*|empty") {
val r = (lit('b') | Regex.empty).star
val optimizedR = r.optimize
optimizedR should be(lit('b').star | Regex.empty)
}

test("optimization changes a|b to [ab]") {
  // two adjacent literal alternatives are expected to collapse into a single range match
  val unoptimized = lit('a') | lit('b')
  unoptimized.optimize should be(range('a', 'b'))
}

test("optimization changes [ad]|b|f|[ez] to [a-bd-fz]") {
  // a chain of set and literal alternatives is expected to merge into one set
  val alternatives =
    inSet(Diet.one('a') + 'd') | lit('b') | lit('f') | inSet(Diet.one('e') + 'z')
  val merged = Diet.fromRange(Range('a', 'b')) + Range('d', 'f') + 'z'
  alternatives.optimize should be(inSet(merged))
}
}

0 comments on commit 2551e2d

Please sign in to comment.