From 280a66ddc3cd9741ec58ab426f1c28da60523812 Mon Sep 17 00:00:00 2001 From: Cody Allen Date: Fri, 1 Feb 2019 16:37:33 -0800 Subject: [PATCH] Fix sporadic failure in rlike test See conversation [here](https://github.com/typelevel/frameless/pull/342#issuecomment-448740999). The `rlike` test was generating random strings and using them as regular expressions. This occasionally caused issues when the strings did not form valid regular expressions (unclosed braces, etc). This PR solves the issue by using the library [irrec](https://github.com/ceedubs/irrec). irrec provides Scalacheck generators for regular expressions and candidate matches for them. Full disclosure: I'm the only maintainer/contributor for irrec, and it should be considered a hobby project. Having said that, it's only being added as a test dependency and it fixes a sporadic build failure, so it may be worth it. @imarios suggested that it would make a good PR [here](https://github.com/typelevel/frameless/pull/342#issuecomment-449084595). Feel free to merge if you like this, but I won't be offended if you don't :) --- build.sbt | 6 ++++-- dataset/src/test/scala/frameless/ColumnTests.scala | 11 +++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/build.sbt b/build.sbt index 16d55d15..5e1ededf 100644 --- a/build.sbt +++ b/build.sbt @@ -5,6 +5,7 @@ val catsMtlVersion = "0.3.0" val scalatest = "3.0.3" val shapeless = "2.3.2" val scalacheck = "1.14.0" +val irrecVersion = "0.2.0" lazy val root = Project("frameless", file("." + "frameless")).in(file(".")) .aggregate(core, cats, dataset, ml, docs) @@ -40,8 +41,9 @@ lazy val dataset = project .settings(framelessTypedDatasetREPL: _*) .settings(publishSettings: _*) .settings(libraryDependencies ++= Seq( - "org.apache.spark" %% "spark-core" % sparkVersion % "provided", - "org.apache.spark" %% "spark-sql" % sparkVersion % "provided" + "org.apache.spark" %% "spark-core" % sparkVersion % "provided", + "org.apache.spark" %% "spark-sql" % sparkVersion % "provided", + "net.ceedubs" %% "irrec-regex-gen" % irrecVersion % Test )) .dependsOn(core % "test->test;compile->compile") diff --git a/dataset/src/test/scala/frameless/ColumnTests.scala b/dataset/src/test/scala/frameless/ColumnTests.scala index 9854e508..f29eb202 100644 --- a/dataset/src/test/scala/frameless/ColumnTests.scala +++ b/dataset/src/test/scala/frameless/ColumnTests.scala @@ -3,9 +3,11 @@ package frameless import java.time.Instant import org.scalacheck.Prop._ -import org.scalacheck.{Arbitrary, Gen, Prop} +import org.scalacheck.{Arbitrary, Gen, Prop}, Arbitrary.arbitrary import org.scalatest.Matchers._ import shapeless.test.illTyped +import ceedubs.irrec.regex._ +import ceedubs.irrec.regex.CharRegexGen.genCharRegexAndCandidate import scala.math.Ordering.Implicits._ @@ -196,18 +198,19 @@ class ColumnTests extends TypedDatasetSuite { import spark.implicits._ check { - forAll { (a: String, b: String) => + forAll(genCharRegexAndCandidate, arbitrary[String]) { (r, b) => + val a = r.candidate.mkString val ds = TypedDataset.create(X2(a, b) :: Nil) val typedLike = ds - .select(ds('a).rlike(a), ds('b).rlike(a), ds('a).rlike(".*")) + .select(ds('a).rlike(r.r.pprint), ds('b).rlike(r.r.pprint), ds('a).rlike(".*")) .collect() .run() .toList val untypedDs = ds.toDF() val untypedLike = untypedDs - .select(untypedDs("a").rlike(a), untypedDs("b").rlike(a), untypedDs("a").rlike(".*")) + .select(untypedDs("a").rlike(r.r.pprint), untypedDs("b").rlike(r.r.pprint), untypedDs("a").rlike(".*")) .as[(Boolean, Boolean, Boolean)] .collect() .toList