Fix sporadic failure in rlike test
See conversation [here](typelevel#342 (comment)).

The `rlike` test was generating random strings and using them as regular
expressions. This occasionally caused test failures when a generated string did
not form a valid regular expression (unclosed braces, etc.).
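For illustration only (this snippet is not part of the change), compiling an arbitrary string with Java's regex engine, the same engine Spark's `rlike` relies on, fails whenever the string does not form a valid pattern:

```scala
import java.util.regex.{Pattern, PatternSyntaxException}

// A few hypothetical sample strings: the first is a valid pattern, the others
// fail to compile (unclosed character class / unclosed counted brace).
val samples = List("abc", "a[b", "foo{2,")

samples.foreach { s =>
  try {
    Pattern.compile(s)
    println(s"'$s' compiles as a regex")
  } catch {
    case e: PatternSyntaxException =>
      println(s"'$s' is not a valid regex: ${e.getDescription}")
  }
}
```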

This PR solves the issue by using the library
[irrec](https://github.com/ceedubs/irrec). irrec provides ScalaCheck generators
for regular expressions and candidate matches for them. Full disclosure: I'm the
only maintainer/contributor for irrec, and it should be considered a hobby
project. Having said that, it's only being added as a test dependency and it
fixes a sporadic build failure, so it may be worth it. @imarios suggested that
it would make a good PR
[here](typelevel#342 (comment)).
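As a rough sketch of how the generator is used (the names `genCharRegexAndCandidate`, `r.r`, `r.candidate`, and `pprint` are taken from the diff below; the property name and the rest are assumed irrec 0.2.0 / ScalaCheck usage), each generated value pairs a regex with a candidate string that should match it:

```scala
import ceedubs.irrec.regex._
import ceedubs.irrec.regex.CharRegexGen.genCharRegexAndCandidate
import org.scalacheck.Prop._

// Each generated value carries a regex (r.r) together with the characters of a
// candidate match (r.candidate), so the pattern handed to rlike is always valid
// and always has at least one string known to match it.
val regexAndCandidateAgree = forAll(genCharRegexAndCandidate) { r =>
  val pattern   = r.r.pprint            // printable form of the generated regex
  val candidate = r.candidate.mkString  // a string expected to match the regex
  candidate.matches(pattern)
}
```

In the test below, `r.r.pprint` is what gets passed to `rlike`, and `r.candidate.mkString` supplies a column value that is known to match it.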

Feel free to merge if you like this, but I won't be offended if you don't :)
ceedubs committed Feb 2, 2019
1 parent 0d52c03 commit 280a66d
Showing 2 changed files with 11 additions and 6 deletions.
6 changes: 4 additions & 2 deletions build.sbt
@@ -5,6 +5,7 @@ val catsMtlVersion = "0.3.0"
 val scalatest = "3.0.3"
 val shapeless = "2.3.2"
 val scalacheck = "1.14.0"
+val irrecVersion = "0.2.0"

 lazy val root = Project("frameless", file("." + "frameless")).in(file("."))
   .aggregate(core, cats, dataset, ml, docs)
@@ -40,8 +41,9 @@ lazy val dataset = project
   .settings(framelessTypedDatasetREPL: _*)
   .settings(publishSettings: _*)
   .settings(libraryDependencies ++= Seq(
-    "org.apache.spark" %% "spark-core" % sparkVersion % "provided",
-    "org.apache.spark" %% "spark-sql" % sparkVersion % "provided"
+    "org.apache.spark" %% "spark-core" % sparkVersion % "provided",
+    "org.apache.spark" %% "spark-sql" % sparkVersion % "provided",
+    "net.ceedubs" %% "irrec-regex-gen" % irrecVersion % Test
   ))
   .dependsOn(core % "test->test;compile->compile")
11 changes: 7 additions & 4 deletions dataset/src/test/scala/frameless/ColumnTests.scala
@@ -3,9 +3,11 @@ package frameless
 import java.time.Instant

 import org.scalacheck.Prop._
-import org.scalacheck.{Arbitrary, Gen, Prop}
+import org.scalacheck.{Arbitrary, Gen, Prop}, Arbitrary.arbitrary
 import org.scalatest.Matchers._
 import shapeless.test.illTyped
+import ceedubs.irrec.regex._
+import ceedubs.irrec.regex.CharRegexGen.genCharRegexAndCandidate

 import scala.math.Ordering.Implicits._

@@ -196,18 +198,19 @@ class ColumnTests extends TypedDatasetSuite {
     import spark.implicits._

     check {
-      forAll { (a: String, b: String) =>
+      forAll(genCharRegexAndCandidate, arbitrary[String]) { (r, b) =>
+        val a = r.candidate.mkString
         val ds = TypedDataset.create(X2(a, b) :: Nil)

         val typedLike = ds
-          .select(ds('a).rlike(a), ds('b).rlike(a), ds('a).rlike(".*"))
+          .select(ds('a).rlike(r.r.pprint), ds('b).rlike(r.r.pprint), ds('a).rlike(".*"))
           .collect()
           .run()
           .toList

         val untypedDs = ds.toDF()
         val untypedLike = untypedDs
-          .select(untypedDs("a").rlike(a), untypedDs("b").rlike(a), untypedDs("a").rlike(".*"))
+          .select(untypedDs("a").rlike(r.r.pprint), untypedDs("b").rlike(r.r.pprint), untypedDs("a").rlike(".*"))
           .as[(Boolean, Boolean, Boolean)]
           .collect()
           .toList
