From 280a66ddc3cd9741ec58ab426f1c28da60523812 Mon Sep 17 00:00:00 2001
From: Cody Allen <ceedubs@gmail.com>
Date: Fri, 1 Feb 2019 16:37:33 -0800
Subject: [PATCH] Fix sporadic failure in rlike test

See conversation [here](https://github.com/typelevel/frameless/pull/342#issuecomment-448740999).

The `rlike` test was generating random strings and using them as regular
expressions. This occasionally caused the test to fail when a generated string
was not a valid regular expression (unclosed braces, etc.).

This PR solves the issue by using the library
[irrec](https://github.com/ceedubs/irrec). irrec provides Scalacheck generators
for regular expressions and candidate matches for them. Full disclosure: I'm the
only maintainer/contributor for irrec, and it should be considered a hobby
project. Having said that, it's only being added as a test dependency and it
fixes a sporadic build failure, so it may be worth it. @imarios suggested that
it would make a good PR
[here](https://github.com/typelevel/frameless/pull/342#issuecomment-449084595).

Feel free to merge if you like this, but I won't be offended if you don't :)
---
 build.sbt                                          |  6 ++++--
 dataset/src/test/scala/frameless/ColumnTests.scala | 11 +++++++----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/build.sbt b/build.sbt
index 16d55d15..5e1ededf 100644
--- a/build.sbt
+++ b/build.sbt
@@ -5,6 +5,7 @@ val catsMtlVersion = "0.3.0"
 val scalatest = "3.0.3"
 val shapeless = "2.3.2"
 val scalacheck = "1.14.0"
+val irrecVersion = "0.2.0"
 
 lazy val root = Project("frameless", file("." + "frameless")).in(file("."))
   .aggregate(core, cats, dataset, ml, docs)
@@ -40,8 +41,9 @@ lazy val dataset = project
   .settings(framelessTypedDatasetREPL: _*)
   .settings(publishSettings: _*)
   .settings(libraryDependencies ++= Seq(
-    "org.apache.spark" %% "spark-core" % sparkVersion % "provided",
-    "org.apache.spark" %% "spark-sql"  % sparkVersion % "provided"
+    "org.apache.spark" %% "spark-core"      % sparkVersion % "provided",
+    "org.apache.spark" %% "spark-sql"       % sparkVersion % "provided",
+    "net.ceedubs"      %% "irrec-regex-gen" % irrecVersion % Test
   ))
   .dependsOn(core % "test->test;compile->compile")
 
diff --git a/dataset/src/test/scala/frameless/ColumnTests.scala b/dataset/src/test/scala/frameless/ColumnTests.scala
index 9854e508..f29eb202 100644
--- a/dataset/src/test/scala/frameless/ColumnTests.scala
+++ b/dataset/src/test/scala/frameless/ColumnTests.scala
@@ -3,9 +3,11 @@ package frameless
 import java.time.Instant
 
 import org.scalacheck.Prop._
-import org.scalacheck.{Arbitrary, Gen, Prop}
+import org.scalacheck.{Arbitrary, Gen, Prop}, Arbitrary.arbitrary
 import org.scalatest.Matchers._
 import shapeless.test.illTyped
+import ceedubs.irrec.regex._
+import ceedubs.irrec.regex.CharRegexGen.genCharRegexAndCandidate
 
 import scala.math.Ordering.Implicits._
 
@@ -196,18 +198,19 @@ class ColumnTests extends TypedDatasetSuite {
     import spark.implicits._
 
     check {
-      forAll { (a: String, b: String) =>
+      forAll(genCharRegexAndCandidate, arbitrary[String]) { (r, b) =>
+        val a = r.candidate.mkString
         val ds = TypedDataset.create(X2(a, b) :: Nil)
 
         val typedLike = ds
-          .select(ds('a).rlike(a), ds('b).rlike(a), ds('a).rlike(".*"))
+          .select(ds('a).rlike(r.r.pprint), ds('b).rlike(r.r.pprint), ds('a).rlike(".*"))
           .collect()
           .run()
           .toList
 
         val untypedDs = ds.toDF()
         val untypedLike = untypedDs
-          .select(untypedDs("a").rlike(a), untypedDs("b").rlike(a), untypedDs("a").rlike(".*"))
+          .select(untypedDs("a").rlike(r.r.pprint), untypedDs("b").rlike(r.r.pprint), untypedDs("a").rlike(".*"))
           .as[(Boolean, Boolean, Boolean)]
           .collect()
           .toList