Skip to content

Commit

Permalink
Improving test coverage.
Browse files Browse the repository at this point in the history
  • Loading branch information
imarios committed Jan 30, 2018
1 parent bc49c8a commit dc42bfb
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 88 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,6 @@ import org.apache.spark.sql.{functions => untyped}
import frameless.syntax._

trait AggregateFunctions {

/** Creates a [[frameless.TypedAggregate]] of literal value. If A is to be encoded using an Injection make
* sure the injection instance is in scope.
*
* @param value the literal value to lift into an aggregate column
*
* apache/spark
*/
def litAggr[A: TypedEncoder, T](value: A): TypedAggregate[T, A] =
frameless.functions.lit(value).untyped.typedAggregate

/** Aggregate function: returns the number of items in a group.
*
* apache/spark
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,35 +175,38 @@ trait NonAggregateFunctions {
column.typed(untyped.base64(column.untyped))

/** Non-Aggregate function: concatenates the given string columns into a single string column.
*
* apache/spark
*/
def concat[T](c1: TypedColumn[T, String], xs: TypedColumn[T, String]*): TypedColumn[T, String] = {
  // Fold the mandatory first column together with the varargs before dropping to untyped.
  val allColumns = c1 +: xs
  c1.typed(untyped.concat(allColumns.map(_.untyped): _*))
}
/** Non-Aggregate function: concatenates the given string columns into a single string column.
*
* apache/spark
*/
def concat[T](columns: TypedColumn[T, String]*): TypedColumn[T, String] = {
  val underlying = columns.map(_.untyped)
  new TypedColumn(untyped.concat(underlying: _*))
}

/** Non-Aggregate function: Concatenates multiple input string columns together into a single string column,
* using the given separator.
/** Non-Aggregate function: Concatenates multiple input string columns together into a single string column.
* @note varargs make it harder to generalize so we overload the method for [[TypedColumn]] and [[TypedAggregate]]
*
* apache/spark
*/
/** Non-Aggregate function: concatenates the given string columns into one string column,
* placing `sep` between consecutive values.
*
* apache/spark
*/
def concatWs[T](sep: String, c1: TypedColumn[T, String], xs: TypedColumn[T, String]*): TypedColumn[T, String] = {
  val allColumns = c1 +: xs
  c1.typed(untyped.concat_ws(sep, allColumns.map(_.untyped): _*))
}
/** Non-Aggregate function: concatenates the given aggregate string columns into a single
* string column.
*
* apache/spark
*/
def concat[T](columns: TypedAggregate[T, String]*): TypedAggregate[T, String] = {
  val underlying = columns.map(_.untyped)
  new TypedAggregate(untyped.concat(underlying: _*))
}

/** Non-Aggregate function: Concatenates multiple input string columns together into a single string column.
/** Non-Aggregate function: Concatenates multiple input string columns together into a single string column,
* using the given separator.
* @note varargs make it harder to generalize so we overload the method for [[TypedColumn]] and [[TypedAggregate]]
*
* apache/spark
*/
/** Non-Aggregate function: concatenates the given aggregate string columns into a single
* string column.
*
* apache/spark
*/
def concat[T](c1: TypedAggregate[T, String], xs: TypedAggregate[T, String]*): TypedAggregate[T, String] = {
  val allColumns = c1 +: xs
  c1.typed(untyped.concat(allColumns.map(_.untyped): _*))
}

/** Non-Aggregate function: concatenates the given aggregate string columns into one string
* column, placing `sep` between consecutive values.
*
* apache/spark
*/
def concatWs[T](sep: String, columns: TypedAggregate[T, String]*): TypedAggregate[T, String] = {
  val underlying = columns.map(_.untyped)
  new TypedAggregate(untyped.concat_ws(sep, underlying: _*))
}

/** Non-Aggregate function: Concatenates multiple input string columns together into a single string column,
* using the given separator.
* @note varargs make it harder to generalize so we overload the method for [[TypedColumn]] and [[TypedAggregate]]
*
* apache/spark
*/
/** Non-Aggregate function: concatenates the given aggregate string columns into one string
* column, placing `sep` between consecutive values.
*
* apache/spark
*/
def concatWs[T](sep: String, c1: TypedAggregate[T, String], xs: TypedAggregate[T, String]*): TypedAggregate[T, String] = {
  val allColumns = c1 +: xs
  c1.typed(untyped.concat_ws(sep, allColumns.map(_.untyped): _*))
}
/** Non-Aggregate function: concatenates the given string columns into one string column,
* placing `sep` between consecutive values.
*
* apache/spark
*/
def concatWs[T](sep: String, columns: TypedColumn[T, String]*): TypedColumn[T, String] = {
  val underlying = columns.map(_.untyped)
  new TypedColumn(untyped.concat_ws(sep, underlying: _*))
}

/** Non-Aggregate function: Locates the position of the first occurrence of substring column
* in given string
Expand Down
11 changes: 11 additions & 0 deletions dataset/src/main/scala/frameless/functions/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,20 @@ package object functions extends Udf with UnaryFunctions {
object aggregate extends AggregateFunctions
object nonAggregate extends NonAggregateFunctions

/** Creates a [[frameless.TypedAggregate]] of literal value. If A is to be encoded using an
* Injection make sure the injection instance is in scope.
*
* apache/spark
*/
def litAggr[A: TypedEncoder, T](value: A): TypedAggregate[T, A] = {
  // Reuse the plain literal column and re-wrap its expression as an aggregate column.
  val literal = lit[A, T](value)
  new TypedAggregate[T, A](literal.expr)
}


/** Creates a [[frameless.TypedColumn]] of literal value. If A is to be encoded using an Injection make
* sure the injection instance is in scope.
*
* apache/spark
*/
def lit[A: TypedEncoder, T](value: A): TypedColumn[T, A] = {
val encoder = TypedEncoder[A]

Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
package frameless.functions
package frameless
package functions

/**
 * Some statistical functions in Spark can result in Double, Double.NaN or Null.
 * This tends to break ?= of the property based testing. Use the nanNullHandler function
 * here to alleviate this by mapping NaN and Null to None, which makes the comparison
 * work again.
 */
object DoubleBehaviourUtils {
  // Spark uses Double.NaN for some semantics in the correlation function. ?= for prop
  // testing compares with ==, which breaks because Double.NaN != Double.NaN, so NaN is
  // mapped to None. `isNaN` is the idiomatic NaN check and avoids boxing the Double for
  // `.equals`; `Some` (rather than `Option`) states that an unboxed Double is never null.
  private val nanHandler: Double => Option[Double] =
    value => if (value.isNaN) None else Some(value)

  /** Maps the raw result of a Spark statistical function to an Option[Double]:
    * null => None (so row.getAs[Double]'s .asInstanceOf cannot silently yield 0.0d),
    * NaN => None, any other Double => Some. Any other runtime type is a programming
    * error and fails fast.
    */
  val nanNullHandler: Any => Option[Double] = {
    case null      => None
    case d: Double => nanHandler(d)
    case _         => ??? // unexpected type: deliberate hard failure in test utility
  }
}
Loading

0 comments on commit dc42bfb

Please sign in to comment.