Skip to content

Commit

Permalink
Improving test coverage.
Browse files Browse the repository at this point in the history
  • Loading branch information
imarios committed Jan 30, 2018
1 parent bc49c8a commit dc42bfb
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 88 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,6 @@ import org.apache.spark.sql.{functions => untyped}
import frameless.syntax._

trait AggregateFunctions {

/** Creates a [[frameless.TypedAggregate]] of literal value. If A is to be encoded using an Injection make
* sure the injection instance is in scope.
*
* @param value the literal value to lift into an aggregate column
*
* apache/spark
*/
def litAggr[A: TypedEncoder, T](value: A): TypedAggregate[T, A] =
frameless.functions.lit(value).untyped.typedAggregate

/** Aggregate function: returns the number of items in a group.
*
* apache/spark
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,35 +175,38 @@ trait NonAggregateFunctions {
column.typed(untyped.base64(column.untyped))

/** Non-Aggregate function: concatenates the given string columns into a single string column.
*
* apache/spark
*/
def concat[T](c1: TypedColumn[T, String], xs: TypedColumn[T, String]*): TypedColumn[T, String] = {
  // Fold the mandatory first column together with the varargs before dropping to untyped.
  val allColumns = c1 +: xs
  c1.typed(untyped.concat(allColumns.map(_.untyped): _*))
}
/** Non-Aggregate function: concatenates the given string columns into a single string column.
*
* apache/spark
*/
def concat[T](columns: TypedColumn[T, String]*): TypedColumn[T, String] = {
  val underlying = columns.map(_.untyped)
  new TypedColumn(untyped.concat(underlying: _*))
}

/** Non-Aggregate function: Concatenates multiple input string columns together into a single string column,
* using the given separator.
/** Non-Aggregate function: Concatenates multiple input string columns together into a single string column.
* @note varargs make it harder to generalize so we overload the method for [[TypedColumn]] and [[TypedAggregate]]
*
* apache/spark
*/
/** Non-Aggregate function: concatenates the given string columns into one string column,
* placing `sep` between consecutive values.
*
* apache/spark
*/
def concatWs[T](sep: String, c1: TypedColumn[T, String], xs: TypedColumn[T, String]*): TypedColumn[T, String] = {
  val allColumns = c1 +: xs
  c1.typed(untyped.concat_ws(sep, allColumns.map(_.untyped): _*))
}
/** Non-Aggregate function: concatenates the given aggregate string columns into a single
* string column.
*
* apache/spark
*/
def concat[T](columns: TypedAggregate[T, String]*): TypedAggregate[T, String] = {
  val underlying = columns.map(_.untyped)
  new TypedAggregate(untyped.concat(underlying: _*))
}

/** Non-Aggregate function: Concatenates multiple input string columns together into a single string column.
/** Non-Aggregate function: Concatenates multiple input string columns together into a single string column,
* using the given separator.
* @note varargs make it harder to generalize so we overload the method for [[TypedColumn]] and [[TypedAggregate]]
*
* apache/spark
*/
/** Non-Aggregate function: concatenates the given aggregate string columns into a single
* string column.
*
* apache/spark
*/
def concat[T](c1: TypedAggregate[T, String], xs: TypedAggregate[T, String]*): TypedAggregate[T, String] = {
  val allColumns = c1 +: xs
  c1.typed(untyped.concat(allColumns.map(_.untyped): _*))
}

/** Non-Aggregate function: concatenates the given aggregate string columns into one string
* column, placing `sep` between consecutive values.
*
* apache/spark
*/
def concatWs[T](sep: String, columns: TypedAggregate[T, String]*): TypedAggregate[T, String] = {
  val underlying = columns.map(_.untyped)
  new TypedAggregate(untyped.concat_ws(sep, underlying: _*))
}

/** Non-Aggregate function: Concatenates multiple input string columns together into a single string column,
* using the given separator.
* @note varargs make it harder to generalize so we overload the method for [[TypedColumn]] and [[TypedAggregate]]
*
* apache/spark
*/
/** Non-Aggregate function: concatenates the given aggregate string columns into one string
* column, placing `sep` between consecutive values.
*
* apache/spark
*/
def concatWs[T](sep: String, c1: TypedAggregate[T, String], xs: TypedAggregate[T, String]*): TypedAggregate[T, String] = {
  val allColumns = c1 +: xs
  c1.typed(untyped.concat_ws(sep, allColumns.map(_.untyped): _*))
}
/** Non-Aggregate function: concatenates the given string columns into one string column,
* placing `sep` between consecutive values.
*
* apache/spark
*/
def concatWs[T](sep: String, columns: TypedColumn[T, String]*): TypedColumn[T, String] = {
  val underlying = columns.map(_.untyped)
  new TypedColumn(untyped.concat_ws(sep, underlying: _*))
}

/** Non-Aggregate function: Locates the position of the first occurrence of substring column
* in given string
Expand Down
11 changes: 11 additions & 0 deletions dataset/src/main/scala/frameless/functions/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,20 @@ package object functions extends Udf with UnaryFunctions {
object aggregate extends AggregateFunctions
object nonAggregate extends NonAggregateFunctions

/** Creates a [[frameless.TypedAggregate]] of literal value. If A is to be encoded using an
* Injection make sure the injection instance is in scope.
*
* apache/spark
*/
def litAggr[A: TypedEncoder, T](value: A): TypedAggregate[T, A] = {
  // Reuse the plain literal column and re-wrap its expression as an aggregate column.
  val literal = lit[A, T](value)
  new TypedAggregate[T, A](literal.expr)
}


/** Creates a [[frameless.TypedColumn]] of literal value. If A is to be encoded using an Injection make
* sure the injection instance is in scope.
*
* apache/spark
*/
def lit[A: TypedEncoder, T](value: A): TypedColumn[T, A] = {
val encoder = TypedEncoder[A]

Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
package frameless.functions
package frameless
package functions

/**
 * Some statistical functions in Spark can result in Double, Double.NaN or Null.
 * This tends to break ?= of the property based testing. Use the nanNullHandler function
 * here to alleviate this by mapping NaN and Null to None, which makes the comparison
 * work again.
 */
object DoubleBehaviourUtils {
  // Spark uses Double.NaN for some semantics in the correlation function. ?= for prop
  // testing compares with ==, which breaks because Double.NaN != Double.NaN, so NaN is
  // mapped to None. `isNaN` is the idiomatic NaN check and avoids boxing the Double for
  // `.equals`; `Some` (rather than `Option`) states that an unboxed Double is never null.
  private val nanHandler: Double => Option[Double] =
    value => if (value.isNaN) None else Some(value)

  /** Maps the raw result of a Spark statistical function to an Option[Double]:
    * null => None (so row.getAs[Double]'s .asInstanceOf cannot silently yield 0.0d),
    * NaN => None, any other Double => Some. Any other runtime type is a programming
    * error and fails fast.
    */
  val nanNullHandler: Any => Option[Double] = {
    case null      => None
    case d: Double => nanHandler(d)
    case _         => ??? // unexpected type: deliberate hard failure in test utility
  }
}
Loading

0 comments on commit dc42bfb

Please sign in to comment.