diff --git a/CHANGELOG.md b/CHANGELOG.md
index c57ac416..5cf37eec 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,9 @@
+### Big Data Types v1.2.0
+- New module for Circe (JSON)
+ - Conversion from Circe to other types
+ - New docs
+ - More examples
+
### Big Data Types v1.1.2
- Cassandra
- Fixed minor issues on parser
diff --git a/README.md b/README.md
index ceaf06d0..04aebcbd 100644
--- a/README.md
+++ b/README.md
@@ -14,12 +14,13 @@ Check the [Documentation website](https://data-tools.github.io/big-data-types) t
# Available conversions:
-| From / To |Scala Types |BigQuery |Spark |Cassandra |
-|------------|:----------------:|:----------------:|:----------------:|:----------------:|
-|Scala Types | - |:white_check_mark:|:white_check_mark:|:white_check_mark:|
-|BigQuery | | - |:white_check_mark:|:white_check_mark:|
-|Spark | |:white_check_mark:| - |:white_check_mark:|
-|Cassandra | |:white_check_mark:|:white_check_mark:| - |
+| From / To |Scala Types |BigQuery |Spark |Cassandra | Circe (JSON) |
+|--------------|:----------------:|:----------------:|:----------------:|:----------------:|:------------:|
+| Scala Types | - |:white_check_mark:|:white_check_mark:|:white_check_mark:| |
+| BigQuery | | - |:white_check_mark:|:white_check_mark:| |
+| Spark | |:white_check_mark:| - |:white_check_mark:| |
+| Cassandra | |:white_check_mark:|:white_check_mark:| - | |
+| Circe (JSON) | |:white_check_mark:|:white_check_mark:|:white_check_mark:| |
Versions for Scala ![Scala 2.12](https://img.shields.io/badge/Scala-2.12-red) ,![Scala_2.13](https://img.shields.io/badge/Scala-2.13-red)
diff --git a/build.sbt b/build.sbt
index b0cd8106..afed95e2 100644
--- a/build.sbt
+++ b/build.sbt
@@ -1,5 +1,5 @@
//used to build Sonatype releases
-lazy val versionNumber = "1.1.2"
+lazy val versionNumber = "1.2.0"
lazy val projectName = "big-data-types"
version := versionNumber
name := projectName
@@ -26,7 +26,7 @@ lazy val publishSettings = Seq(
ScmInfo(url("https://github.com/data-tools/big-data-types"), "git@github.com:data-tools/big-data-types.git")
),
developers := List(Developer("JavierMonton", "Javier Monton", "", url("https://github.com/JavierMonton"))),
- licenses := Seq("APL2" -> url("http://www.apache.org/licenses/LICENSE-2.0.txt")),
+ licenses := Seq("APL2" -> url("https://www.apache.org/licenses/LICENSE-2.0.txt")),
publishMavenStyle := true
)
@@ -69,6 +69,13 @@ lazy val cassandraDependencies = Seq(
scalatest % Test
)
+val circeVersion = "0.14.1"
+lazy val jsonCirceDependencies = Seq(
+ "io.circe" %% "circe-core",
+ "io.circe" %% "circe-generic",
+ "io.circe" %% "circe-parser"
+ ).map(_ % circeVersion)
+
lazy val scalatest = "org.scalatest" %% "scalatest" % "3.2.11"
//Project settings
@@ -80,6 +87,7 @@ lazy val root = (project in file("."))
bigquery,
spark,
cassandra,
+ jsonCirce,
examples
)
@@ -135,6 +143,18 @@ lazy val cassandra = (project in file("cassandra"))
)
.dependsOn(core % "test->test;compile->compile")
+lazy val jsonCirce = (project in file("jsoncirce"))
+ .configs(IntegrationTest)
+ .settings(
+ name := projectName + "-circe",
+ publishSettings,
+ scalacOptions ++= scalacCommon,
+ crossScalaVersions := supportedScalaVersions,
+ crossVersionSharedSources,
+ libraryDependencies ++= jsonCirceDependencies
+ )
+ .dependsOn(core % "test->test;compile->compile")
+
// Examples module for testing, with all modules included, not built
lazy val examples = (project in file("examples"))
.settings(
@@ -147,6 +167,7 @@ lazy val examples = (project in file("examples"))
.dependsOn(core % "test->test;compile->compile")
.dependsOn(bigquery % "test->test;compile->compile")
.dependsOn(cassandra % "test->test;compile->compile")
+ .dependsOn(jsonCirce % "test->test;compile->compile")
.settings(
noPublishSettings,
crossScalaVersions := List(scala212, scala213),
diff --git a/core/src/test/scala_2/org/datatools/bigdatatypes/TestTypes.scala b/core/src/test/scala_2/org/datatools/bigdatatypes/TestTypes.scala
index 2fff37ea..6dd6c7e7 100644
--- a/core/src/test/scala_2/org/datatools/bigdatatypes/TestTypes.scala
+++ b/core/src/test/scala_2/org/datatools/bigdatatypes/TestTypes.scala
@@ -1,8 +1,8 @@
package org.datatools.bigdatatypes
-import org.datatools.bigdatatypes.basictypes.SqlType._
-import org.datatools.bigdatatypes.basictypes.SqlTypeMode._
-import org.datatools.bigdatatypes.basictypes._
+import org.datatools.bigdatatypes.basictypes.SqlType.*
+import org.datatools.bigdatatypes.basictypes.SqlTypeMode.*
+import org.datatools.bigdatatypes.basictypes.*
import java.sql.{Date, Timestamp}
diff --git a/docs/Contributing/CreateNewType.md b/docs/Contributing/CreateNewType.md
index b9b11d79..a77c1413 100644
--- a/docs/Contributing/CreateNewType.md
+++ b/docs/Contributing/CreateNewType.md
@@ -8,19 +8,19 @@ This is a guide on how to add a new type to the library
- [How to develop a new type](#how-to-develop-a-new-type)
- [How it works](#how-it-works)
* [SqlType ADT](#sqltype-adt)
- * [Conversion / Reverse Conversion](#conversion---reverse-conversion)
+ * [Conversion / Reverse Conversion](#conversion--reverse-conversion)
+ [Conversion](#conversion)
+ [Reverse Conversion](#reverse-conversion)
- [How to do it](#how-to-do-it)
* [Create a new subproject in SBT](#create-a-new-subproject-in-sbt)
- * [Conversion: Type Class - SqlType to New Type](#conversion--type-class---sqltype-to-new-type)
+ * [Conversion: Type Class - SqlType to New Type](#conversion-type-class---sqltype-to-new-type)
+ [Defining the syntax](#defining-the-syntax)
+ [Implementing the Type Class](#implementing-the-type-class)
- [Mode inside Types](#mode-inside-types)
+ [Everything together](#everything-together)
- * [Conversion: SqlInstance to New Type](#conversion--sqlinstance-to-new-type)
- * [Reverse conversion: New Type to SqlType](#reverse-conversion--new-type-to-sqltype)
- * [Everything together](#everything-together-1)
+ * [Conversion: SqlInstance to New Type](#conversion-sqlinstance-to-new-type)
+ * [Reverse conversion: New Type to SqlType](#reverse-conversion-new-type-to-sqltype)
+ * [Everything together](#everything-together)
## How to develop a new type
@@ -78,7 +78,7 @@ an existing _Type Class_ called `SqlTypeConversion`
By doing this, we will get automatically conversion to the rest of the types of the library
-# How to do it
+## How to do it
As covered in [Conversion](#conversion), we have to implement 2 types classes, one for types, another for instances.
Both will derive `SqlTypeConversion` type class into our specific type and by doing so, we will get automatically all conversions into our new type
@@ -123,6 +123,81 @@ lazy val root = (project in file("."))
Now, you can create a new root folder with your type name with the typical structure (src/main/scala_ ...)
+## Preparing Tests
+:::note
+You can develop the conversion before tests, but we recommend creating a set of tests before starting to develop a new type,
+as it helps a lot to understand your new type and how it is being created.
+Sometimes a type is not as easy as it seems.
+:::
+
+In the `core` module of the library there are some case classes that should cover all the different scenarios
+ (different types, lists, objects, deep nested objects) so the testing part will consist of:
+- 1 - Create instances of your new types
+- 2 - Pick the already defined [Test Case Classes](../../core/src/test/scala_2/org/datatools/bigdatatypes/TestTypes.scala)
+- Test that 1 can be converted into 2
+- Test that 2 can be converted into 1
+
+:::tip
+You will need to understand the following about your new type:
+- How types are being created
+- How nullable fields work (with Optional types, nullable parameters ...)
+- How lists and nested objects work (if they exist)
+:::
+
+To do so, first, create a new `test/scala` folder with `org.datatools.bigdatatypes` and create an object like `MyTypeTestTypes`
+
+See the example of Spark Types:
+```scala
+object SparkTestTypes {
+
+ val basicFields: Seq[StructField] =
+ List(
+ StructField("myInt", IntegerType, nullable = false),
+ StructField("myLong", LongType, nullable = false),
+ StructField("myFloat", FloatType, nullable = false),
+ StructField("myDouble", DoubleType, nullable = false),
+ StructField("myDecimal", DataTypes.createDecimalType, nullable = false),
+ StructField("myBoolean", BooleanType, nullable = false),
+ StructField("myString", StringType, nullable = false)
+ )
+ val basicWithList: Seq[StructField] =
+ List(
+ StructField("myInt", IntegerType, nullable = false),
+ StructField("myList", ArrayType(IntegerType), nullable = true)
+ )
+// ...
+}
+```
+Create a new package for your tests called `myType` and add there a new class for each conversion.
+
+### Tests for reverse conversion
+From our type to the generic one
+
+Create a file called `MyTypeConversionSpec` and add there some tests. You can add the following tests:
+- Simple individual type
+- Product type (case class / object)
+- Lists
+- Nested objects
+- Some extra tests for extension methods (syntactic sugars like `.asSqlType` or `.asBigQuery` in normal conversion)
+
+e.g. from Spark:
+```scala
+class SparkTypeConversionSpec extends UnitSpec {
+
+ "Simple Spark DataType" should "be converted into SqlType" in {
+ SqlTypeConversion[IntegerType].getType shouldBe SqlInt()
+ }
+
+ "StructField nullable" should "be converted into Nullable SqlType" in {
+ val sf = StructField("myInt", IntegerType, nullable = true)
+ sf.asSqlType shouldBe SqlInt(Nullable)
+ SqlInstanceConversion[StructField].getType(sf) shouldBe SqlInt(Nullable)
+ }
+ // ...
+}
+```
+
+
## Conversion: Type Class - SqlType to New Type
### Defining the syntax
@@ -176,7 +251,9 @@ As the types usually can be recursive (nested objects) we can start defining a m
getSchemaWithName(f.transformKey(name, sqlType), sqlType) :: getSchema(SqlStruct(records, mode))
}
```
-**_Note:_** this method probably could be copied, changing only the return type for our type
+:::tip
+This method can probably be copied, changing only the return type for our type. You will create `getSchemaWithName` in the next step
+:::
And another method (`getSchemaWithName` in this example) to specify the specific types:
In this case, we are showing an example from BigQuery as it seems simpler to understand:
@@ -190,8 +267,13 @@ In this case, we are showing an example from BigQuery as it seems simpler to und
Field.newBuilder(name, StandardSQLTypeName.INT64).setMode(sqlModeToBigQueryMode(mode)).build()
case SqlFloat(mode) =>
Field.newBuilder(name, StandardSQLTypeName.FLOAT64).setMode(sqlModeToBigQueryMode(mode)).build()
- ...
- ...
+ case SqlDouble(mode) => ???
+ case SqlDecimal(mode) => ???
+ case SqlBool(mode) => ???
+ case SqlString(mode) => ???
+ case SqlTimestamp(mode) => ???
+ case SqlDate(mode) => ???
+ case SqlStruct(subType, mode) => ???
}
```
Same example from Spark:
@@ -377,6 +459,9 @@ object SparkTypeConversion {
implicit val longType: SqlTypeConversion[LongType] = SqlTypeConversion.instance(SqlLong())
implicit val doubleType: SqlTypeConversion[DoubleType] = SqlTypeConversion.instance(SqlDouble())
```
+:::tip
+You can copy&paste all the available types from others modules like the [Spark one](../../spark/src/main/scala/org/datatools/bigdatatypes/spark/SparkTypeConversion.scala)
+:::
- Probably we use an instance of our type, for example, in Spark, we have `StructField` and `StructType` as instances, so we cover them using `SqlInstanceConversion` _Type Class_. In Cassandra we use internally a tuple `(String, DataType)`, and it also works
diff --git a/docs/Modules/Circe.md b/docs/Modules/Circe.md
new file mode 100644
index 00000000..c5a8a301
--- /dev/null
+++ b/docs/Modules/Circe.md
@@ -0,0 +1,68 @@
+---
+sidebar_position: 6
+---
+# Circe (JSON)
+
+[Circe](https://circe.github.io/circe/) is a JSON library for Scala.
+
+The Circe module of this library allows converting `Json` objects (from Circe) to any other type in the library.
+:::caution
+For now, only conversions from Circe to other types are available; conversions from other types to Circe are not ready yet.
+:::
+
+:::info
+Json objects do not have very concrete types, meaning that `number` is a type,
+but more specific types like `integer`, `float` or others do not exist.
+Because of that, any conversion between types will convert `number` into `Decimal` types,
+as `Decimal` is the only one that can ensure the precision of any arbitrary number
+:::
+
+### About Circe and private types
+Circe has more specific types than `JNumber`, like `JLong`, `JDouble` and others,
+but all of them are private to Circe itself, so we can not use them, not even for matching types during conversions.
+In any case, even if we were able to use them, when parsing a JSON string (probably most of the cases)
+we can not detect the specific types
+
+ +```scala +import io.Circe.Json +import org.datatools.bigdatatypes.circe.CirceTypeConversion.* +import org.datatools.bigdatatypes.formats.Formats.implicitDefaultFormats +import org.datatools.bigdatatypes.spark.SqlInstanceToSpark.InstanceSyntax +import org.datatools.bigdatatypes.bigquery.SqlInstanceToBigQuery.* + + val circeJson: Json = Json.fromFields(List( + ("id", Json.fromString("test")), + ("foo", Json.fromString("test")), + ("bar", Json.fromInt(1)) + )) + + val sparkSchema: StructType = circeJson.asSparkSchema + val bqSchema: Schema = circeJson.asBigQuery.schema +``` + +Or if you do it from a JSON parsed using Circe: +```scala +import io.circe._, io.circe.parser._ + +import org.datatools.bigdatatypes.circe.CirceTypeConversion.* +import org.datatools.bigdatatypes.formats.Formats.implicitDefaultFormats +import org.datatools.bigdatatypes.spark.SqlInstanceToSpark.InstanceSyntax +import org.datatools.bigdatatypes.bigquery.SqlInstanceToBigQuery.* + +val rawJson: String = """ +{ + "foo": "bar", + "baz": 123, + "list of stuff": [ 4, 5, 6 ] +} +""" +val parseResult = parse(rawJson) +// parseResult: Either[ParsingFailure, Json] +val sparkSchema = parseResult.map(j => j.asSparkSchema) +// sparkSchema: Either[ParsingFailure, StructType] +val bqSchema = parseResult.map(j => j.asBigQuery.schema) +// bqSchema: Either[ParsingFailure, Schema] +``` + +--- diff --git a/docs/intro.md b/docs/intro.md index 909a1853..fbccb59e 100644 --- a/docs/intro.md +++ b/docs/intro.md @@ -33,11 +33,12 @@ or a BigQuery table into a Cassandra table without having code that relates thos ### Available conversions: -| From / To |Scala Types |BigQuery |Spark |Cassandra | -|------------|:----------------:|:----------------:|:----------------:|:----------------:| -|Scala Types | - |:white_check_mark:|:white_check_mark:|:white_check_mark:| -|BigQuery | | - |:white_check_mark:|:white_check_mark:| -|Spark | |:white_check_mark:| - |:white_check_mark:| -|Cassandra | |:white_check_mark:|:white_check_mark:| 
- | +| From / To |Scala Types |BigQuery |Spark |Cassandra | Circe (JSON) | +|--------------|:----------------:|:----------------:|:----------------:|:----------------:|:------------:| +| Scala Types | - |:white_check_mark:|:white_check_mark:|:white_check_mark:| | +| BigQuery | | - |:white_check_mark:|:white_check_mark:| | +| Spark | |:white_check_mark:| - |:white_check_mark:| | +| Cassandra | |:white_check_mark:|:white_check_mark:| - | | +| Circe (JSON) | |:white_check_mark:|:white_check_mark:|:white_check_mark:| | diff --git a/examples/src/test/scala/org/datatools/bigdatatypes/CassandraToOthers.scala b/examples/src/test/scala/org/datatools/bigdatatypes/CassandraToOthers.scala index 474c881a..72b628a9 100644 --- a/examples/src/test/scala/org/datatools/bigdatatypes/CassandraToOthers.scala +++ b/examples/src/test/scala/org/datatools/bigdatatypes/CassandraToOthers.scala @@ -21,14 +21,18 @@ class CassandraToOthers extends UnitSpec { .withColumn("foo", DataTypes.TEXT) .withColumn("bar", DataTypes.INT) + val fields: List[Field] = List( + Field.newBuilder("id", StandardSQLTypeName.STRING).setMode(Mode.REQUIRED).build(), + Field.newBuilder("foo", StandardSQLTypeName.STRING).setMode(Mode.REQUIRED).build(), + Field.newBuilder("bar", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build() + ) + val bqSchema: Schema = Schema.of(toJava(fields)) + "Cassandra table" should "be converted into BigQuery Schema" in { - val fields = List( - Field.newBuilder("id", StandardSQLTypeName.STRING).setMode(Mode.REQUIRED).build(), - Field.newBuilder("foo", StandardSQLTypeName.STRING).setMode(Mode.REQUIRED).build(), - Field.newBuilder("bar", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build() - ) - val bqSchema = Schema.of(toJava(fields)) - SqlInstanceToBigQuery[CreateTable] + SqlInstanceToBigQuery[CreateTable].bigQueryFields(cassandraTable).schema shouldBe bqSchema + } + + it should "be converted into BigQuery Schema using extension method" in { cassandraTable.asBigQuery.schema 
shouldBe bqSchema } } diff --git a/examples/src/test/scala/org/datatools/bigdatatypes/CirceToOthers.scala b/examples/src/test/scala/org/datatools/bigdatatypes/CirceToOthers.scala new file mode 100644 index 00000000..89279da8 --- /dev/null +++ b/examples/src/test/scala/org/datatools/bigdatatypes/CirceToOthers.scala @@ -0,0 +1,50 @@ +package org.datatools.bigdatatypes + +import com.google.cloud.bigquery.Field.Mode +import com.google.cloud.bigquery.{Field, Schema, StandardSQLTypeName} +import io.circe.Json +import org.datatools.bigdatatypes.bigquery.JavaConverters.toJava +import org.datatools.bigdatatypes.bigquery.SqlInstanceToBigQuery.{InstanceSchemaSyntax, InstanceSyntax} +import org.datatools.bigdatatypes.cassandra.CassandraTables +import org.datatools.bigdatatypes.cassandra.CassandraTables.AsCassandraInstanceSyntax +import org.datatools.bigdatatypes.circe.CirceTypeConversion.circeJsonType +import org.datatools.bigdatatypes.formats.Formats.implicitDefaultFormats + +class CirceToOthers extends UnitSpec { + + behavior of "Circe types to Cassandra" + + val circeJson: Json = CirceTestTypes.basicTypes + + "Json from Circe" should "be converted into Cassandra Table" in { + CassandraTables.table[Json](CirceTestTypes.basicTypes, "testTable", "myLong").toString shouldBe + "CREATE TABLE testtable (myint decimal,mylong decimal PRIMARY KEY,myfloat decimal,mydouble decimal,mydecimal decimal,myboolean boolean,mystring text)" + } + + it should "be converted into Cassandra Table using extension method" in { + CirceTestTypes.basicTypes.asCassandra("testTable", "myLong").toString shouldBe + "CREATE TABLE testtable (myint decimal,mylong decimal PRIMARY KEY,myfloat decimal,mydouble decimal,mydecimal decimal,myboolean boolean,mystring text)" + } + + behavior of "Circe to BigQuery" + + val fields: List[Field] = List( + Field.newBuilder("id", StandardSQLTypeName.STRING).setMode(Mode.REQUIRED).build(), + Field.newBuilder("foo", StandardSQLTypeName.STRING).setMode(Mode.REQUIRED).build(), + 
Field.newBuilder("bar", StandardSQLTypeName.NUMERIC).setMode(Mode.REQUIRED).build() + ) + val bqSchema: Schema = Schema.of(toJava(fields)) + + val circe: Json = Json.fromFields( + List( + ("id", Json.fromString("test")), + ("foo", Json.fromString("test")), + ("bar", Json.fromInt(1)) + ) + ) + + it should "be converted into BigQuery Schema" in { + circe.asBigQuery.schema shouldBe bqSchema + } + +} diff --git a/examples/src/test/scala_2/bigdatatypes/CassandraToOthers.scala b/examples/src/test/scala_2/bigdatatypes/CassandraToOthers.scala index d5463078..e4692915 100644 --- a/examples/src/test/scala_2/bigdatatypes/CassandraToOthers.scala +++ b/examples/src/test/scala_2/bigdatatypes/CassandraToOthers.scala @@ -21,7 +21,6 @@ class CassandraToOthers extends UnitSpec { .withColumn("bar", DataTypes.INT) "Cassandra table" should "be converted into Spark Schema" in { - // val sparkSchema: StructType = myDataFrame.schema val sparkSchema: StructType = StructType( List( StructField("id", StringType, nullable = false), diff --git a/examples/src/test/scala_2/bigdatatypes/CirceToOthers.scala b/examples/src/test/scala_2/bigdatatypes/CirceToOthers.scala new file mode 100644 index 00000000..bcba706e --- /dev/null +++ b/examples/src/test/scala_2/bigdatatypes/CirceToOthers.scala @@ -0,0 +1,39 @@ +package bigdatatypes + +import io.circe.Json +import org.apache.spark.sql.types.{DataTypes, StringType, StructField, StructType} +import org.datatools.bigdatatypes.UnitSpec +import org.datatools.bigdatatypes.circe.CirceTypeConversion.circeJsonType +import org.datatools.bigdatatypes.formats.Formats.implicitDefaultFormats +import org.datatools.bigdatatypes.spark.SparkSchemas +import org.datatools.bigdatatypes.spark.SqlInstanceToSpark.InstanceSyntax + +class CirceToOthers extends UnitSpec { + + behavior of "Circe to Spark types" + + val sparkSchema: StructType = StructType( + List( + StructField("id", StringType, nullable = false), + StructField("foo", StringType, nullable = false), + 
StructField("bar", DataTypes.createDecimalType, nullable = false) + ) + ) + + val circe: Json = Json.fromFields( + List( + ("id", Json.fromString("test")), + ("foo", Json.fromString("test")), + ("bar", Json.fromInt(1)) + ) + ) + + "Circe Json" should "be converted into Spark Schema" in { + SparkSchemas.schema[Json](circe) shouldBe sparkSchema + } + + it should "be converted into Spark Schema using extension method" in { + circe.asSparkSchema shouldBe sparkSchema + } + +} diff --git a/jsoncirce/src/main/scala/org/datatools/bigdatatypes/circe/CirceTypeConversion.scala b/jsoncirce/src/main/scala/org/datatools/bigdatatypes/circe/CirceTypeConversion.scala new file mode 100644 index 00000000..600d1c70 --- /dev/null +++ b/jsoncirce/src/main/scala/org/datatools/bigdatatypes/circe/CirceTypeConversion.scala @@ -0,0 +1,50 @@ +package org.datatools.bigdatatypes.circe + +import io.circe.Json +import org.datatools.bigdatatypes.basictypes.SqlType.* +import org.datatools.bigdatatypes.basictypes.SqlTypeMode.{Repeated, Required} +import org.datatools.bigdatatypes.basictypes.{SqlType, SqlTypeMode} +import org.datatools.bigdatatypes.conversions.SqlInstanceConversion + +import scala.annotation.tailrec + +object CirceTypeConversion { + + /** Circe does not have an implementation of SqlTypeConversion due to its private API. 
+ * We can not detect if a field is JDouble, JLong and so on because they are private, + * so the only way to detect some kind of types is based on their methods `.isNumber`, `isString` and so on + * which are only available if we have an instance of [[Json]] + */ + + /** Implementation of SqlInstanceConversion Type Class */ + implicit val circeJsonType: SqlInstanceConversion[Json] = (value: Json) => convertCirceType(value) + + @tailrec + def convertCirceType(j: Json, repeated: Boolean = false): SqlType = + j match { + case v if v.isArray => convertCirceType(v.asArray.get.apply(0), repeated = true) + case v if v.isNumber => SqlDecimal(isRepeated(repeated)) + case v if v.isString => SqlString(isRepeated(repeated)) + case v if v.isBoolean => SqlBool(isRepeated(repeated)) + case v if v.isObject => + val pairs = v.asObject.get.keys zip v.asObject.get.values + SqlStruct(loopStructs(pairs), isRepeated(repeated)) + } + + /** For recursion, loops over all items in an object + */ + private def loopStructs(l: Iterable[(String, Json)]): List[(String, SqlType)] = + l.map(x => x._1 -> convertCirceType(x._2)).toList + + /** From Boolean to Repeated or Required Mode + */ + private def isRepeated(repeated: Boolean): SqlTypeMode = if (repeated) Repeated else Required + + /** Extension method. Enables val myInstance: Json -> myInstance.asSqlType + * @param value in a Json from Circe + */ + implicit class StructTypeSyntax(value: Json) { + def asSqlType: SqlType = SqlInstanceConversion[Json].getType(value) + } + +} diff --git a/jsoncirce/src/test/scala/org/datatools/bigdatatypes/CirceTestTypes.scala b/jsoncirce/src/test/scala/org/datatools/bigdatatypes/CirceTestTypes.scala new file mode 100644 index 00000000..8edc45a0 --- /dev/null +++ b/jsoncirce/src/test/scala/org/datatools/bigdatatypes/CirceTestTypes.scala @@ -0,0 +1,61 @@ +package org.datatools.bigdatatypes + +import io.circe.Json + +/** Test types from [[TestTypes]] converted to Circe. 
They can be used in multiple tests + */ +object CirceTestTypes { + + val basicFields: Seq[(String, Json)] = + List( + ("myInt", Json.fromInt(1)), + ("myLong", Json.fromLong(1)), + ("myFloat", Json.fromFloat(1).get), + ("myDouble", Json.fromDouble(1).get), + ("myDecimal", Json.fromBigDecimal(1)), + ("myBoolean", Json.fromBoolean(true)), + ("myString", Json.fromString("")) + ) + + val basicTypes: Json = Json.fromFields(basicFields) + + val basicWithList: Json = Json.fromFields( + List( + ("myInt", Json.fromInt(1)), + ( + "myList", + Json.fromValues( + List( + Json.fromInt(1) + ) + ) + ) + ) + ) + + val basicNested: Json = Json.fromFields( + List( + ("myInt", Json.fromInt(1)), + ("myStruct", Json.fromFields(basicFields)) + ) + ) + + val basicNestedWithList: Json = Json.fromFields( + List( + ( + "matrix", + Json.fromValues( + List( + Json.fromFields( + List( + ("x", Json.fromInt(1)), + ("y", Json.fromInt(1)) + ) + ) + ) + ) + ) + ) + ) + +} diff --git a/jsoncirce/src/test/scala/org/datatools/bigdatatypes/SqlTestTypes.scala b/jsoncirce/src/test/scala/org/datatools/bigdatatypes/SqlTestTypes.scala new file mode 100644 index 00000000..f097cb69 --- /dev/null +++ b/jsoncirce/src/test/scala/org/datatools/bigdatatypes/SqlTestTypes.scala @@ -0,0 +1,82 @@ +package org.datatools.bigdatatypes + +import org.datatools.bigdatatypes.basictypes.* +import org.datatools.bigdatatypes.basictypes.SqlType.* +import org.datatools.bigdatatypes.basictypes.SqlTypeMode.* + +import java.sql.{Date, Timestamp} + +/** Case Classes and their SqlType representations + * This should be used to test SqlTypeConversion and all reverse conversions from other modules + */ +object SqlTestTypes { + + /** Used for case classes, nested or others */ + val basicFields: List[(String, SqlType)] = + List( + ("myInt", SqlDecimal(Required)), + ("myLong", SqlDecimal(Required)), + ("myFloat", SqlDecimal(Required)), + ("myDouble", SqlDecimal(Required)), + ("myDecimal", SqlDecimal(Required)), + ("myBoolean", 
SqlBool(Required)), + ("myString", SqlString(Required)) + ) + + val basicOption: SqlStruct = SqlStruct( + List( + ("myString", SqlString(Required)), + ("myOptionalString", SqlString(Nullable)) + ) + ) + + val basicTypes: SqlStruct = SqlStruct(basicFields) + + val basicOptionTypes: SqlStruct = SqlStruct( + List( + ("myInt", SqlDecimal(Nullable)), + ("myLong", SqlDecimal(Nullable)), + ("myFloat", SqlDecimal(Nullable)), + ("myDouble", SqlDecimal(Nullable)), + ("myDecimal", SqlDecimal(Nullable)), + ("myBoolean", SqlBool(Nullable)), + ("myString", SqlString(Nullable)) + ) + ) + + val basicWithList: SqlStruct = SqlStruct( + List( + ("myInt", SqlDecimal(Required)), + ("myList", SqlDecimal(Repeated)) + ) + ) + + val basicNested: SqlStruct = SqlStruct( + List( + ("myInt", SqlDecimal(Required)), + ("myStruct", SqlStruct(basicFields, Required)) + ) + ) + + val basicOptionalNested: SqlStruct = SqlStruct( + List( + ("myInt", SqlDecimal(Required)), + ("myStruct", SqlStruct(basicFields, Nullable)) + ) + ) + + val basicNestedWithList: SqlStruct = SqlStruct( + List( + ( + "matrix", + SqlStruct( + List( + ("x", SqlDecimal(Required)), + ("y", SqlDecimal(Required)) + ), + Repeated + ) + ) + ) + ) +} diff --git a/jsoncirce/src/test/scala/org/datatools/bigdatatypes/circe/CirceTypeConversionSpec.scala b/jsoncirce/src/test/scala/org/datatools/bigdatatypes/circe/CirceTypeConversionSpec.scala new file mode 100644 index 00000000..995a5c4e --- /dev/null +++ b/jsoncirce/src/test/scala/org/datatools/bigdatatypes/circe/CirceTypeConversionSpec.scala @@ -0,0 +1,44 @@ +package org.datatools.bigdatatypes.circe + +import io.circe.{Json, JsonObject} +import org.datatools.bigdatatypes.basictypes.* +import org.datatools.bigdatatypes.basictypes.SqlType.* +import org.datatools.bigdatatypes.circe.CirceTypeConversion.* +import org.datatools.bigdatatypes.conversions.SqlInstanceConversion +import org.datatools.bigdatatypes.{CirceTestTypes as C, SqlTestTypes as S, UnitSpec} + +/** Reverse conversion, from 
Circe types to [[SqlType]]s + */ +class CirceTypeConversionSpec extends UnitSpec { + + "Simple Json Type" should "be converted into SqlType" in { + SqlInstanceConversion[Json].getType(Json.fromString("test")) shouldBe SqlString() + } + + "Simple Json Object" should "be converted into SqlStruct" in { + val j = Json.fromJsonObject(JsonObject(("myInt", Json.fromInt(1)))) + val expected = SqlStruct(List(("myInt", SqlDecimal()))) + SqlInstanceConversion[Json].getType(j) shouldBe expected + } + + "Basic Json fields" should "be converted into Basic SqlTypes" in { + SqlInstanceConversion[Json].getType(C.basicTypes) shouldBe S.basicTypes + } + + "Basic Json fields with Arrays" should "be converted into SqlTypes" in { + SqlInstanceConversion[Json].getType(C.basicWithList) shouldBe S.basicWithList + } + + "Basic Json fields with Nested objects" should "be converted into SqlTypes" in { + SqlInstanceConversion[Json].getType(C.basicNested) shouldBe S.basicNested + } + + "Basic Json fields with Nested Arrays" should "be converted into SqlTypes" in { + SqlInstanceConversion[Json].getType(C.basicNestedWithList) shouldBe S.basicNestedWithList + } + + "Extension method asSqlType" should "convert a Json into SqlTypes" in { + C.basicTypes.asSqlType shouldBe S.basicTypes + } + +} diff --git a/spark/src/test/scala/org/datatools/bigdatatypes/SparkTestTypes.scala b/spark/src/test/scala/org/datatools/bigdatatypes/SparkTestTypes.scala index 8f0a58ac..372c227c 100644 --- a/spark/src/test/scala/org/datatools/bigdatatypes/SparkTestTypes.scala +++ b/spark/src/test/scala/org/datatools/bigdatatypes/SparkTestTypes.scala @@ -15,7 +15,7 @@ import org.apache.spark.sql.types.{ TimestampType } -/** Test types from [[TestTypes]] converted to BigQuery. They can be used in multiple tests +/** Test types from [[TestTypes]] converted to Spark. 
They can be used in multiple tests */ object SparkTestTypes { @@ -30,7 +30,6 @@ object SparkTestTypes { StructField("myString", StringType, nullable = false) ) - /** BigQuery doesn't have a main object, it has a list of fields in the root path */ val basicTypes: StructType = StructType(basicFields) val basicOptionTypes: Seq[StructField] =