Circe module (data-tools#224)
* improving docs + Circe module in build + small comment improvements

* Some conversions from Circe to SqlType + some test types

* Circe Conversion working for arrays

* Circe docs + examples
JavierMonton committed Jun 22, 2022
1 parent 3ff1664 commit 7b271fd
Showing 16 changed files with 546 additions and 36 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,9 @@
### Big Data Types v1.2.0
- New module for Circe (JSON)
- Conversion from Circe to other types
- New docs
- More examples

### Big Data Types v1.1.2
- Cassandra
- Fixed minor issues on parser
13 changes: 7 additions & 6 deletions README.md
@@ -14,12 +14,13 @@ Check the [Documentation website](https://data-tools.github.io/big-data-types) t

# Available conversions:

-| From / To  |Scala Types       |BigQuery          |Spark             |Cassandra         |
-|------------|:----------------:|:----------------:|:----------------:|:----------------:|
-|Scala Types | -                |:white_check_mark:|:white_check_mark:|:white_check_mark:|
-|BigQuery    |                  | -                |:white_check_mark:|:white_check_mark:|
-|Spark       |                  |:white_check_mark:| -                |:white_check_mark:|
-|Cassandra   |                  |:white_check_mark:|:white_check_mark:| -                |
+| From / To    |Scala Types       |BigQuery          |Spark             |Cassandra         | Circe (JSON) |
+|--------------|:----------------:|:----------------:|:----------------:|:----------------:|:------------:|
+| Scala Types  | -                |:white_check_mark:|:white_check_mark:|:white_check_mark:|              |
+| BigQuery     |                  | -                |:white_check_mark:|:white_check_mark:|              |
+| Spark        |                  |:white_check_mark:| -                |:white_check_mark:|              |
+| Cassandra    |                  |:white_check_mark:|:white_check_mark:| -                |              |
+| Circe (JSON) |                  |:white_check_mark:|:white_check_mark:|:white_check_mark:|      -       |


Versions for Scala ![Scala 2.12](https://img.shields.io/badge/Scala-2.12-red), ![Scala 2.13](https://img.shields.io/badge/Scala-2.13-red)
25 changes: 23 additions & 2 deletions build.sbt
@@ -1,5 +1,5 @@
//used to build Sonatype releases
-lazy val versionNumber = "1.1.2"
+lazy val versionNumber = "1.2.0"
lazy val projectName = "big-data-types"
version := versionNumber
name := projectName
@@ -26,7 +26,7 @@ lazy val publishSettings = Seq(
    ScmInfo(url("https://github.com/data-tools/big-data-types"), "git@github.com:data-tools/big-data-types.git")
),
  developers := List(Developer("JavierMonton", "Javier Monton", "", url("https://github.com/JavierMonton"))),
-  licenses := Seq("APL2" -> url("http://www.apache.org/licenses/LICENSE-2.0.txt")),
+  licenses := Seq("APL2" -> url("https://www.apache.org/licenses/LICENSE-2.0.txt")),
publishMavenStyle := true
)

@@ -69,6 +69,13 @@ lazy val cassandraDependencies = Seq(
scalatest % Test
)

val circeVersion = "0.14.1"
lazy val jsonCirceDependencies = Seq(
"io.circe" %% "circe-core",
"io.circe" %% "circe-generic",
"io.circe" %% "circe-parser"
).map(_ % circeVersion)

lazy val scalatest = "org.scalatest" %% "scalatest" % "3.2.11"

//Project settings
@@ -80,6 +87,7 @@ lazy val root = (project in file("."))
bigquery,
spark,
cassandra,
jsonCirce,
examples
)

@@ -135,6 +143,18 @@ lazy val cassandra = (project in file("cassandra"))
)
.dependsOn(core % "test->test;compile->compile")

lazy val jsonCirce = (project in file("jsoncirce"))
.configs(IntegrationTest)
.settings(
name := projectName + "-circe",
publishSettings,
scalacOptions ++= scalacCommon,
crossScalaVersions := supportedScalaVersions,
crossVersionSharedSources,
libraryDependencies ++= jsonCirceDependencies
)
.dependsOn(core % "test->test;compile->compile")

// Examples module for testing, with all modules included, not built
lazy val examples = (project in file("examples"))
.settings(
@@ -147,6 +167,7 @@ lazy val examples = (project in file("examples"))
.dependsOn(core % "test->test;compile->compile")
.dependsOn(bigquery % "test->test;compile->compile")
.dependsOn(cassandra % "test->test;compile->compile")
.dependsOn(jsonCirce % "test->test;compile->compile")
.settings(
noPublishSettings,
crossScalaVersions := List(scala212, scala213),
@@ -1,8 +1,8 @@
package org.datatools.bigdatatypes

-import org.datatools.bigdatatypes.basictypes.SqlType._
-import org.datatools.bigdatatypes.basictypes.SqlTypeMode._
-import org.datatools.bigdatatypes.basictypes._
+import org.datatools.bigdatatypes.basictypes.SqlType.*
+import org.datatools.bigdatatypes.basictypes.SqlTypeMode.*
+import org.datatools.bigdatatypes.basictypes.*

import java.sql.{Date, Timestamp}

Expand Down
103 changes: 94 additions & 9 deletions docs/Contributing/CreateNewType.md
@@ -8,19 +8,19 @@ This is a guide on how to add a new type to the library
- [How to develop a new type](#how-to-develop-a-new-type)
- [How it works](#how-it-works)
* [SqlType ADT](#sqltype-adt)
-  * [Conversion / Reverse Conversion](#conversion---reverse-conversion)
+  * [Conversion / Reverse Conversion](#conversion--reverse-conversion)
+ [Conversion](#conversion)
+ [Reverse Conversion](#reverse-conversion)
- [How to do it](#how-to-do-it)
* [Create a new subproject in SBT](#create-a-new-subproject-in-sbt)
-  * [Conversion: Type Class - SqlType to New Type](#conversion--type-class---sqltype-to-new-type)
+  * [Conversion: Type Class - SqlType to New Type](#conversion-type-class---sqltype-to-new-type)
+ [Defining the syntax](#defining-the-syntax)
+ [Implementing the Type Class](#implementing-the-type-class)
- [Mode inside Types](#mode-inside-types)
+ [Everything together](#everything-together)
-  * [Conversion: SqlInstance to New Type](#conversion--sqlinstance-to-new-type)
-  * [Reverse conversion: New Type to SqlType](#reverse-conversion--new-type-to-sqltype)
-  * [Everything together](#everything-together-1)
+  * [Conversion: SqlInstance to New Type](#conversion-sqlinstance-to-new-type)
+  * [Reverse conversion: New Type to SqlType](#reverse-conversion-new-type-to-sqltype)
+  * [Everything together](#everything-together)


## How to develop a new type
@@ -78,7 +78,7 @@ an existing _Type Class_ called `SqlTypeConversion`
By doing this, we automatically get conversions to the rest of the types in the library.
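
As a rough sketch of the shapes involved (simplified signatures, inferred from how the two type classes are used later in this guide; the real definitions live in the `core` module):

```scala
import org.datatools.bigdatatypes.basictypes.SqlType

// simplified sketch: SqlTypeConversion works at the type level,
// SqlInstanceConversion works on concrete values of a type
trait SqlTypeConversion[A] {
  def getType: SqlType
}

trait SqlInstanceConversion[A] {
  def getType(value: A): SqlType
}
```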


-# How to do it
+## How to do it

As covered in [Conversion](#conversion), we have to implement two type classes: one for types, another for instances.
Both will derive the `SqlTypeConversion` type class into our specific type and, by doing so, we will automatically get all conversions into our new type.
@@ -123,6 +123,81 @@

Now you can create a new root folder with your type name, with the typical structure (`src/main/scala_2`, ...)

## Preparing Tests
:::note
You can develop the conversion before the tests, but we recommend creating a set of tests before starting to develop a new type;
it helps a lot to understand your new type and how it is created.
Sometimes a type is not as easy as it seems.
:::

In the `core` module of the library there are some case classes that should cover all the different scenarios
(different types, lists, objects, deep nested objects), so the testing part consists of:
1. Creating instances of your new types
2. Picking the already defined [Test Case Classes](../../core/src/test/scala_2/org/datatools/bigdatatypes/TestTypes.scala)
3. Testing that (1) can be converted into (2)
4. Testing that (2) can be converted into (1)

:::tip
You will need to understand the following about your new type:
- How types are created
- How nullable fields work (with Optional types, nullable parameters, ...)
- How lists and nested objects work (if they exist)
:::

To do so, first create a new `test/scala` folder with the `org.datatools.bigdatatypes` package and create an object like `MyTypeTestTypes`.

See the example of Spark Types:
```scala
object SparkTestTypes {

val basicFields: Seq[StructField] =
List(
StructField("myInt", IntegerType, nullable = false),
StructField("myLong", LongType, nullable = false),
StructField("myFloat", FloatType, nullable = false),
StructField("myDouble", DoubleType, nullable = false),
StructField("myDecimal", DataTypes.createDecimalType, nullable = false),
StructField("myBoolean", BooleanType, nullable = false),
StructField("myString", StringType, nullable = false)
)
val basicWithList: Seq[StructField] =
List(
StructField("myInt", IntegerType, nullable = false),
StructField("myList", ArrayType(IntegerType), nullable = true)
)
// ...
}
```
Create a new package for your tests called `myType`, and add a new class there for each conversion.

### Tests for reverse conversion
From our type to the generic one

Create a file called `MyTypeConversionSpec` and add some tests there. You can add the following ones:
- Simple individual types
- Product types (case class / object)
- Lists
- Nested objects
- Some extra tests for extension methods (syntactic sugar like `.asSqlType` or `.asBigQuery` in the normal conversion)

e.g. from Spark:
```scala
class SparkTypeConversionSpec extends UnitSpec {

"Simple Spark DataType" should "be converted into SqlType" in {
SqlTypeConversion[IntegerType].getType shouldBe SqlInt()
}

"StructField nullable" should "be converted into Nullable SqlType" in {
val sf = StructField("myInt", IntegerType, nullable = true)
sf.asSqlType shouldBe SqlInt(Nullable)
SqlInstanceConversion[StructField].getType(sf) shouldBe SqlInt(Nullable)
}
// ...
}
```


## Conversion: Type Class - SqlType to New Type

### Defining the syntax
@@ -176,7 +251,9 @@ As the types usually can be recursive (nested objects) we can start defining a method
getSchemaWithName(f.transformKey(name, sqlType), sqlType) :: getSchema(SqlStruct(records, mode))
}
```
-**_Note:_** this method probably could be copied, changing only the return type for our type
+:::tip
+This method can probably be copied as-is, changing only the return type to our new type. You will create `getSchemaWithName` next.
+:::
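
For reference, a hedged reconstruction of what the complete method could look like in the BigQuery module — only its last lines are shown above, so apart from the names taken from the fragment (`getSchemaWithName`, `f.transformKey`, `SqlStruct`) the rest is an assumption:

```scala
import com.google.cloud.bigquery.Field
import org.datatools.bigdatatypes.basictypes._
import org.datatools.bigdatatypes.basictypes.SqlType._
import org.datatools.bigdatatypes.formats.Formats

// sketch: recursively walks a SqlStruct, converting one field at a time;
// getSchemaWithName (created next) converts a single named field
private def getSchema(sqlType: SqlType)(implicit f: Formats): List[Field] = sqlType match {
  case SqlStruct(Nil, _) => Nil
  case SqlStruct((name, innerType) :: records, mode) =>
    getSchemaWithName(f.transformKey(name, innerType), innerType) :: getSchema(SqlStruct(records, mode))
  case _ => Nil // assumption: non-struct roots are handled elsewhere in the real module
}
```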

And another method (`getSchemaWithName` in this example) to handle each specific type:
In this case, we are showing an example from BigQuery as it seems simpler to understand:
@@ -190,8 +267,13 @@
Field.newBuilder(name, StandardSQLTypeName.INT64).setMode(sqlModeToBigQueryMode(mode)).build()
case SqlFloat(mode) =>
Field.newBuilder(name, StandardSQLTypeName.FLOAT64).setMode(sqlModeToBigQueryMode(mode)).build()
-    ...
-    ...
+    case SqlDouble(mode) => ???
+    case SqlDecimal(mode) => ???
+    case SqlBool(mode) => ???
+    case SqlString(mode) => ???
+    case SqlTimestamp(mode) => ???
+    case SqlDate(mode) => ???
+    case SqlStruct(subType, mode) => ???
}
```
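
The `sqlModeToBigQueryMode` helper used above is not shown in this fragment; a plausible minimal version (an assumption, based on BigQuery's `Field.Mode` values and the library's `SqlTypeMode`) could be:

```scala
import com.google.cloud.bigquery.Field.Mode
import org.datatools.bigdatatypes.basictypes._
import org.datatools.bigdatatypes.basictypes.SqlTypeMode._

// assumption: a one-to-one mapping from the library's SqlTypeMode to BigQuery's Field.Mode
private def sqlModeToBigQueryMode(mode: SqlTypeMode): Mode = mode match {
  case Required => Mode.REQUIRED
  case Nullable => Mode.NULLABLE
  case Repeated => Mode.REPEATED
}
```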
Same example from Spark:
@@ -377,6 +459,9 @@ object SparkTypeConversion {
implicit val longType: SqlTypeConversion[LongType] = SqlTypeConversion.instance(SqlLong())
implicit val doubleType: SqlTypeConversion[DoubleType] = SqlTypeConversion.instance(SqlDouble())
```
:::tip
You can copy & paste all the available types from other modules, like the [Spark one](../../spark/src/main/scala/org/datatools/bigdatatypes/spark/SparkTypeConversion.scala)
:::

- We probably need instances of our type as well: in Spark, for example, `StructField` and `StructType` are instances, so we cover them using the `SqlInstanceConversion` _Type Class_. In Cassandra we internally use a tuple `(String, DataType)`, and that also works. A sketch of such an instance follows below.
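
A hedged sketch of what such an instance can look like, using Spark's `StructField` (simplified: the real module dispatches on every Spark `DataType`, and the import path for `SqlInstanceConversion` is an assumption):

```scala
import org.apache.spark.sql.types.{IntegerType, StringType, StructField}
import org.datatools.bigdatatypes.basictypes._
import org.datatools.bigdatatypes.basictypes.SqlType._
import org.datatools.bigdatatypes.basictypes.SqlTypeMode._
import org.datatools.bigdatatypes.conversions.SqlInstanceConversion // path assumed

implicit val structFieldConversion: SqlInstanceConversion[StructField] =
  new SqlInstanceConversion[StructField] {
    def getType(value: StructField): SqlType = {
      // nullability lives on the StructField, not on the DataType itself
      val mode = if (value.nullable) Nullable else Required
      value.dataType match {
        case IntegerType => SqlInt(mode)
        case StringType  => SqlString(mode)
        case _           => SqlString(mode) // simplified fallback, just for this sketch
      }
    }
  }
```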

68 changes: 68 additions & 0 deletions docs/Modules/Circe.md
@@ -0,0 +1,68 @@
---
sidebar_position: 6
---
# Circe (JSON)

[Circe](https://circe.github.io/circe/) is a JSON library for Scala.

The Circe module of this library allows converting `Json` objects (from Circe) into any other type in the library.
:::caution
For now, only conversions from Circe to other types are available; conversions from other types to Circe are not ready yet.
:::

:::info
JSON objects do not have very concrete types: `number` is a type, but more
specific types like `integer` or `float` do not exist. Because of that, any
conversion will turn a JSON `number` into a `Decimal` type, as `Decimal` is
the only type that can preserve the precision of an arbitrary number.
:::
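
For instance (a minimal sketch, assuming the `SqlInstanceConversion[Json]` instance that `CirceTypeConversion` provides, and import paths matching the other modules):

```scala
import io.circe.Json
import org.datatools.bigdatatypes.basictypes.SqlType._
import org.datatools.bigdatatypes.basictypes.SqlTypeMode._
import org.datatools.bigdatatypes.circe.CirceTypeConversion._
import org.datatools.bigdatatypes.conversions.SqlInstanceConversion // path assumed

// once parsed, JSON cannot tell an integer apart from a float,
// so both of these should surface as SqlDecimal
val intNumber: Json = Json.fromInt(1)
val floatNumber: Json = Json.fromDoubleOrNull(1.5)

SqlInstanceConversion[Json].getType(intNumber)   // expected: SqlDecimal(Required)
SqlInstanceConversion[Json].getType(floatNumber) // expected: SqlDecimal(Required)
```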
<details><summary>About Circe and private types</summary>
<p>
Circe has more specific types than `JNumber`, such as `JLong` or `JDouble`, but all of them are private to Circe itself, so we cannot use them, not even for matching types during conversions. In any case, even if we could use them, the specific types cannot be detected when parsing a JSON string (probably the most common case).
</p>
</details>

For example, converting a Circe `Json` object into a Spark schema and a BigQuery schema:

```scala
import io.circe.Json
import com.google.cloud.bigquery.Schema
import org.apache.spark.sql.types.StructType
import org.datatools.bigdatatypes.circe.CirceTypeConversion.*
import org.datatools.bigdatatypes.formats.Formats.implicitDefaultFormats
import org.datatools.bigdatatypes.spark.SqlInstanceToSpark.InstanceSyntax
import org.datatools.bigdatatypes.bigquery.SqlInstanceToBigQuery.*

val circeJson: Json = Json.fromFields(List(
("id", Json.fromString("test")),
("foo", Json.fromString("test")),
("bar", Json.fromInt(1))
))

val sparkSchema: StructType = circeJson.asSparkSchema
val bqSchema: Schema = circeJson.asBigQuery.schema
```

Or, starting from a JSON string parsed with Circe:
```scala
import io.circe._, io.circe.parser._

import org.datatools.bigdatatypes.circe.CirceTypeConversion.*
import org.datatools.bigdatatypes.formats.Formats.implicitDefaultFormats
import org.datatools.bigdatatypes.spark.SqlInstanceToSpark.InstanceSyntax
import org.datatools.bigdatatypes.bigquery.SqlInstanceToBigQuery.*

val rawJson: String = """
{
"foo": "bar",
"baz": 123,
"list of stuff": [ 4, 5, 6 ]
}
"""
val parseResult = parse(rawJson)
// parseResult: Either[ParsingFailure, Json]
val sparkSchema = parseResult.map(j => j.asSparkSchema)
// sparkSchema: Either[ParsingFailure, StructType]
val bqSchema = parseResult.map(j => j.asBigQuery.schema)
// bqSchema: Either[ParsingFailure, Schema]
```
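
Since `parse` returns an `Either`, one way (among others) to surface a parsing failure instead of carrying the `Either` around, reusing the imports above:

```scala
// throws on invalid JSON, otherwise yields a plain Spark StructType
val sparkSchemaOrThrow =
  parseResult
    .map(_.asSparkSchema)
    .fold(failure => throw new IllegalArgumentException(failure.message), identity)
```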

---
13 changes: 7 additions & 6 deletions docs/intro.md
@@ -33,11 +33,12 @@ or a BigQuery table into a Cassandra table without having code that relates thos

### Available conversions:

-| From / To  |Scala Types       |BigQuery          |Spark             |Cassandra         |
-|------------|:----------------:|:----------------:|:----------------:|:----------------:|
-|Scala Types | -                |:white_check_mark:|:white_check_mark:|:white_check_mark:|
-|BigQuery    |                  | -                |:white_check_mark:|:white_check_mark:|
-|Spark       |                  |:white_check_mark:| -                |:white_check_mark:|
-|Cassandra   |                  |:white_check_mark:|:white_check_mark:| -                |
+| From / To    |Scala Types       |BigQuery          |Spark             |Cassandra         | Circe (JSON) |
+|--------------|:----------------:|:----------------:|:----------------:|:----------------:|:------------:|
+| Scala Types  | -                |:white_check_mark:|:white_check_mark:|:white_check_mark:|              |
+| BigQuery     |                  | -                |:white_check_mark:|:white_check_mark:|              |
+| Spark        |                  |:white_check_mark:| -                |:white_check_mark:|              |
+| Cassandra    |                  |:white_check_mark:|:white_check_mark:| -                |              |
+| Circe (JSON) |                  |:white_check_mark:|:white_check_mark:|:white_check_mark:|      -       |


@@ -21,14 +21,18 @@ class CassandraToOthers extends UnitSpec {
.withColumn("foo", DataTypes.TEXT)
.withColumn("bar", DataTypes.INT)

+  val fields: List[Field] = List(
+    Field.newBuilder("id", StandardSQLTypeName.STRING).setMode(Mode.REQUIRED).build(),
+    Field.newBuilder("foo", StandardSQLTypeName.STRING).setMode(Mode.REQUIRED).build(),
+    Field.newBuilder("bar", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build()
+  )
+  val bqSchema: Schema = Schema.of(toJava(fields))

  "Cassandra table" should "be converted into BigQuery Schema" in {
-    val fields = List(
-      Field.newBuilder("id", StandardSQLTypeName.STRING).setMode(Mode.REQUIRED).build(),
-      Field.newBuilder("foo", StandardSQLTypeName.STRING).setMode(Mode.REQUIRED).build(),
-      Field.newBuilder("bar", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build()
-    )
-    val bqSchema = Schema.of(toJava(fields))
    SqlInstanceToBigQuery[CreateTable].bigQueryFields(cassandraTable).schema shouldBe bqSchema
  }

+  it should "be converted into BigQuery Schema using extension method" in {
+    cassandraTable.asBigQuery.schema shouldBe bqSchema
+  }
}
