diff --git a/CHANGELOG.md b/CHANGELOG.md index c57ac416..5cf37eec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +### Big Data Types v1.2.0 +- New module for Circe (JSON) + - Conversion from Circe to other types + - New docs + - More examples + ### Big Data Types v1.1.2 - Cassandra - Fixed minor issues on parser diff --git a/README.md b/README.md index ceaf06d0..04aebcbd 100644 --- a/README.md +++ b/README.md @@ -14,12 +14,13 @@ Check the [Documentation website](https://data-tools.github.io/big-data-types) t # Available conversions: -| From / To |Scala Types |BigQuery |Spark |Cassandra | -|------------|:----------------:|:----------------:|:----------------:|:----------------:| -|Scala Types | - |:white_check_mark:|:white_check_mark:|:white_check_mark:| -|BigQuery | | - |:white_check_mark:|:white_check_mark:| -|Spark | |:white_check_mark:| - |:white_check_mark:| -|Cassandra | |:white_check_mark:|:white_check_mark:| - | +| From / To |Scala Types |BigQuery |Spark |Cassandra | Circe (JSON) | +|--------------|:----------------:|:----------------:|:----------------:|:----------------:|:------------:| +| Scala Types | - |:white_check_mark:|:white_check_mark:|:white_check_mark:| | +| BigQuery | | - |:white_check_mark:|:white_check_mark:| | +| Spark | |:white_check_mark:| - |:white_check_mark:| | +| Cassandra | |:white_check_mark:|:white_check_mark:| - | | +| Circe (JSON) | |:white_check_mark:|:white_check_mark:|:white_check_mark:| | Versions for Scala ![Scala 2.12](https://img.shields.io/badge/Scala-2.12-red) ,![Scala_2.13](https://img.shields.io/badge/Scala-2.13-red) diff --git a/build.sbt b/build.sbt index b0cd8106..afed95e2 100644 --- a/build.sbt +++ b/build.sbt @@ -1,5 +1,5 @@ //used to build Sonatype releases -lazy val versionNumber = "1.1.2" +lazy val versionNumber = "1.2.0" lazy val projectName = "big-data-types" version := versionNumber name := projectName @@ -26,7 +26,7 @@ lazy val publishSettings = Seq( ScmInfo(url("https://github.com/data-tools/big-data-types"), "git@github.com:data-tools/big-data-types.git") ), developers := List(Developer("JavierMonton", "Javier Monton", "", url("https://github.com/JavierMonton"))), - licenses := Seq("APL2" -> url("http://www.apache.org/licenses/LICENSE-2.0.txt")), + licenses := Seq("APL2" -> url("https://www.apache.org/licenses/LICENSE-2.0.txt")), publishMavenStyle := true ) @@ -69,6 +69,13 @@ lazy val cassandraDependencies = Seq( scalatest % Test ) +val circeVersion = "0.14.1" +lazy val jsonCirceDependencies = Seq( + "io.circe" %% "circe-core", + "io.circe" %% "circe-generic", + "io.circe" %% "circe-parser" + ).map(_ % circeVersion) + lazy val scalatest = "org.scalatest" %% "scalatest" % "3.2.11" //Project settings @@ -80,6 +87,7 @@ lazy val root = (project in file(".")) bigquery, spark, cassandra, + jsonCirce, examples ) @@ -135,6 +143,18 @@ lazy val cassandra = (project in file("cassandra")) ) .dependsOn(core % "test->test;compile->compile") +lazy val jsonCirce = (project in file("jsoncirce")) + .configs(IntegrationTest) + .settings( + name := projectName + "-circe", + publishSettings, + scalacOptions ++= scalacCommon, + crossScalaVersions := supportedScalaVersions, + crossVersionSharedSources, + libraryDependencies ++= jsonCirceDependencies + ) + .dependsOn(core % "test->test;compile->compile") + // Examples module for testing, with all modules included, not built lazy val examples = (project in file("examples")) .settings( @@ -147,6 +167,7 @@ lazy val examples = (project in file("examples")) .dependsOn(core % 
"test->test;compile->compile") .dependsOn(bigquery % "test->test;compile->compile") .dependsOn(cassandra % "test->test;compile->compile") + .dependsOn(jsonCirce % "test->test;compile->compile") .settings( noPublishSettings, crossScalaVersions := List(scala212, scala213), diff --git a/core/src/test/scala_2/org/datatools/bigdatatypes/TestTypes.scala b/core/src/test/scala_2/org/datatools/bigdatatypes/TestTypes.scala index 2fff37ea..6dd6c7e7 100644 --- a/core/src/test/scala_2/org/datatools/bigdatatypes/TestTypes.scala +++ b/core/src/test/scala_2/org/datatools/bigdatatypes/TestTypes.scala @@ -1,8 +1,8 @@ package org.datatools.bigdatatypes -import org.datatools.bigdatatypes.basictypes.SqlType._ -import org.datatools.bigdatatypes.basictypes.SqlTypeMode._ -import org.datatools.bigdatatypes.basictypes._ +import org.datatools.bigdatatypes.basictypes.SqlType.* +import org.datatools.bigdatatypes.basictypes.SqlTypeMode.* +import org.datatools.bigdatatypes.basictypes.* import java.sql.{Date, Timestamp} diff --git a/docs/Contributing/CreateNewType.md b/docs/Contributing/CreateNewType.md index b9b11d79..a77c1413 100644 --- a/docs/Contributing/CreateNewType.md +++ b/docs/Contributing/CreateNewType.md @@ -8,19 +8,19 @@ This is a guide on how to add a new type to the library - [How to develop a new type](#how-to-develop-a-new-type) - [How it works](#how-it-works) * [SqlType ADT](#sqltype-adt) - * [Conversion / Reverse Conversion](#conversion---reverse-conversion) + * [Conversion / Reverse Conversion](#conversion--reverse-conversion) + [Conversion](#conversion) + [Reverse Conversion](#reverse-conversion) - [How to do it](#how-to-do-it) * [Create a new subproject in SBT](#create-a-new-subproject-in-sbt) - * [Conversion: Type Class - SqlType to New Type](#conversion--type-class---sqltype-to-new-type) + * [Conversion: Type Class - SqlType to New Type](#conversion-type-class---sqltype-to-new-type) + [Defining the syntax](#defining-the-syntax) + [Implementing the Type Class](#implementing-the-type-class) - [Mode inside Types](#mode-inside-types) + [Everything together](#everything-together) - * [Conversion: SqlInstance to New Type](#conversion--sqlinstance-to-new-type) - * [Reverse conversion: New Type to SqlType](#reverse-conversion--new-type-to-sqltype) - * [Everything together](#everything-together-1) + * [Conversion: SqlInstance to New Type](#conversion-sqlinstance-to-new-type) + * [Reverse conversion: New Type to SqlType](#reverse-conversion-new-type-to-sqltype) + * [Everything together](#everything-together) ## How to develop a new type @@ -78,7 +78,7 @@ an existing _Type Class_ called `SqlTypeConversion` By doing this, we will get automatically conversion to the rest of the types of the library -# How to do it +## How to do it As covered in [Conversion](#conversion), we have to implement 2 types classes, one for types, another for instances. Both will derive `SqlTypeConversion` type class into our specific type and by doing so, we will get automatically all conversions into our new type @@ -123,6 +123,81 @@ lazy val root = (project in file(".")) Now, you can create a new root folder with your type name with the typical structure (src/main/scala_ ...) +## Preparing Tests +:::Note +You can develop the conversion before tests, but we recommend to create a set of test before starting to develop a new type, +it helps a lot to understand your new type and how it is being created. +Sometimes a type is not as easy as it seems. 
@@ -123,6 +123,81 @@ lazy val root = (project in file("."))
 
 Now, you can create a new root folder with your type name with the typical structure (src/main/scala_ ...)
 
+## Preparing Tests
+:::note
+You can develop the conversion before the tests, but we recommend creating a set of tests before starting to develop a new type;
+it helps a lot to understand your new type and how it is constructed.
+Sometimes a type is not as easy as it seems.
+:::
+
+In the `core` module of the library there are some case classes that should cover all the different scenarios
+(different types, lists, objects, deep nested objects), so the testing part consists of:
+1. Create instances of your new type
+2. Pick the already defined [Test Case Classes](../../core/src/test/scala_2/org/datatools/bigdatatypes/TestTypes.scala)
+3. Test that (1) can be converted into (2)
+4. Test that (2) can be converted into (1)
+
+:::tip
+You will need to understand the following about your new type:
+- How types are created
+- How nullable fields work (with Optional types, nullable parameters, ...)
+- How lists and nested objects work (if they exist)
+:::
+
+To do so, first create a new `test/scala` folder with the `org.datatools.bigdatatypes` package and create an object like `MyTypeTestTypes`
+
+See the example of Spark Types:
+```scala
+object SparkTestTypes {
+
+  val basicFields: Seq[StructField] =
+    List(
+      StructField("myInt", IntegerType, nullable = false),
+      StructField("myLong", LongType, nullable = false),
+      StructField("myFloat", FloatType, nullable = false),
+      StructField("myDouble", DoubleType, nullable = false),
+      StructField("myDecimal", DataTypes.createDecimalType, nullable = false),
+      StructField("myBoolean", BooleanType, nullable = false),
+      StructField("myString", StringType, nullable = false)
+    )
+  val basicWithList: Seq[StructField] =
+    List(
+      StructField("myInt", IntegerType, nullable = false),
+      StructField("myList", ArrayType(IntegerType), nullable = true)
+    )
+// ...
+}
+```
+Create a new package for your tests called `myType` and add a new class there for each conversion.
+
+### Tests for reverse conversion
+From our type to the generic one
+
+Create a file called `MyTypeConversionSpec` and add some tests there. You can add the following tests:
+- Simple individual type
+- Product type (case class / object)
+- Lists
+- Nested objects
+- Some extra tests for extension methods (syntactic sugar like `.asSqlType` or `.asBigQuery` in the normal conversion)
+
+e.g. from Spark:
+```scala
+class SparkTypeConversionSpec extends UnitSpec {
+
+  "Simple Spark DataType" should "be converted into SqlType" in {
+    SqlTypeConversion[IntegerType].getType shouldBe SqlInt()
+  }
+
+  "StructField nullable" should "be converted into Nullable SqlType" in {
+    val sf = StructField("myInt", IntegerType, nullable = true)
+    sf.asSqlType shouldBe SqlInt(Nullable)
+    SqlInstanceConversion[StructField].getType(sf) shouldBe SqlInt(Nullable)
+  }
+  // ...
+}
+```
+
+
 ## Conversion: Type Class - SqlType to New Type
 
 ### Defining the syntax
 
@@ -176,7 +251,9 @@ As the types usually can be recursive (nested objects) we can start defining a m
       getSchemaWithName(f.transformKey(name, sqlType), sqlType) :: getSchema(SqlStruct(records, mode))
   }
 ```
-**_Note:_** this method probably could be copied, changing only the return type for our type
+:::tip
+This method can probably be copied as-is, changing only the return type to our new type. You will create `getSchemaWithName` next.
+:::
 
 And another method (`getSchemaWithName` in this example) to specify the specific types:
 In this case, we are showing an example from BigQuery as it seems simpler to understand:
@@ -190,8 +267,13 @@ In this case, we are showing an example from BigQuery as it seems simpler to und
       Field.newBuilder(name, StandardSQLTypeName.INT64).setMode(sqlModeToBigQueryMode(mode)).build()
     case SqlFloat(mode) =>
       Field.newBuilder(name, StandardSQLTypeName.FLOAT64).setMode(sqlModeToBigQueryMode(mode)).build()
-    ...
-    ...
+    case SqlDouble(mode) => ???
+    case SqlDecimal(mode) => ???
+    case SqlBool(mode) => ???
+    case SqlString(mode) => ???
+    case SqlTimestamp(mode) => ???
+    case SqlDate(mode) => ???
+    case SqlStruct(subType, mode) => ???
   }
 ```
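+
+As a hint for the least obvious case: `SqlStruct` can be handled by recursing into `getSchema` and attaching the result as sub-fields. A minimal sketch, assuming the `getSchema` method defined above returns `List[Field]` (not necessarily the library's exact implementation):
+```scala
+    case SqlStruct(records, mode) =>
+      // Build the nested fields recursively and attach them to a STRUCT field
+      Field
+        .newBuilder(name, StandardSQLTypeName.STRUCT, getSchema(SqlStruct(records, mode)): _*)
+        .setMode(sqlModeToBigQueryMode(mode))
+        .build()
+```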
 Same example from Spark:
@@ -377,6 +459,9 @@ object SparkTypeConversion {
   implicit val longType: SqlTypeConversion[LongType] = SqlTypeConversion.instance(SqlLong())
   implicit val doubleType: SqlTypeConversion[DoubleType] = SqlTypeConversion.instance(SqlDouble())
 ```
+:::tip
+You can copy & paste all the available types from other modules, like the [Spark one](../../spark/src/main/scala/org/datatools/bigdatatypes/spark/SparkTypeConversion.scala)
+:::
 
 - Probably we use an instance of our type, for example, in Spark, we have `StructField` and `StructType` as instances,
 so we cover them using `SqlInstanceConversion` _Type Class_. In Cassandra we use internally a tuple `(String, DataType)`, and it also works
diff --git a/docs/Modules/Circe.md b/docs/Modules/Circe.md
new file mode 100644
index 00000000..c5a8a301
--- /dev/null
+++ b/docs/Modules/Circe.md
@@ -0,0 +1,68 @@
+---
+sidebar_position: 6
+---
+# Circe (JSON)
+
+[Circe](https://circe.github.io/circe/) is a JSON library for Scala.
+
+The Circe module of this library allows converting `Json` objects (from Circe) into any other type in the library.
+:::caution
+For now, only conversions from Circe to other types are available; conversions from other types to Circe are not ready yet.
+:::
+
+:::info
+JSON objects do not have very concrete types, meaning that `number` is a type,
+but more specific types like `integer`, `float` or others do not exist.
+Because of that, any conversion will turn `number` into a `Decimal` type,
+as `Decimal` is the only one that can preserve the precision of any arbitrary number
+:::
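+
+For example, any JSON number, integer or not, ends up as `SqlDecimal`. A quick sketch using the `asSqlType` extension method that this module provides (it returns the library's generic `SqlType` representation):
+```scala
+import io.circe.Json
+import org.datatools.bigdatatypes.circe.CirceTypeConversion.*
+
+Json.fromInt(1).asSqlType            // SqlDecimal(Required)
+Json.fromDoubleOrNull(1.5).asSqlType // SqlDecimal(Required)
+```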
+<details><summary>About Circe and private types</summary>
+
+Circe has more specific types than `JNumber`, like `JLong`, `JDouble` and others,
+but all of them are private to Circe itself, so we cannot use them, not even for matching types during conversions.
+In any case, even if we were able to use them, when parsing a JSON string (probably the most common case)
+we could not detect the specific types.
+</details>
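+
+The following example builds a `Json` value with Circe and derives both a Spark schema and a BigQuery schema from it: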
+ +```scala +import io.Circe.Json +import org.datatools.bigdatatypes.circe.CirceTypeConversion.* +import org.datatools.bigdatatypes.formats.Formats.implicitDefaultFormats +import org.datatools.bigdatatypes.spark.SqlInstanceToSpark.InstanceSyntax +import org.datatools.bigdatatypes.bigquery.SqlInstanceToBigQuery.* + + val circeJson: Json = Json.fromFields(List( + ("id", Json.fromString("test")), + ("foo", Json.fromString("test")), + ("bar", Json.fromInt(1)) + )) + + val sparkSchema: StructType = circeJson.asSparkSchema + val bqSchema: Schema = circeJson.asBigQuery.schema +``` + +Or if you do it from a JSON parsed using Circe: +```scala +import io.circe._, io.circe.parser._ + +import org.datatools.bigdatatypes.circe.CirceTypeConversion.* +import org.datatools.bigdatatypes.formats.Formats.implicitDefaultFormats +import org.datatools.bigdatatypes.spark.SqlInstanceToSpark.InstanceSyntax +import org.datatools.bigdatatypes.bigquery.SqlInstanceToBigQuery.* + +val rawJson: String = """ +{ + "foo": "bar", + "baz": 123, + "list of stuff": [ 4, 5, 6 ] +} +""" +val parseResult = parse(rawJson) +// parseResult: Either[ParsingFailure, Json] +val sparkSchema = parseResult.map(j => j.asSparkSchema) +// sparkSchema: Either[ParsingFailure, StructType] +val bqSchema = parseResult.map(j => j.asBigQuery.schema) +// bqSchema: Either[ParsingFailure, Schema] +``` + +--- diff --git a/docs/intro.md b/docs/intro.md index 909a1853..fbccb59e 100644 --- a/docs/intro.md +++ b/docs/intro.md @@ -33,11 +33,12 @@ or a BigQuery table into a Cassandra table without having code that relates thos ### Available conversions: -| From / To |Scala Types |BigQuery |Spark |Cassandra | -|------------|:----------------:|:----------------:|:----------------:|:----------------:| -|Scala Types | - |:white_check_mark:|:white_check_mark:|:white_check_mark:| -|BigQuery | | - |:white_check_mark:|:white_check_mark:| -|Spark | |:white_check_mark:| - |:white_check_mark:| -|Cassandra | |:white_check_mark:|:white_check_mark:| - | +| From / To |Scala Types |BigQuery |Spark |Cassandra | Circe (JSON) | +|--------------|:----------------:|:----------------:|:----------------:|:----------------:|:------------:| +| Scala Types | - |:white_check_mark:|:white_check_mark:|:white_check_mark:| | +| BigQuery | | - |:white_check_mark:|:white_check_mark:| | +| Spark | |:white_check_mark:| - |:white_check_mark:| | +| Cassandra | |:white_check_mark:|:white_check_mark:| - | | +| Circe (JSON) | |:white_check_mark:|:white_check_mark:|:white_check_mark:| | diff --git a/examples/src/test/scala/org/datatools/bigdatatypes/CassandraToOthers.scala b/examples/src/test/scala/org/datatools/bigdatatypes/CassandraToOthers.scala index 474c881a..72b628a9 100644 --- a/examples/src/test/scala/org/datatools/bigdatatypes/CassandraToOthers.scala +++ b/examples/src/test/scala/org/datatools/bigdatatypes/CassandraToOthers.scala @@ -21,14 +21,18 @@ class CassandraToOthers extends UnitSpec { .withColumn("foo", DataTypes.TEXT) .withColumn("bar", DataTypes.INT) + val fields: List[Field] = List( + Field.newBuilder("id", StandardSQLTypeName.STRING).setMode(Mode.REQUIRED).build(), + Field.newBuilder("foo", StandardSQLTypeName.STRING).setMode(Mode.REQUIRED).build(), + Field.newBuilder("bar", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build() + ) + val bqSchema: Schema = Schema.of(toJava(fields)) + "Cassandra table" should "be converted into BigQuery Schema" in { - val fields = List( - Field.newBuilder("id", StandardSQLTypeName.STRING).setMode(Mode.REQUIRED).build(), - 
Field.newBuilder("foo", StandardSQLTypeName.STRING).setMode(Mode.REQUIRED).build(), - Field.newBuilder("bar", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build() - ) - val bqSchema = Schema.of(toJava(fields)) - SqlInstanceToBigQuery[CreateTable] + SqlInstanceToBigQuery[CreateTable].bigQueryFields(cassandraTable).schema shouldBe bqSchema + } + + it should "be converted into BigQuery Schema using extension method" in { cassandraTable.asBigQuery.schema shouldBe bqSchema } } diff --git a/examples/src/test/scala/org/datatools/bigdatatypes/CirceToOthers.scala b/examples/src/test/scala/org/datatools/bigdatatypes/CirceToOthers.scala new file mode 100644 index 00000000..89279da8 --- /dev/null +++ b/examples/src/test/scala/org/datatools/bigdatatypes/CirceToOthers.scala @@ -0,0 +1,50 @@ +package org.datatools.bigdatatypes + +import com.google.cloud.bigquery.Field.Mode +import com.google.cloud.bigquery.{Field, Schema, StandardSQLTypeName} +import io.circe.Json +import org.datatools.bigdatatypes.bigquery.JavaConverters.toJava +import org.datatools.bigdatatypes.bigquery.SqlInstanceToBigQuery.{InstanceSchemaSyntax, InstanceSyntax} +import org.datatools.bigdatatypes.cassandra.CassandraTables +import org.datatools.bigdatatypes.cassandra.CassandraTables.AsCassandraInstanceSyntax +import org.datatools.bigdatatypes.circe.CirceTypeConversion.circeJsonType +import org.datatools.bigdatatypes.formats.Formats.implicitDefaultFormats + +class CirceToOthers extends UnitSpec { + + behavior of "Circe types to Cassandra" + + val circeJson: Json = CirceTestTypes.basicTypes + + "Json from Circe" should "be converted into Cassandra Table" in { + CassandraTables.table[Json](CirceTestTypes.basicTypes, "testTable", "myLong").toString shouldBe + "CREATE TABLE testtable (myint decimal,mylong decimal PRIMARY KEY,myfloat decimal,mydouble decimal,mydecimal decimal,myboolean boolean,mystring text)" + } + + it should "be converted into Cassandra Table using extension method" in { + CirceTestTypes.basicTypes.asCassandra("testTable", "myLong").toString shouldBe + "CREATE TABLE testtable (myint decimal,mylong decimal PRIMARY KEY,myfloat decimal,mydouble decimal,mydecimal decimal,myboolean boolean,mystring text)" + } + + behavior of "Circe to BigQuery" + + val fields: List[Field] = List( + Field.newBuilder("id", StandardSQLTypeName.STRING).setMode(Mode.REQUIRED).build(), + Field.newBuilder("foo", StandardSQLTypeName.STRING).setMode(Mode.REQUIRED).build(), + Field.newBuilder("bar", StandardSQLTypeName.NUMERIC).setMode(Mode.REQUIRED).build() + ) + val bqSchema: Schema = Schema.of(toJava(fields)) + + val circe: Json = Json.fromFields( + List( + ("id", Json.fromString("test")), + ("foo", Json.fromString("test")), + ("bar", Json.fromInt(1)) + ) + ) + + it should "be converted into BigQuery Schema" in { + circe.asBigQuery.schema shouldBe bqSchema + } + +} diff --git a/examples/src/test/scala_2/bigdatatypes/CassandraToOthers.scala b/examples/src/test/scala_2/bigdatatypes/CassandraToOthers.scala index d5463078..e4692915 100644 --- a/examples/src/test/scala_2/bigdatatypes/CassandraToOthers.scala +++ b/examples/src/test/scala_2/bigdatatypes/CassandraToOthers.scala @@ -21,7 +21,6 @@ class CassandraToOthers extends UnitSpec { .withColumn("bar", DataTypes.INT) "Cassandra table" should "be converted into Spark Schema" in { - // val sparkSchema: StructType = myDataFrame.schema val sparkSchema: StructType = StructType( List( StructField("id", StringType, nullable = false), diff --git a/examples/src/test/scala_2/bigdatatypes/CirceToOthers.scala 
new file mode 100644
index 00000000..bcba706e
--- /dev/null
+++ b/examples/src/test/scala_2/bigdatatypes/CirceToOthers.scala
@@ -0,0 +1,39 @@
+package bigdatatypes
+
+import io.circe.Json
+import org.apache.spark.sql.types.{DataTypes, StringType, StructField, StructType}
+import org.datatools.bigdatatypes.UnitSpec
+import org.datatools.bigdatatypes.circe.CirceTypeConversion.circeJsonType
+import org.datatools.bigdatatypes.formats.Formats.implicitDefaultFormats
+import org.datatools.bigdatatypes.spark.SparkSchemas
+import org.datatools.bigdatatypes.spark.SqlInstanceToSpark.InstanceSyntax
+
+class CirceToOthers extends UnitSpec {
+
+  behavior of "Circe to Spark types"
+
+  val sparkSchema: StructType = StructType(
+    List(
+      StructField("id", StringType, nullable = false),
+      StructField("foo", StringType, nullable = false),
+      StructField("bar", DataTypes.createDecimalType, nullable = false)
+    )
+  )
+
+  val circe: Json = Json.fromFields(
+    List(
+      ("id", Json.fromString("test")),
+      ("foo", Json.fromString("test")),
+      ("bar", Json.fromInt(1))
+    )
+  )
+
+  "Circe Json" should "be converted into Spark Schema" in {
+    SparkSchemas.schema[Json](circe) shouldBe sparkSchema
+  }
+
+  it should "be converted into Spark Schema using extension method" in {
+    circe.asSparkSchema shouldBe sparkSchema
+  }
+
+}
diff --git a/jsoncirce/src/main/scala/org/datatools/bigdatatypes/circe/CirceTypeConversion.scala b/jsoncirce/src/main/scala/org/datatools/bigdatatypes/circe/CirceTypeConversion.scala
new file mode 100644
index 00000000..600d1c70
--- /dev/null
+++ b/jsoncirce/src/main/scala/org/datatools/bigdatatypes/circe/CirceTypeConversion.scala
@@ -0,0 +1,50 @@
+package org.datatools.bigdatatypes.circe
+
+import io.circe.Json
+import org.datatools.bigdatatypes.basictypes.SqlType.*
+import org.datatools.bigdatatypes.basictypes.SqlTypeMode.{Repeated, Required}
+import org.datatools.bigdatatypes.basictypes.{SqlType, SqlTypeMode}
+import org.datatools.bigdatatypes.conversions.SqlInstanceConversion
+
+import scala.annotation.tailrec
+
+object CirceTypeConversion {
+
+  /** Circe does not have an implementation of SqlTypeConversion because of its private API.
+    * We cannot detect whether a field is JDouble, JLong and so on, because those types are private,
+    * so the only way to detect them is through methods like `.isNumber` or `.isString`,
+    * which are only available on an instance of [[Json]]
+    */
+
+  /** Implementation of SqlInstanceConversion Type Class */
+  implicit val circeJsonType: SqlInstanceConversion[Json] = (value: Json) => convertCirceType(value)
+
+  @tailrec
+  def convertCirceType(j: Json, repeated: Boolean = false): SqlType =
+    j match {
+      // Arrays are assumed non-empty and homogeneous: the element type is taken from the first item
+      case v if v.isArray   => convertCirceType(v.asArray.get.apply(0), repeated = true)
+      case v if v.isNumber  => SqlDecimal(isRepeated(repeated))
+      case v if v.isString  => SqlString(isRepeated(repeated))
+      case v if v.isBoolean => SqlBool(isRepeated(repeated))
+      case v if v.isObject =>
+        val pairs = v.asObject.get.keys zip v.asObject.get.values
+        SqlStruct(loopStructs(pairs), isRepeated(repeated))
+    }
+
+  /** For recursion, loops over all items in an object
+    */
+  private def loopStructs(l: Iterable[(String, Json)]): List[(String, SqlType)] =
+    l.map(x => x._1 -> convertCirceType(x._2)).toList
+
+  /** From Boolean to Repeated or Required Mode
+    */
+  private def isRepeated(repeated: Boolean): SqlTypeMode = if (repeated) Repeated else Required
+
+  /** Extension method. 
Enables val myInstance: Json -> myInstance.asSqlType + * @param value in a Json from Circe + */ + implicit class StructTypeSyntax(value: Json) { + def asSqlType: SqlType = SqlInstanceConversion[Json].getType(value) + } + +} diff --git a/jsoncirce/src/test/scala/org/datatools/bigdatatypes/CirceTestTypes.scala b/jsoncirce/src/test/scala/org/datatools/bigdatatypes/CirceTestTypes.scala new file mode 100644 index 00000000..8edc45a0 --- /dev/null +++ b/jsoncirce/src/test/scala/org/datatools/bigdatatypes/CirceTestTypes.scala @@ -0,0 +1,61 @@ +package org.datatools.bigdatatypes + +import io.circe.Json + +/** Test types from [[TestTypes]] converted to Circe. They can be used in multiple tests + */ +object CirceTestTypes { + + val basicFields: Seq[(String, Json)] = + List( + ("myInt", Json.fromInt(1)), + ("myLong", Json.fromLong(1)), + ("myFloat", Json.fromFloat(1).get), + ("myDouble", Json.fromDouble(1).get), + ("myDecimal", Json.fromBigDecimal(1)), + ("myBoolean", Json.fromBoolean(true)), + ("myString", Json.fromString("")) + ) + + val basicTypes: Json = Json.fromFields(basicFields) + + val basicWithList: Json = Json.fromFields( + List( + ("myInt", Json.fromInt(1)), + ( + "myList", + Json.fromValues( + List( + Json.fromInt(1) + ) + ) + ) + ) + ) + + val basicNested: Json = Json.fromFields( + List( + ("myInt", Json.fromInt(1)), + ("myStruct", Json.fromFields(basicFields)) + ) + ) + + val basicNestedWithList: Json = Json.fromFields( + List( + ( + "matrix", + Json.fromValues( + List( + Json.fromFields( + List( + ("x", Json.fromInt(1)), + ("y", Json.fromInt(1)) + ) + ) + ) + ) + ) + ) + ) + +} diff --git a/jsoncirce/src/test/scala/org/datatools/bigdatatypes/SqlTestTypes.scala b/jsoncirce/src/test/scala/org/datatools/bigdatatypes/SqlTestTypes.scala new file mode 100644 index 00000000..f097cb69 --- /dev/null +++ b/jsoncirce/src/test/scala/org/datatools/bigdatatypes/SqlTestTypes.scala @@ -0,0 +1,82 @@ +package org.datatools.bigdatatypes + +import org.datatools.bigdatatypes.basictypes.* +import org.datatools.bigdatatypes.basictypes.SqlType.* +import org.datatools.bigdatatypes.basictypes.SqlTypeMode.* + +import java.sql.{Date, Timestamp} + +/** Case Classes and their SqlType representations + * This should be used to test SqlTypeConversion and all reverse conversions from other modules + */ +object SqlTestTypes { + + /** Used for case classes, nested or others */ + val basicFields: List[(String, SqlType)] = + List( + ("myInt", SqlDecimal(Required)), + ("myLong", SqlDecimal(Required)), + ("myFloat", SqlDecimal(Required)), + ("myDouble", SqlDecimal(Required)), + ("myDecimal", SqlDecimal(Required)), + ("myBoolean", SqlBool(Required)), + ("myString", SqlString(Required)) + ) + + val basicOption: SqlStruct = SqlStruct( + List( + ("myString", SqlString(Required)), + ("myOptionalString", SqlString(Nullable)) + ) + ) + + val basicTypes: SqlStruct = SqlStruct(basicFields) + + val basicOptionTypes: SqlStruct = SqlStruct( + List( + ("myInt", SqlDecimal(Nullable)), + ("myLong", SqlDecimal(Nullable)), + ("myFloat", SqlDecimal(Nullable)), + ("myDouble", SqlDecimal(Nullable)), + ("myDecimal", SqlDecimal(Nullable)), + ("myBoolean", SqlBool(Nullable)), + ("myString", SqlString(Nullable)) + ) + ) + + val basicWithList: SqlStruct = SqlStruct( + List( + ("myInt", SqlDecimal(Required)), + ("myList", SqlDecimal(Repeated)) + ) + ) + + val basicNested: SqlStruct = SqlStruct( + List( + ("myInt", SqlDecimal(Required)), + ("myStruct", SqlStruct(basicFields, Required)) + ) + ) + + val basicOptionalNested: SqlStruct = SqlStruct( + 
List( + ("myInt", SqlDecimal(Required)), + ("myStruct", SqlStruct(basicFields, Nullable)) + ) + ) + + val basicNestedWithList: SqlStruct = SqlStruct( + List( + ( + "matrix", + SqlStruct( + List( + ("x", SqlDecimal(Required)), + ("y", SqlDecimal(Required)) + ), + Repeated + ) + ) + ) + ) +} diff --git a/jsoncirce/src/test/scala/org/datatools/bigdatatypes/circe/CirceTypeConversionSpec.scala b/jsoncirce/src/test/scala/org/datatools/bigdatatypes/circe/CirceTypeConversionSpec.scala new file mode 100644 index 00000000..995a5c4e --- /dev/null +++ b/jsoncirce/src/test/scala/org/datatools/bigdatatypes/circe/CirceTypeConversionSpec.scala @@ -0,0 +1,44 @@ +package org.datatools.bigdatatypes.circe + +import io.circe.{Json, JsonObject} +import org.datatools.bigdatatypes.basictypes.* +import org.datatools.bigdatatypes.basictypes.SqlType.* +import org.datatools.bigdatatypes.circe.CirceTypeConversion.* +import org.datatools.bigdatatypes.conversions.SqlInstanceConversion +import org.datatools.bigdatatypes.{CirceTestTypes as C, SqlTestTypes as S, UnitSpec} + +/** Reverse conversion, from Circe types to [[SqlType]]s + */ +class CirceTypeConversionSpec extends UnitSpec { + + "Simple Json Type" should "be converted into SqlType" in { + SqlInstanceConversion[Json].getType(Json.fromString("test")) shouldBe SqlString() + } + + "Simple Json Object" should "be converted into SqlStruct" in { + val j = Json.fromJsonObject(JsonObject(("myInt", Json.fromInt(1)))) + val expected = SqlStruct(List(("myInt", SqlDecimal()))) + SqlInstanceConversion[Json].getType(j) shouldBe expected + } + + "Basic Json fields" should "be converted into Basic SqlTypes" in { + SqlInstanceConversion[Json].getType(C.basicTypes) shouldBe S.basicTypes + } + + "Basic Json fields with Arrays" should "be converted into SqlTypes" in { + SqlInstanceConversion[Json].getType(C.basicWithList) shouldBe S.basicWithList + } + + "Basic Json fields with Nested objects" should "be converted into SqlTypes" in { + SqlInstanceConversion[Json].getType(C.basicNested) shouldBe S.basicNested + } + + "Basic Json fields with Nested Arrays" should "be converted into SqlTypes" in { + SqlInstanceConversion[Json].getType(C.basicNestedWithList) shouldBe S.basicNestedWithList + } + + "Extension method asSqlType" should "convert a Json into SqlTypes" in { + C.basicTypes.asSqlType shouldBe S.basicTypes + } + +} diff --git a/spark/src/test/scala/org/datatools/bigdatatypes/SparkTestTypes.scala b/spark/src/test/scala/org/datatools/bigdatatypes/SparkTestTypes.scala index 8f0a58ac..372c227c 100644 --- a/spark/src/test/scala/org/datatools/bigdatatypes/SparkTestTypes.scala +++ b/spark/src/test/scala/org/datatools/bigdatatypes/SparkTestTypes.scala @@ -15,7 +15,7 @@ import org.apache.spark.sql.types.{ TimestampType } -/** Test types from [[TestTypes]] converted to BigQuery. They can be used in multiple tests +/** Test types from [[TestTypes]] converted to Spark. They can be used in multiple tests */ object SparkTestTypes { @@ -30,7 +30,6 @@ object SparkTestTypes { StructField("myString", StringType, nullable = false) ) - /** BigQuery doesn't have a main object, it has a list of fields in the root path */ val basicTypes: StructType = StructType(basicFields) val basicOptionTypes: Seq[StructField] =