diff --git a/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL_Java11.groovy b/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL_Java11.groovy
new file mode 100644
index 0000000000000..50fd12411a15a
--- /dev/null
+++ b/.test-infra/jenkins/job_PreCommit_BeamSQL_ZetaSQL_Java11.groovy
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import PrecommitJobBuilder
+import CommonJobProperties as properties
+
+PrecommitJobBuilder builder = new PrecommitJobBuilder(
+ scope: this,
+ nameBase: 'JavaBeamZetaSQLJava11',
+ gradleTask: ':javaPreCommitBeamZetaSQL',
+ gradleSwitches: [
+ '-PdisableSpotlessCheck=true',
+ '-PcompileAndRunTestsWithJava11',
+ "-Pjava11Home=${CommonJobProperties.JAVA_11_HOME}"
+ ], // spotless checked in separate pre-commit
+ triggerPathPatterns: [
+ '^sdks/java/extensions/sql/.*$',
+ ]
+)
+builder.build {
+ publishers {
+ archiveJunit('**/build/test-results/**/*.xml')
+ }
+}
diff --git a/CHANGES.md b/CHANGES.md
index f1915fb79ba14..9aff55608c1d9 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -60,7 +60,8 @@
## New Features / Improvements
-* --direct_num_workers=0 is supported for FnApi runner. It will set the number of threads/subprocesses to number of cores of the machine executing the pipeline ([BEAM-9443](https://issues.apache.org/jira/browse/BEAM-9443)).
+* `--workerCacheMB` flag is supported in Dataflow streaming pipeline ([BEAM-9964](https://issues.apache.org/jira/browse/BEAM-9964)).
+* `--direct_num_workers=0` is supported for FnApi runner. It will set the number of threads/subprocesses to number of cores of the machine executing the pipeline ([BEAM-9443](https://issues.apache.org/jira/browse/BEAM-9443)).
* Python SDK now has experimental support for SqlTransform ([BEAM-8603](https://issues.apache.org/jira/browse/BEAM-8603)).
## Breaking Changes
diff --git a/build.gradle b/build.gradle
index 7fed2aa9308c8..848eef631ddf0 100644
--- a/build.gradle
+++ b/build.gradle
@@ -350,6 +350,7 @@ if (project.hasProperty('javaLinkageArtifactIds')) {
if (project.hasProperty('compileAndRunTestsWithJava11')) {
project.javaPreCommitPortabilityApi.dependsOn ':sdks:java:testing:test-utils:verifyJavaVersion'
project.javaExamplesDataflowPrecommit.dependsOn ':sdks:java:testing:test-utils:verifyJavaVersion'
+ project.javaPreCommitBeamZetaSQL.dependsOn ':sdks:java:testing:test-utils:verifyJavaVersion'
} else {
allprojects {
tasks.withType(Test) {
diff --git a/learning/katas/java/Common Transforms/Aggregation/Count/task-remote-info.yaml b/learning/katas/java/Common Transforms/Aggregation/Count/task-remote-info.yaml
index 75d8467d87c7a..82be05c692809 100644
--- a/learning/katas/java/Common Transforms/Aggregation/Count/task-remote-info.yaml
+++ b/learning/katas/java/Common Transforms/Aggregation/Count/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076163
-update_date: Fri, 07 Feb 2020 14:07:39 UTC
+update_date: Tue, 19 May 2020 07:02:13 UTC
diff --git a/learning/katas/java/Common Transforms/Aggregation/Count/task.html b/learning/katas/java/Common Transforms/Aggregation/Count/task.md
similarity index 89%
rename from learning/katas/java/Common Transforms/Aggregation/Count/task.html
rename to learning/katas/java/Common Transforms/Aggregation/Count/task.md
index 41fd3be505c94..1c4eba0e75a00 100644
--- a/learning/katas/java/Common Transforms/Aggregation/Count/task.html
+++ b/learning/katas/java/Common Transforms/Aggregation/Count/task.md
@@ -16,14 +16,12 @@
~ limitations under the License.
-->
-
-
Aggregation - Count
-
- Kata: Count the number of elements from an input.
-
-
+Aggregation - Count
+-------------------
+
+**Kata:** Count the number of elements from an input.
+
-
diff --git a/learning/katas/java/Common Transforms/Filter/Filter/task-remote-info.yaml b/learning/katas/java/Common Transforms/Filter/Filter/task-remote-info.yaml
index 5ec1ba0314b90..6033731695262 100644
--- a/learning/katas/java/Common Transforms/Filter/Filter/task-remote-info.yaml
+++ b/learning/katas/java/Common Transforms/Filter/Filter/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076162
-update_date: Fri, 07 Feb 2020 14:07:31 UTC
+update_date: Tue, 19 May 2020 07:02:10 UTC
diff --git a/learning/katas/java/Common Transforms/Filter/Filter/task.html b/learning/katas/java/Common Transforms/Filter/Filter/task.md
similarity index 73%
rename from learning/katas/java/Common Transforms/Filter/Filter/task.html
rename to learning/katas/java/Common Transforms/Filter/Filter/task.md
index 15eb012ffa40d..e499074e236f9 100644
--- a/learning/katas/java/Common Transforms/Filter/Filter/task.html
+++ b/learning/katas/java/Common Transforms/Filter/Filter/task.md
@@ -16,19 +16,15 @@
~ limitations under the License.
-->
-
-
Filter
-
- The Beam SDKs provide language-specific ways to simplify how you provide your DoFn implementation.
-
-
- Kata: Implement a filter function that filters out the odd numbers by using
-
- Filter.
-
-
+Filter
+------
+
+The Beam SDKs provide language-specific ways to simplify how you provide your DoFn implementation.
+
+**Kata:** Implement a filter function that filters out the odd numbers by using
+[Filter](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/transforms/Filter.html).
+
-
diff --git a/learning/katas/java/Common Transforms/Filter/ParDo/task-remote-info.yaml b/learning/katas/java/Common Transforms/Filter/ParDo/task-remote-info.yaml
index f602ccca515b2..e84c2bb16a3a9 100644
--- a/learning/katas/java/Common Transforms/Filter/ParDo/task-remote-info.yaml
+++ b/learning/katas/java/Common Transforms/Filter/ParDo/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076161
-update_date: Fri, 07 Feb 2020 14:07:30 UTC
+update_date: Tue, 19 May 2020 07:02:07 UTC
diff --git a/learning/katas/java/Common Transforms/Filter/ParDo/task.html b/learning/katas/java/Common Transforms/Filter/ParDo/task.md
similarity index 81%
rename from learning/katas/java/Common Transforms/Filter/ParDo/task.html
rename to learning/katas/java/Common Transforms/Filter/ParDo/task.md
index 61adb7088bb36..be21a28143fc6 100644
--- a/learning/katas/java/Common Transforms/Filter/ParDo/task.html
+++ b/learning/katas/java/Common Transforms/Filter/ParDo/task.md
@@ -16,18 +16,15 @@
~ limitations under the License.
-->
-
-
Filter using ParDo
-
- Kata: Implement a filter function that filters out the even numbers by using
-
- DoFn.
-
-
+Filter using ParDo
+------------------
+
+**Kata:** Implement a filter function that filters out the even numbers by using
+[DoFn](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/transforms/DoFn.html).
+
Use
ParDo with
DoFn and only output the intended element.
-
diff --git a/learning/katas/java/Common Transforms/WithKeys/WithKeys/task-remote-info.yaml b/learning/katas/java/Common Transforms/WithKeys/WithKeys/task-remote-info.yaml
index 90eb8aae3f2dc..870bb130cb9df 100644
--- a/learning/katas/java/Common Transforms/WithKeys/WithKeys/task-remote-info.yaml
+++ b/learning/katas/java/Common Transforms/WithKeys/WithKeys/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076168
-update_date: Fri, 07 Feb 2020 14:07:53 UTC
+update_date: Tue, 19 May 2020 07:02:28 UTC
diff --git a/learning/katas/java/Common Transforms/WithKeys/WithKeys/task.html b/learning/katas/java/Common Transforms/WithKeys/WithKeys/task.md
similarity index 86%
rename from learning/katas/java/Common Transforms/WithKeys/WithKeys/task.html
rename to learning/katas/java/Common Transforms/WithKeys/WithKeys/task.md
index e95e56e44f862..15738d6f3e3b4 100644
--- a/learning/katas/java/Common Transforms/WithKeys/WithKeys/task.html
+++ b/learning/katas/java/Common Transforms/WithKeys/WithKeys/task.md
@@ -16,19 +16,18 @@
~ limitations under the License.
-->
-
-
WithKeys
-
- Kata: Convert each fruit name into a KV of its first letter and itself, e.g.
- apple => KV.of("a", "apple")
-
-
+WithKeys
+--------
+
+**Kata:** Convert each fruit name into a KV of its first letter and itself, e.g.
+`apple => KV.of("a", "apple")`
+
If using a lambda in Java 8, withKeyType(TypeDescriptor) must be called on the
result PTransform.
-
diff --git a/learning/katas/java/Core Transforms/Branching/Branching/task-remote-info.yaml b/learning/katas/java/Core Transforms/Branching/Branching/task-remote-info.yaml
index 40576c0da4c77..5e3927a5d3754 100644
--- a/learning/katas/java/Core Transforms/Branching/Branching/task-remote-info.yaml
+++ b/learning/katas/java/Core Transforms/Branching/Branching/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076188
-update_date: Fri, 07 Feb 2020 14:28:13 UTC
+update_date: Tue, 19 May 2020 07:01:56 UTC
diff --git a/learning/katas/java/Core Transforms/Branching/Branching/task.html b/learning/katas/java/Core Transforms/Branching/Branching/task.md
similarity index 76%
rename from learning/katas/java/Core Transforms/Branching/Branching/task.html
rename to learning/katas/java/Core Transforms/Branching/Branching/task.md
index 12d9645aa03a5..3677fa9032225 100644
--- a/learning/katas/java/Core Transforms/Branching/Branching/task.html
+++ b/learning/katas/java/Core Transforms/Branching/Branching/task.md
@@ -16,20 +16,17 @@
~ limitations under the License.
-->
-
-
Branching
-
- You can use the same PCollection as input for multiple transforms without consuming the input
- or altering it.
-
-
- Kata: Branch out the numbers to two different transforms: one transform is multiplying
- each number by 5 and the other transform is multiplying each number by 10.
-
-
+Branching
+---------
+
+You can use the same PCollection as input for multiple transforms without consuming the input or
+altering it.
+
+**Kata:** Branch out the numbers to two different transforms: one transform is multiplying each
+number by 5 and the other transform is multiplying each number by 10.
+
-
diff --git a/learning/katas/java/Core Transforms/CoGroupByKey/CoGroupByKey/task-remote-info.yaml b/learning/katas/java/Core Transforms/CoGroupByKey/CoGroupByKey/task-remote-info.yaml
index 3e29e7a59f164..31dff911f354e 100644
--- a/learning/katas/java/Core Transforms/CoGroupByKey/CoGroupByKey/task-remote-info.yaml
+++ b/learning/katas/java/Core Transforms/CoGroupByKey/CoGroupByKey/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076151
-update_date: Fri, 07 Feb 2020 14:06:21 UTC
+update_date: Tue, 19 May 2020 07:08:21 UTC
diff --git a/learning/katas/java/Core Transforms/CoGroupByKey/CoGroupByKey/task.html b/learning/katas/java/Core Transforms/CoGroupByKey/CoGroupByKey/task.md
similarity index 75%
rename from learning/katas/java/Core Transforms/CoGroupByKey/CoGroupByKey/task.html
rename to learning/katas/java/Core Transforms/CoGroupByKey/CoGroupByKey/task.md
index 29f5322b0f28c..8b24f7abee27a 100644
--- a/learning/katas/java/Core Transforms/CoGroupByKey/CoGroupByKey/task.html
+++ b/learning/katas/java/Core Transforms/CoGroupByKey/CoGroupByKey/task.md
@@ -16,19 +16,17 @@
~ limitations under the License.
-->
-
-
CoGroupByKey
-
- CoGroupByKey performs a relational join of two or more key/value PCollections that have the same
- key type.
-
-
- Kata: Implement a
-
- CoGroupByKey transform that join words by its first alphabetical letter, and then produces
- the toString() representation of the WordsAlphabet model.
-
-
+CoGroupByKey
+------------
+
+CoGroupByKey performs a relational join of two or more key/value PCollections that have the same
+key type.
+
+**Kata:** Implement a
+[CoGroupByKey](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/transforms/join/CoGroupByKey.html)
+transform that joins words by its first alphabetical letter, and then produces the toString()
+representation of the WordsAlphabet model.
+
Refer to the Beam Programming Guide
"CoGroupByKey" section for more information.
-
diff --git a/learning/katas/java/Core Transforms/Combine/BinaryCombineFn Lambda/task-remote-info.yaml b/learning/katas/java/Core Transforms/Combine/BinaryCombineFn Lambda/task-remote-info.yaml
index 4e5e0f61f0f46..7dd4a961b2815 100644
--- a/learning/katas/java/Core Transforms/Combine/BinaryCombineFn Lambda/task-remote-info.yaml
+++ b/learning/katas/java/Core Transforms/Combine/BinaryCombineFn Lambda/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076154
-update_date: Fri, 07 Feb 2020 14:34:40 UTC
+update_date: Tue, 19 May 2020 07:01:38 UTC
diff --git a/learning/katas/java/Core Transforms/Combine/BinaryCombineFn Lambda/task.html b/learning/katas/java/Core Transforms/Combine/BinaryCombineFn Lambda/task.md
similarity index 75%
rename from learning/katas/java/Core Transforms/Combine/BinaryCombineFn Lambda/task.html
rename to learning/katas/java/Core Transforms/Combine/BinaryCombineFn Lambda/task.md
index ccafa6219ae90..45a3e4d0582a7 100644
--- a/learning/katas/java/Core Transforms/Combine/BinaryCombineFn Lambda/task.html
+++ b/learning/katas/java/Core Transforms/Combine/BinaryCombineFn Lambda/task.md
@@ -16,28 +16,25 @@
~ limitations under the License.
-->
-
-
Combine - BinaryCombineFn Lambda
-
- BinaryCombineFn is used for implementing combiners that are more easily expressed as binary
- operations.
-
-
- Since Beam v2.13.0, you can also use lambda or method reference in order to create the
- BinaryCombineFn.
-
-
- Kata: Implement the summation of BigInteger using lambda or method reference.
-
-
+Combine - BinaryCombineFn Lambda
+--------------------------------
+
+BinaryCombineFn is used for implementing combiners that are more easily expressed as binary
+operations.
+
+Since Beam v2.13.0, you can also use lambda or method reference in order to create the
+BinaryCombineFn.
+
+**Kata:** Implement the summation of BigInteger using lambda or method reference.
+
Refer to the Beam Programming Guide
"Combine" section for more information.
-
diff --git a/learning/katas/java/Core Transforms/Combine/BinaryCombineFn/task-remote-info.yaml b/learning/katas/java/Core Transforms/Combine/BinaryCombineFn/task-remote-info.yaml
index cec908ff11360..605c7c0153bf2 100644
--- a/learning/katas/java/Core Transforms/Combine/BinaryCombineFn/task-remote-info.yaml
+++ b/learning/katas/java/Core Transforms/Combine/BinaryCombineFn/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076153
-update_date: Fri, 07 Feb 2020 14:34:37 UTC
+update_date: Tue, 19 May 2020 07:01:35 UTC
diff --git a/learning/katas/java/Core Transforms/Combine/BinaryCombineFn/task.html b/learning/katas/java/Core Transforms/Combine/BinaryCombineFn/task.md
similarity index 55%
rename from learning/katas/java/Core Transforms/Combine/BinaryCombineFn/task.html
rename to learning/katas/java/Core Transforms/Combine/BinaryCombineFn/task.md
index c18d3ac460033..ea026b1a9ac04 100644
--- a/learning/katas/java/Core Transforms/Combine/BinaryCombineFn/task.html
+++ b/learning/katas/java/Core Transforms/Combine/BinaryCombineFn/task.md
@@ -16,35 +16,30 @@
~ limitations under the License.
-->
-
-
Combine - BinaryCombineFn
-
- Combine is a Beam transform for combining collections of elements or values in your data.
- When you apply a Combine transform, you must provide the function that contains the logic for
- combining the elements or values. The combining function should be commutative and associative,
- as the function is not necessarily invoked exactly once on all values with a given key. Because
- the input data (including the value collection) may be distributed across multiple workers, the
- combining function might be called multiple times to perform partial combining on subsets of
- the value collection.
-
-
- BinaryCombineFn is used for implementing combiners that are more easily expressed as binary
- operations.
-
-
+Combine - BinaryCombineFn
+-------------------------
+
+Combine is a Beam transform for combining collections of elements or values in your data. When you
+apply a Combine transform, you must provide the function that contains the logic for combining the
+elements or values. The combining function should be commutative and associative, as the function
+is not necessarily invoked exactly once on all values with a given key. Because the input data
+(including the value collection) may be distributed across multiple workers, the combining function
+might be called multiple times to perform partial combining on subsets of the value collection.
+
+BinaryCombineFn is used for implementing combiners that are more easily expressed as binary
+operations.
+
+**Kata:** Implement the summation of BigInteger using
+[Combine.BinaryCombineFn](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/transforms/Combine.BinaryCombineFn.html).
+
Refer to the Beam Programming Guide
"Combine" section for more information.
-
diff --git a/learning/katas/java/Core Transforms/Combine/Combine PerKey/task-remote-info.yaml b/learning/katas/java/Core Transforms/Combine/Combine PerKey/task-remote-info.yaml
index e58c6a94ce470..ae7879c02da79 100644
--- a/learning/katas/java/Core Transforms/Combine/Combine PerKey/task-remote-info.yaml
+++ b/learning/katas/java/Core Transforms/Combine/Combine PerKey/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076155
-update_date: Fri, 07 Feb 2020 14:34:43 UTC
+update_date: Tue, 19 May 2020 07:01:41 UTC
diff --git a/learning/katas/java/Core Transforms/Combine/Combine PerKey/task.html b/learning/katas/java/Core Transforms/Combine/Combine PerKey/task.md
similarity index 67%
rename from learning/katas/java/Core Transforms/Combine/Combine PerKey/task.html
rename to learning/katas/java/Core Transforms/Combine/Combine PerKey/task.md
index 62b6afb0c1b07..3f80616714d2d 100644
--- a/learning/katas/java/Core Transforms/Combine/Combine PerKey/task.html
+++ b/learning/katas/java/Core Transforms/Combine/Combine PerKey/task.md
@@ -16,33 +16,31 @@
~ limitations under the License.
-->
-
-
Combine - Combine PerKey
-
- After creating a keyed PCollection (for example, by using a GroupByKey transform), a common
- pattern is to combine the collection of values associated with each key into a single, merged
- value. This pattern of a GroupByKey followed by merging the collection of values is equivalent to
- Combine PerKey transform. The combine function you supply to Combine PerKey must be an associative
- reduction function or a subclass of CombineFn.
-
-
- Kata: Implement the sum of scores per player using
-
- Combine.perKey.
-
-
+Combine - Combine PerKey
+------------------------
+
+After creating a keyed PCollection (for example, by using a GroupByKey transform), a common pattern
+is to combine the collection of values associated with each key into a single, merged value. This
+pattern of a GroupByKey followed by merging the collection of values is equivalent to Combine
+PerKey transform. The combine function you supply to Combine PerKey must be an associative
+reduction function or a subclass of CombineFn.
+
+**Kata:** Implement the sum of scores per player using
+[Combine.perKey](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/transforms/CombineFnBase.GlobalCombineFn.html).
+
-
diff --git a/learning/katas/java/Core Transforms/Combine/CombineFn/task-remote-info.yaml b/learning/katas/java/Core Transforms/Combine/CombineFn/task-remote-info.yaml
index 6d028949442a7..195a816d65590 100644
--- a/learning/katas/java/Core Transforms/Combine/CombineFn/task-remote-info.yaml
+++ b/learning/katas/java/Core Transforms/Combine/CombineFn/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076152
-update_date: Fri, 07 Feb 2020 14:34:34 UTC
+update_date: Tue, 19 May 2020 07:01:32 UTC
diff --git a/learning/katas/java/Core Transforms/Combine/CombineFn/task.html b/learning/katas/java/Core Transforms/Combine/CombineFn/task.md
similarity index 51%
rename from learning/katas/java/Core Transforms/Combine/CombineFn/task.html
rename to learning/katas/java/Core Transforms/Combine/CombineFn/task.md
index 94b6be384dd91..13bce44ecbf1a 100644
--- a/learning/katas/java/Core Transforms/Combine/CombineFn/task.html
+++ b/learning/katas/java/Core Transforms/Combine/CombineFn/task.md
@@ -16,37 +16,32 @@
~ limitations under the License.
-->
-
-
Combine - CombineFn
-
- Combine is a Beam transform for combining collections of elements or values in your data.
- When you apply a Combine transform, you must provide the function that contains the logic for
- combining the elements or values. The combining function should be commutative and associative,
- as the function is not necessarily invoked exactly once on all values with a given key. Because
- the input data (including the value collection) may be distributed across multiple workers, the
- combining function might be called multiple times to perform partial combining on subsets of
- the value collection.
-
-
- Complex combination operations might require you to create a subclass of CombineFn that has an
- accumulation type distinct from the input/output type. You should use CombineFn if the combine
- function requires a more sophisticated accumulator, must perform additional pre- or
- post-processing, might change the output type, or takes the key into account.
-
-
+Combine - CombineFn
+-------------------
+
+Combine is a Beam transform for combining collections of elements or values in your data. When you
+apply a Combine transform, you must provide the function that contains the logic for combining the
+elements or values. The combining function should be commutative and associative, as the function
+is not necessarily invoked exactly once on all values with a given key. Because the input data
+(including the value collection) may be distributed across multiple workers, the combining function
+might be called multiple times to perform partial combining on subsets of the value collection.
+
+Complex combination operations might require you to create a subclass of CombineFn that has an
+accumulation type distinct from the input/output type. You should use CombineFn if the combine
+function requires a more sophisticated accumulator, must perform additional pre- or post-processing,
+might change the output type, or takes the key into account.
+
+**Kata:** Implement the average of numbers using
+[Combine.CombineFn](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/transforms/Combine.CombineFn.html).
+
Extend the
Combine.CombineFn class that counts the average of the number.
-
diff --git a/learning/katas/java/Core Transforms/Combine/Simple Function/task-remote-info.yaml b/learning/katas/java/Core Transforms/Combine/Simple Function/task-remote-info.yaml
index 6bf9717cbf64f..cc9a7560915df 100644
--- a/learning/katas/java/Core Transforms/Combine/Simple Function/task-remote-info.yaml
+++ b/learning/katas/java/Core Transforms/Combine/Simple Function/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076194
-update_date: Fri, 07 Feb 2020 14:34:33 UTC
+update_date: Tue, 19 May 2020 07:07:04 UTC
diff --git a/learning/katas/java/Core Transforms/Combine/Simple Function/task.html b/learning/katas/java/Core Transforms/Combine/Simple Function/task.md
similarity index 57%
rename from learning/katas/java/Core Transforms/Combine/Simple Function/task.html
rename to learning/katas/java/Core Transforms/Combine/Simple Function/task.md
index d501be8fcc2e5..b20fb9d4749ba 100644
--- a/learning/katas/java/Core Transforms/Combine/Simple Function/task.html
+++ b/learning/katas/java/Core Transforms/Combine/Simple Function/task.md
@@ -16,34 +16,29 @@
~ limitations under the License.
-->
-
-
Combine - Simple Function
-
- Combine is a Beam transform for combining collections of elements or values in your data.
- When you apply a Combine transform, you must provide the function that contains the logic for
- combining the elements or values. The combining function should be commutative and associative,
- as the function is not necessarily invoked exactly once on all values with a given key. Because
- the input data (including the value collection) may be distributed across multiple workers, the
- combining function might be called multiple times to perform partial combining on subsets of
- the value collection.
-
-
- Simple combine operations, such as sums, can usually be implemented as a simple function.
-
-
+Combine - Simple Function
+-------------------------
+
+Combine is a Beam transform for combining collections of elements or values in your data. When you
+apply a Combine transform, you must provide the function that contains the logic for combining the
+elements or values. The combining function should be commutative and associative, as the function
+is not necessarily invoked exactly once on all values with a given key. Because the input data
+(including the value collection) may be distributed across multiple workers, the combining function
+might be called multiple times to perform partial combining on subsets of the value collection.
+
+Simple combine operations, such as sums, can usually be implemented as a simple function.
+
+**Kata:** Implement the summation of numbers using
+[Combine.globally(SerializableFunction)](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/transforms/SerializableFunction.html).
+
- Transforms can have a nested structure, where a complex transform performs multiple simpler
- transforms (such as more than one ParDo, Combine, GroupByKey, or even other composite transforms).
- These transforms are called composite transforms. Nesting multiple transforms inside a single
- composite transform can make your code more modular and easier to understand.
-
-
- To create your own composite transform, create a subclass of the PTransform class and override
- the expand method to specify the actual processing logic. You can then use this transform just as
- you would a built-in transform from the Beam SDK. For the PTransform class type parameters, you
- pass the PCollection types that your transform takes as input, and produces as output. Within
- your PTransform subclass, you’ll need to override the expand method. The expand method is where
- you add the processing logic for the PTransform. Your override of expand must accept the
- appropriate type of input PCollection as a parameter, and specify the output PCollection as the
- return value.
-
-
- Kata: Please implement a composite transform "ExtractAndMultiplyNumbers" that extracts
- numbers from comma separated line and then multiplies each number by 10.
-
-
diff --git a/learning/katas/java/Core Transforms/Composite Transform/Composite Transform/task.md b/learning/katas/java/Core Transforms/Composite Transform/Composite Transform/task.md
new file mode 100644
index 0000000000000..45854bfe95f31
--- /dev/null
+++ b/learning/katas/java/Core Transforms/Composite Transform/Composite Transform/task.md
@@ -0,0 +1,47 @@
+
+
+Composite Transform
+-------------------
+
+Transforms can have a nested structure, where a complex transform performs multiple simpler
+transforms (such as more than one ParDo, Combine, GroupByKey, or even other composite transforms).
+These transforms are called composite transforms. Nesting multiple transforms inside a single
+composite transform can make your code more modular and easier to understand.
+
+To create your own composite transform, create a subclass of the PTransform class and override the
+expand method to specify the actual processing logic. You can then use this transform just as you
+would a built-in transform from the Beam SDK. For the PTransform class type parameters, you pass
+the PCollection types that your transform takes as input, and produces as output. Within your
+PTransform subclass, you’ll need to override the expand method. The expand method is where you add
+the processing logic for the PTransform. Your override of expand must accept the appropriate type
+of input PCollection as a parameter, and specify the output PCollection as the return value.
+
+**Kata:** Please implement a composite transform "ExtractAndMultiplyNumbers" that extracts numbers
+from a comma-separated line and then multiplies each number by 10.
+
+
- In addition to the element and the OutputReceiver, Beam will populate other parameters to your
- DoFn’s @ProcessElement method. Any combination of these parameters can be added to your process
- method in any order.
-
-
-
-
- Timestamp: To access the timestamp of an input element, add a parameter annotated with
- @Timestamp of type Instant
-
-
- Window: To access the window an input element falls into, add a parameter of the type of the
- window used for the input PCollection.
-
-
- PaneInfo: When triggers are used, Beam provides a PaneInfo object that contains information
- about the current firing. Using PaneInfo you can determine whether this is an early or a
- late firing, and how many times this window has already fired for this key.
-
-
- PipelineOptions: The PipelineOptions for the current pipeline can always be accessed in a
- process method by adding it as a parameter.
-
-
diff --git a/learning/katas/java/Core Transforms/DoFn Additional Parameters/DoFn Additional Parameters/task.md b/learning/katas/java/Core Transforms/DoFn Additional Parameters/DoFn Additional Parameters/task.md
new file mode 100644
index 0000000000000..32657dcb8635e
--- /dev/null
+++ b/learning/katas/java/Core Transforms/DoFn Additional Parameters/DoFn Additional Parameters/task.md
@@ -0,0 +1,38 @@
+
+
+DoFn Additional Parameters
+--------------------------
+
+In addition to the element and the OutputReceiver, Beam will populate other parameters to your
+DoFn’s @ProcessElement method. Any combination of these parameters can be added to your process
+method in any order.
+
+* **Timestamp**: To access the timestamp of an input element, add a parameter annotated with
+@Timestamp of type Instant
+* **Window**: To access the window an input element falls into, add a parameter of the type of
+the window used for the input PCollection.
+* **PaneInfo**: When triggers are used, Beam provides a PaneInfo object that contains information
+about the current firing. Using PaneInfo you can determine whether this is an early or a late
+firing, and how many times this window has already fired for this key.
+* **PipelineOptions**: The PipelineOptions for the current pipeline can always be accessed in a
+process method by adding it as a parameter.
+
+Refer to the Beam Programming Guide
+["Accessing additional parameters in your DoFn"](https://beam.apache.org/documentation/programming-guide/#other-dofn-parameters)
+section for more information.
diff --git a/learning/katas/java/Core Transforms/Flatten/Flatten/task-remote-info.yaml b/learning/katas/java/Core Transforms/Flatten/Flatten/task-remote-info.yaml
index 7da7fb33fe672..f82552000d1b0 100644
--- a/learning/katas/java/Core Transforms/Flatten/Flatten/task-remote-info.yaml
+++ b/learning/katas/java/Core Transforms/Flatten/Flatten/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076187
-update_date: Fri, 07 Feb 2020 14:28:11 UTC
+update_date: Tue, 19 May 2020 07:01:44 UTC
diff --git a/learning/katas/java/Core Transforms/Flatten/Flatten/task.html b/learning/katas/java/Core Transforms/Flatten/Flatten/task.md
similarity index 73%
rename from learning/katas/java/Core Transforms/Flatten/Flatten/task.html
rename to learning/katas/java/Core Transforms/Flatten/Flatten/task.md
index f0bf35e9c8048..5bf19ba5e6328 100644
--- a/learning/katas/java/Core Transforms/Flatten/Flatten/task.html
+++ b/learning/katas/java/Core Transforms/Flatten/Flatten/task.md
@@ -16,26 +16,24 @@
~ limitations under the License.
-->
-
-
Flatten
-
- Flatten is a Beam transform for PCollection objects that store the same data type.
- Flatten merges multiple PCollection objects into a single logical PCollection.
-
-
- Kata: Implement a
-
- Flatten transform that merges two PCollection of words into a single PCollection.
-
-
+Flatten
+-------
+
+Flatten is a Beam transform for PCollection objects that store the same data type. Flatten merges
+multiple PCollection objects into a single logical PCollection.
+
+**Kata:** Implement a
+[Flatten](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/transforms/Flatten.html)
+transform that merges two PCollection of words into a single PCollection.
+
Refer to the Beam Programming Guide
"Flatten" section for more information.
-
diff --git a/learning/katas/java/Core Transforms/GroupByKey/GroupByKey/task-remote-info.yaml b/learning/katas/java/Core Transforms/GroupByKey/GroupByKey/task-remote-info.yaml
index c8fad3b294c05..803643efd658c 100644
--- a/learning/katas/java/Core Transforms/GroupByKey/GroupByKey/task-remote-info.yaml
+++ b/learning/katas/java/Core Transforms/GroupByKey/GroupByKey/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076186
-update_date: Fri, 07 Feb 2020 14:28:07 UTC
+update_date: Tue, 19 May 2020 07:01:26 UTC
diff --git a/learning/katas/java/Core Transforms/GroupByKey/GroupByKey/task.html b/learning/katas/java/Core Transforms/GroupByKey/GroupByKey/task.md
similarity index 64%
rename from learning/katas/java/Core Transforms/GroupByKey/GroupByKey/task.html
rename to learning/katas/java/Core Transforms/GroupByKey/GroupByKey/task.md
index 54082b091bf27..cfaa2eb632ab5 100644
--- a/learning/katas/java/Core Transforms/GroupByKey/GroupByKey/task.html
+++ b/learning/katas/java/Core Transforms/GroupByKey/GroupByKey/task.md
@@ -16,30 +16,28 @@
~ limitations under the License.
-->
-
-
GroupByKey
-
- GroupByKey is a Beam transform for processing collections of key/value pairs. It’s a parallel
- reduction operation, analogous to the Shuffle phase of a Map/Shuffle/Reduce-style algorithm. The
- input to GroupByKey is a collection of key/value pairs that represents a multimap, where the
- collection contains multiple pairs that have the same key, but different values. Given such a
- collection, you use GroupByKey to collect all of the values associated with each unique key.
-
-
- Kata: Implement a
-
- GroupByKey transform that groups words by its first letter.
-
-
+GroupByKey
+----------
+
+GroupByKey is a Beam transform for processing collections of key/value pairs. It’s a parallel
+reduction operation, analogous to the Shuffle phase of a Map/Shuffle/Reduce-style algorithm. The
+input to GroupByKey is a collection of key/value pairs that represents a multimap, where the
+collection contains multiple pairs that have the same key, but different values. Given such a
+collection, you use GroupByKey to collect all of the values associated with each unique key.
+
+**Kata:** Implement a
+[GroupByKey](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/transforms/GroupByKey.html)
+transform that groups words by their first letter.
+
Refer to the Beam Programming Guide
"GroupByKey" section for more information.
-
diff --git a/learning/katas/java/Core Transforms/Map/FlatMapElements/task-remote-info.yaml b/learning/katas/java/Core Transforms/Map/FlatMapElements/task-remote-info.yaml
index eccf16787840a..e237aa9e7b046 100644
--- a/learning/katas/java/Core Transforms/Map/FlatMapElements/task-remote-info.yaml
+++ b/learning/katas/java/Core Transforms/Map/FlatMapElements/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076150
-update_date: Fri, 07 Feb 2020 14:28:26 UTC
+update_date: Tue, 19 May 2020 07:01:22 UTC
diff --git a/learning/katas/java/Core Transforms/Map/FlatMapElements/task.html b/learning/katas/java/Core Transforms/Map/FlatMapElements/task.md
similarity index 69%
rename from learning/katas/java/Core Transforms/Map/FlatMapElements/task.html
rename to learning/katas/java/Core Transforms/Map/FlatMapElements/task.md
index 50f1627c0e7ef..8eb555cfdd42a 100644
--- a/learning/katas/java/Core Transforms/Map/FlatMapElements/task.html
+++ b/learning/katas/java/Core Transforms/Map/FlatMapElements/task.md
@@ -16,29 +16,24 @@
~ limitations under the License.
-->
-
-
FlatMapElements
-
- The Beam SDKs provide language-specific ways to simplify how you provide your DoFn implementation.
-
-
- FlatMapElements can be used to simplify a DoFn that maps an element to multiple elements (one to
- many).
-
-
- Kata: Implement a function that maps each input sentence into words tokenized by whitespace
- (" ") using
-
- FlatMapElements.into(...).via(...).
-
-
+FlatMapElements
+---------------
+
+The Beam SDKs provide language-specific ways to simplify how you provide your DoFn implementation.
+
+FlatMapElements can be used to simplify a DoFn that maps an element to multiple elements (one to
+many).
+
+**Kata:** Implement a function that maps each input sentence into words tokenized by whitespace
+(" ") using [FlatMapElements.into(...).via(...)](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/transforms/FlatMapElements.html).
+
-
+MapElements
+-----------
+
+The Beam SDKs provide language-specific ways to simplify how you provide your DoFn implementation.
+
+MapElements can be used to simplify a DoFn that maps an element to another element (one to one).
+
+**Kata:** Implement a simple map function that multiplies all input elements by 5 using
+[MapElements.into(...).via(...)](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/transforms/MapElements.html).
+
-
\ No newline at end of file
diff --git a/learning/katas/java/Core Transforms/Map/ParDo/task-remote-info.yaml b/learning/katas/java/Core Transforms/Map/ParDo/task-remote-info.yaml
index 89498764e53d6..60375db960619 100644
--- a/learning/katas/java/Core Transforms/Map/ParDo/task-remote-info.yaml
+++ b/learning/katas/java/Core Transforms/Map/ParDo/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076185
-update_date: Fri, 07 Feb 2020 14:28:05 UTC
+update_date: Tue, 19 May 2020 07:01:14 UTC
diff --git a/learning/katas/java/Core Transforms/Map/ParDo/task.html b/learning/katas/java/Core Transforms/Map/ParDo/task.md
similarity index 71%
rename from learning/katas/java/Core Transforms/Map/ParDo/task.html
rename to learning/katas/java/Core Transforms/Map/ParDo/task.md
index 15a0ea1fc68cb..bfcb16fdbb391 100644
--- a/learning/katas/java/Core Transforms/Map/ParDo/task.html
+++ b/learning/katas/java/Core Transforms/Map/ParDo/task.md
@@ -16,27 +16,25 @@
~ limitations under the License.
-->
-
-
ParDo
-
- ParDo is a Beam transform for generic parallel processing. The ParDo processing paradigm is
- similar to the “Map” phase of a Map/Shuffle/Reduce-style algorithm: a ParDo transform considers
- each element in the input PCollection, performs some processing function (your user code) on
- that element, and emits zero, one, or multiple elements to an output PCollection.
-
-
- Kata: Please write a simple ParDo that maps the input element by multiplying it by 10.
-
-
+ParDo
+-----
+
+ParDo is a Beam transform for generic parallel processing. The ParDo processing paradigm is similar
+to the “Map” phase of a Map/Shuffle/Reduce-style algorithm: a ParDo transform considers each
+element in the input PCollection, performs some processing function (your user code) on that
+element, and emits zero, one, or multiple elements to an output PCollection.
+
+**Kata:** Please write a simple ParDo that maps the input element by multiplying it by 10.
+
Refer to the Beam Programming Guide
"ParDo" section for
more information.
-
diff --git a/learning/katas/java/Core Transforms/Partition/Partition/task-remote-info.yaml b/learning/katas/java/Core Transforms/Partition/Partition/task-remote-info.yaml
index ad0c8123173d8..871b8da0e8ddc 100644
--- a/learning/katas/java/Core Transforms/Partition/Partition/task-remote-info.yaml
+++ b/learning/katas/java/Core Transforms/Partition/Partition/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076156
-update_date: Fri, 07 Feb 2020 14:06:48 UTC
+update_date: Tue, 19 May 2020 07:01:46 UTC
diff --git a/learning/katas/java/Core Transforms/Partition/Partition/task.html b/learning/katas/java/Core Transforms/Partition/Partition/task.md
similarity index 59%
rename from learning/katas/java/Core Transforms/Partition/Partition/task.html
rename to learning/katas/java/Core Transforms/Partition/Partition/task.md
index 96e559c99341d..e254afddc335f 100644
--- a/learning/katas/java/Core Transforms/Partition/Partition/task.html
+++ b/learning/katas/java/Core Transforms/Partition/Partition/task.md
@@ -16,33 +16,29 @@
~ limitations under the License.
-->
-
-
Partition
-
- Partition is a Beam transform for PCollection objects that store the same data type.
- Partition splits a single PCollection into a fixed number of smaller collections.
-
-
- Partition divides the elements of a PCollection according to a partitioning function
- that you provide. The partitioning function contains the logic that determines how to split up
- the elements of the input PCollection into each resulting partition PCollection.
-
-
- Kata: Implement a
-
- Partition transform that splits a PCollection of numbers into two PCollections.
- The first PCollection contains numbers greater than 100, and the second PCollection contains
- the remaining numbers.
-
-
+Partition
+---------
+
+Partition is a Beam transform for PCollection objects that store the same data type. Partition
+splits a single PCollection into a fixed number of smaller collections.
+
+Partition divides the elements of a PCollection according to a partitioning function that you
+provide. The partitioning function contains the logic that determines how to split up the elements
+of the input PCollection into each resulting partition PCollection.
+
+**Kata:** Implement a
+[Partition](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/transforms/Partition.html)
+transform that splits a PCollection of numbers into two PCollections. The first PCollection
+contains numbers greater than 100, and the second PCollection contains the remaining numbers.
+
Refer to the Beam Programming Guide
"Partition" section for more information.
-
diff --git a/learning/katas/java/Core Transforms/Side Input/Side Input/task-remote-info.yaml b/learning/katas/java/Core Transforms/Side Input/Side Input/task-remote-info.yaml
index f0673f8c00dd9..e9c76e40dfb49 100644
--- a/learning/katas/java/Core Transforms/Side Input/Side Input/task-remote-info.yaml
+++ b/learning/katas/java/Core Transforms/Side Input/Side Input/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076157
-update_date: Fri, 07 Feb 2020 14:06:55 UTC
+update_date: Tue, 19 May 2020 07:01:49 UTC
diff --git a/learning/katas/java/Core Transforms/Side Input/Side Input/task.html b/learning/katas/java/Core Transforms/Side Input/Side Input/task.md
similarity index 65%
rename from learning/katas/java/Core Transforms/Side Input/Side Input/task.html
rename to learning/katas/java/Core Transforms/Side Input/Side Input/task.md
index 9e7045b9476f9..6ebde279100ea 100644
--- a/learning/katas/java/Core Transforms/Side Input/Side Input/task.html
+++ b/learning/katas/java/Core Transforms/Side Input/Side Input/task.md
@@ -16,29 +16,27 @@
~ limitations under the License.
-->
-
-
Side Input
-
- In addition to the main input PCollection, you can provide additional inputs to a ParDo transform
- in the form of side inputs. A side input is an additional input that your DoFn can access each
- time it processes an element in the input PCollection. When you specify a side input, you create
- a view of some other data that can be read from within the ParDo transform’s DoFn while
- processing each element.
-
-
- Side inputs are useful if your ParDo needs to inject additional data when processing each element
- in the input PCollection, but the additional data needs to be determined at runtime (and not
- hard-coded). Such values might be determined by the input data, or depend on a different branch
- of your pipeline.
-
-
- Kata: Please enrich each Person with the country based on the city he/she lives in.
-
-
+Side Input
+----------
+
+In addition to the main input PCollection, you can provide additional inputs to a ParDo transform
+in the form of side inputs. A side input is an additional input that your DoFn can access each time
+it processes an element in the input PCollection. When you specify a side input, you create a view
+of some other data that can be read from within the ParDo transform’s DoFn while processing each
+element.
+
+Side inputs are useful if your ParDo needs to inject additional data when processing each element
+in the input PCollection, but the additional data needs to be determined at runtime (and not
+hard-coded). Such values might be determined by the input data, or depend on a different branch of
+your pipeline.
+
+**Kata:** Please enrich each Person with the country based on the city he/she lives in.
+
Use
View to create PCollectionView of citiesToCountries.
Refer to the Beam Programming Guide
"Side inputs"
section for more information.
-
diff --git a/learning/katas/java/Core Transforms/Side Output/Side Output/task-remote-info.yaml b/learning/katas/java/Core Transforms/Side Output/Side Output/task-remote-info.yaml
index 4eab016d77d90..5260041c1bd48 100644
--- a/learning/katas/java/Core Transforms/Side Output/Side Output/task-remote-info.yaml
+++ b/learning/katas/java/Core Transforms/Side Output/Side Output/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076158
-update_date: Fri, 07 Feb 2020 14:07:02 UTC
+update_date: Tue, 19 May 2020 07:01:53 UTC
diff --git a/learning/katas/java/Core Transforms/Side Output/Side Output/task.html b/learning/katas/java/Core Transforms/Side Output/Side Output/task.md
similarity index 77%
rename from learning/katas/java/Core Transforms/Side Output/Side Output/task.html
rename to learning/katas/java/Core Transforms/Side Output/Side Output/task.md
index d24f73d5253f8..d6902442bb539 100644
--- a/learning/katas/java/Core Transforms/Side Output/Side Output/task.html
+++ b/learning/katas/java/Core Transforms/Side Output/Side Output/task.md
@@ -16,18 +16,16 @@
~ limitations under the License.
-->
-
-
Side Output
-
- While ParDo always produces a main output PCollection (as the return value from apply), you can
- also have your ParDo produce any number of additional output PCollections. If you choose to have
- multiple outputs, your ParDo returns all of the output PCollections (including the main output)
- bundled together.
-
-
- Kata: Implement additional output to your ParDo for numbers bigger than 100.
-
-
+Side Output
+-----------
+
+While ParDo always produces a main output PCollection (as the return value from apply), you can
+also have your ParDo produce any number of additional output PCollections. If you choose to have
+multiple outputs, your ParDo returns all of the output PCollections (including the main output)
+bundled together.
+
+**Kata:** Implement additional output to your ParDo for numbers bigger than 100.
+
Refer to the Beam Programming Guide
"Additional outputs" section for more information.
-
diff --git a/learning/katas/java/Examples/Word Count/Word Count/task-remote-info.yaml b/learning/katas/java/Examples/Word Count/Word Count/task-remote-info.yaml
index 224dbaee0acd3..35e7dde636da9 100644
--- a/learning/katas/java/Examples/Word Count/Word Count/task-remote-info.yaml
+++ b/learning/katas/java/Examples/Word Count/Word Count/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076176
-update_date: Fri, 07 Feb 2020 14:08:54 UTC
+update_date: Tue, 19 May 2020 07:02:51 UTC
diff --git a/learning/katas/java/Examples/Word Count/Word Count/task.html b/learning/katas/java/Examples/Word Count/Word Count/task.md
similarity index 78%
rename from learning/katas/java/Examples/Word Count/Word Count/task.html
rename to learning/katas/java/Examples/Word Count/Word Count/task.md
index a963aab4cb1b2..d8dbdff4bef5d 100644
--- a/learning/katas/java/Examples/Word Count/Word Count/task.html
+++ b/learning/katas/java/Examples/Word Count/Word Count/task.md
@@ -16,21 +16,18 @@
~ limitations under the License.
-->
-
-
Word Count Pipeline
-
- Kata: Create a pipeline that counts the number of words.
-
-
- Please output the count of each word in the following format:
-
-
- word:count
- ball:5
- book:3
-
-
+Word Count Pipeline
+-------------------
+
+**Kata:** Create a pipeline that counts the number of words.
+
+Please output the count of each word in the following format:
+```text
+word:count
+ball:5
+book:3
+```
+
Refer to your katas above.
-
diff --git a/learning/katas/java/IO/Built-in IOs/Built-in IOs/task-remote-info.yaml b/learning/katas/java/IO/Built-in IOs/Built-in IOs/task-remote-info.yaml
index ef8aff28455e2..caf130d69278d 100644
--- a/learning/katas/java/IO/Built-in IOs/Built-in IOs/task-remote-info.yaml
+++ b/learning/katas/java/IO/Built-in IOs/Built-in IOs/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076208
-update_date: Fri, 07 Feb 2020 14:52:55 UTC
+update_date: Tue, 19 May 2020 07:02:32 UTC
diff --git a/learning/katas/python/IO/Built-in IOs/Built-in IOs/task.html b/learning/katas/java/IO/Built-in IOs/Built-in IOs/task.md
similarity index 64%
rename from learning/katas/python/IO/Built-in IOs/Built-in IOs/task.html
rename to learning/katas/java/IO/Built-in IOs/Built-in IOs/task.md
index ef1b2083a8e90..b083c732eeb1c 100644
--- a/learning/katas/python/IO/Built-in IOs/Built-in IOs/task.html
+++ b/learning/katas/java/IO/Built-in IOs/Built-in IOs/task.md
@@ -16,17 +16,14 @@
~ limitations under the License.
-->
-
-
Built-in I/Os
-
- Beam SDKs provide many out of the box I/O transforms that can be used to read from many
- different sources and write to many different sinks.
-
- Note: There is no kata for this task. Please proceed to the next task.
-
-
+Built-in I/Os
+-------------
+
+Beam SDKs provide many out of the box I/O transforms that can be used to read from many different
+sources and write to many different sinks.
+
+See the [Beam-provided I/O Transforms](https://beam.apache.org/documentation/io/built-in/) page for
+a list of the currently available I/O transforms.
+
+**Note:** There is no kata for this task. Please click the "Check" button and proceed to the next
+task.
\ No newline at end of file
diff --git a/learning/katas/java/IO/TextIO/TextIO Read/task-remote-info.yaml b/learning/katas/java/IO/TextIO/TextIO Read/task-remote-info.yaml
index d66bb01abd251..b904cbe131c8a 100644
--- a/learning/katas/java/IO/TextIO/TextIO Read/task-remote-info.yaml
+++ b/learning/katas/java/IO/TextIO/TextIO Read/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076189
-update_date: Fri, 07 Feb 2020 14:28:15 UTC
+update_date: Tue, 19 May 2020 07:06:02 UTC
diff --git a/learning/katas/java/IO/TextIO/TextIO Read/task.html b/learning/katas/java/IO/TextIO/TextIO Read/task.md
similarity index 62%
rename from learning/katas/java/IO/TextIO/TextIO Read/task.html
rename to learning/katas/java/IO/TextIO/TextIO Read/task.md
index 1ebad84212594..23dd394cae82d 100644
--- a/learning/katas/java/IO/TextIO/TextIO Read/task.html
+++ b/learning/katas/java/IO/TextIO/TextIO Read/task.md
@@ -16,32 +16,29 @@
~ limitations under the License.
-->
-
-
TextIO Read
-
- When you create a pipeline, you often need to read data from some external source, such as a file
- or a database. Likewise, you may want your pipeline to output its result data to an external
- storage system. Beam provides read and write transforms for a number of common data storage types.
- If you want your pipeline to read from or write to a data storage format that isn’t supported by
- the built-in transforms, you can implement your own read and write transforms.
-
-
- To read a PCollection from one or more text files, use TextIO.read() to instantiate a transform
- and use TextIO.Read.from(String) to specify the path of the file(s) to be read.
-
-
- Kata: Read the 'countries.txt' file and convert each country name into uppercase.
-
-
+TextIO Read
+-----------
+
+When you create a pipeline, you often need to read data from some external source, such as a file
+or a database. Likewise, you may want your pipeline to output its result data to an external
+storage system. Beam provides read and write transforms for a number of common data storage types.
+If you want your pipeline to read from or write to a data storage format that isn’t supported by
+the built-in transforms, you can implement your own read and write transforms.
+
+To read a PCollection from one or more text files, use TextIO.read() to instantiate a transform
+and use TextIO.Read.from(String) to specify the path of the file(s) to be read.
+
+**Kata:** Read the 'countries.txt' file and convert each country name into uppercase.
+
Refer to the Beam Programming Guide
"Reading input data" section for more information.
-
diff --git a/learning/katas/java/Introduction/Hello Beam/Hello Beam/task-remote-info.yaml b/learning/katas/java/Introduction/Hello Beam/Hello Beam/task-remote-info.yaml
index 521fdb194b38e..f1bd95787b4ae 100644
--- a/learning/katas/java/Introduction/Hello Beam/Hello Beam/task-remote-info.yaml
+++ b/learning/katas/java/Introduction/Hello Beam/Hello Beam/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076147
-update_date: Fri, 07 Feb 2020 14:05:53 UTC
+update_date: Tue, 19 May 2020 07:01:10 UTC
diff --git a/learning/katas/java/Introduction/Hello Beam/Hello Beam/task.html b/learning/katas/java/Introduction/Hello Beam/Hello Beam/task.md
similarity index 51%
rename from learning/katas/java/Introduction/Hello Beam/Hello Beam/task.html
rename to learning/katas/java/Introduction/Hello Beam/Hello Beam/task.md
index 28e579bf8f984..c1ef872d6d7a3 100644
--- a/learning/katas/java/Introduction/Hello Beam/Hello Beam/task.html
+++ b/learning/katas/java/Introduction/Hello Beam/Hello Beam/task.md
@@ -16,38 +16,34 @@
~ limitations under the License.
-->
-
-
Hello Beam Pipeline
-
- Apache Beam is an open source, unified model for defining both batch and streaming data-parallel
- processing pipelines. Using one of the open source Beam SDKs, you build a program that defines the
- pipeline. The pipeline is then executed by one of Beam’s supported distributed processing
- back-ends, which include Apache Apex, Apache Flink, Apache Spark, and Google Cloud Dataflow.
-
-
- Beam is particularly useful for Embarrassingly Parallel data processing tasks, in which the
- problem can be decomposed into many smaller bundles of data that can be processed independently
- and in parallel. You can also use Beam for Extract, Transform, and Load (ETL) tasks and pure data
- integration. These tasks are useful for moving data between different storage media and data
- sources, transforming data into a more desirable format, or loading data onto a new system.
-
- Kata: Your first kata is to create a simple pipeline that takes a hardcoded input element
- "Hello Beam".
-
-
+Welcome To Apache Beam
+----------------------
+
+Apache Beam is an open source, unified model for defining both batch and streaming data-parallel
+processing pipelines. Using one of the open source Beam SDKs, you build a program that defines the
+pipeline. The pipeline is then executed by one of Beam’s supported distributed processing back-ends,
+which include Apache Apex, Apache Flink, Apache Spark, and Google Cloud Dataflow.
+
+Beam is particularly useful for Embarrassingly Parallel data processing tasks, in which the problem
+can be decomposed into many smaller bundles of data that can be processed independently and in
+parallel. You can also use Beam for Extract, Transform, and Load (ETL) tasks and pure data
+integration. These tasks are useful for moving data between different storage media and data
+sources, transforming data into a more desirable format, or loading data onto a new system.
+
+To learn more about Apache Beam, refer to
+[Apache Beam Overview](https://beam.apache.org/get-started/beam-overview/).
+
+**Kata:** Your first kata is to create a simple pipeline that takes a hardcoded input element
+"Hello Beam".
+
-
diff --git a/learning/katas/java/Triggers/Early Triggers/Early Triggers/task-remote-info.yaml b/learning/katas/java/Triggers/Early Triggers/Early Triggers/task-remote-info.yaml
index 823ad7b76d006..6ad36e81349f1 100644
--- a/learning/katas/java/Triggers/Early Triggers/Early Triggers/task-remote-info.yaml
+++ b/learning/katas/java/Triggers/Early Triggers/Early Triggers/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076174
-update_date: Fri, 07 Feb 2020 14:08:38 UTC
+update_date: Tue, 19 May 2020 07:02:45 UTC
diff --git a/learning/katas/java/Triggers/Early Triggers/Early Triggers/task.html b/learning/katas/java/Triggers/Early Triggers/Early Triggers/task.md
similarity index 83%
rename from learning/katas/java/Triggers/Early Triggers/Early Triggers/task.html
rename to learning/katas/java/Triggers/Early Triggers/Early Triggers/task.md
index 6a7f1cbad6c1c..12d579f319ed1 100644
--- a/learning/katas/java/Triggers/Early Triggers/Early Triggers/task.html
+++ b/learning/katas/java/Triggers/Early Triggers/Early Triggers/task.md
@@ -16,44 +16,45 @@
~ limitations under the License.
-->
-
-
Early Triggers
-
- Triggers allow Beam to emit early results, before all the data in a given window has arrived.
- For example, emitting after a certain amount of time elapses, or after a certain number of
- elements arrives.
-
-
- Kata: Given that events are being generated every second and a fixed window of 1-day
- duration, please implement an early trigger that emits the number of events count immediately
- after new element is processed.
-
-
+Early Triggers
+--------------
+
+Triggers allow Beam to emit early results, before all the data in a given window has arrived. For
+example, emitting after a certain amount of time elapses, or after a certain number of elements
+arrives.
+
+**Kata:** Given that events are being generated every second and a fixed window of 1-day duration,
+please implement an early trigger that emits the number of events count immediately after a new
+element is processed.
+
-
\ No newline at end of file
diff --git a/learning/katas/java/Triggers/Event Time Triggers/Event Time Triggers/task-remote-info.yaml b/learning/katas/java/Triggers/Event Time Triggers/Event Time Triggers/task-remote-info.yaml
index 56a53bb168618..d4d5c9da9d10f 100644
--- a/learning/katas/java/Triggers/Event Time Triggers/Event Time Triggers/task-remote-info.yaml
+++ b/learning/katas/java/Triggers/Event Time Triggers/Event Time Triggers/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076173
-update_date: Fri, 07 Feb 2020 14:08:31 UTC
+update_date: Tue, 19 May 2020 07:02:42 UTC
diff --git a/learning/katas/java/Triggers/Event Time Triggers/Event Time Triggers/task.html b/learning/katas/java/Triggers/Event Time Triggers/Event Time Triggers/task.md
similarity index 59%
rename from learning/katas/java/Triggers/Event Time Triggers/Event Time Triggers/task.html
rename to learning/katas/java/Triggers/Event Time Triggers/Event Time Triggers/task.md
index 5a124aa3ccb40..273ac8f637b7c 100644
--- a/learning/katas/java/Triggers/Event Time Triggers/Event Time Triggers/task.html
+++ b/learning/katas/java/Triggers/Event Time Triggers/Event Time Triggers/task.md
@@ -16,63 +16,57 @@
~ limitations under the License.
-->
-
-
Event Time Triggers
-
- When collecting and grouping data into windows, Beam uses triggers to determine when to emit the
- aggregated results of each window (referred to as a pane). If you use Beam’s default windowing
- configuration and default trigger, Beam outputs the aggregated result when it estimates all data
- has arrived, and discards all subsequent data for that window.
-
-
- You can set triggers for your PCollections to change this default behavior. Beam provides a
- number of pre-built triggers that you can set:
-
-
-
-
Event time triggers
-
Processing time triggers
-
Data-driven triggers
-
Composite triggers
-
-
-
- Event time triggers operate on the event time, as indicated by the timestamp on each data
- element. Beam’s default trigger is event time-based.
-
-
- The AfterWatermark trigger operates on event time. The AfterWatermark trigger emits the contents
- of a window after the watermark passes the end of the window, based on the timestamps attached
- to the data elements. The watermark is a global progress metric, and is Beam’s notion of input
- completeness within your pipeline at any given point. AfterWatermark.pastEndOfWindow() only fires
- when the watermark passes the end of the window.
-
-
- Kata: Given that events are being generated every second, please implement a trigger that
- emits the number of events count within a fixed window of 5-second duration.
-
-
+Event Time Triggers
+-------------------
+
+When collecting and grouping data into windows, Beam uses triggers to determine when to emit the
+aggregated results of each window (referred to as a pane). If you use Beam’s default windowing
+configuration and default trigger, Beam outputs the aggregated result when it estimates all data
+has arrived, and discards all subsequent data for that window.
+
+You can set triggers for your PCollections to change this default behavior. Beam provides a number
+of pre-built triggers that you can set:
+
+* Event time triggers
+* Processing time triggers
+* Data-driven triggers
+* Composite triggers
+
+Event time triggers operate on the event time, as indicated by the timestamp on each data element.
+Beam’s default trigger is event time-based.
+
+The AfterWatermark trigger operates on event time. The AfterWatermark trigger emits the contents
+of a window after the watermark passes the end of the window, based on the timestamps attached to
+the data elements. The watermark is a global progress metric, and is Beam’s notion of input
+completeness within your pipeline at any given point. AfterWatermark.pastEndOfWindow() only fires
+when the watermark passes the end of the window.
+
+**Kata:** Given that events are being generated every second, please implement a trigger that emits
+the number of events count within a fixed window of 5-second duration.
+
-
diff --git a/learning/katas/java/Triggers/Window Accumulation Mode/Window Accumulation Mode/task-remote-info.yaml b/learning/katas/java/Triggers/Window Accumulation Mode/Window Accumulation Mode/task-remote-info.yaml
index f69bd76fa9859..75c5c3e2be6b2 100644
--- a/learning/katas/java/Triggers/Window Accumulation Mode/Window Accumulation Mode/task-remote-info.yaml
+++ b/learning/katas/java/Triggers/Window Accumulation Mode/Window Accumulation Mode/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076175
-update_date: Fri, 07 Feb 2020 14:08:45 UTC
+update_date: Tue, 19 May 2020 07:02:47 UTC
diff --git a/learning/katas/java/Triggers/Window Accumulation Mode/Window Accumulation Mode/task.html b/learning/katas/java/Triggers/Window Accumulation Mode/Window Accumulation Mode/task.md
similarity index 80%
rename from learning/katas/java/Triggers/Window Accumulation Mode/Window Accumulation Mode/task.html
rename to learning/katas/java/Triggers/Window Accumulation Mode/Window Accumulation Mode/task.md
index f40784eabdc60..90f44861899c5 100644
--- a/learning/katas/java/Triggers/Window Accumulation Mode/Window Accumulation Mode/task.html
+++ b/learning/katas/java/Triggers/Window Accumulation Mode/Window Accumulation Mode/task.md
@@ -16,48 +16,50 @@
~ limitations under the License.
-->
-
-
Window Accumulation Mode
-
- When you specify a trigger, you must also set the the window’s accumulation mode. When a trigger
- fires, it emits the current contents of the window as a pane. Since a trigger can fire multiple
- times, the accumulation mode determines whether the system accumulates the window panes as the
- trigger fires, or discards them.
-
-
- Kata: Given that events are being generated every second and a fixed window of 1-day
- duration, please implement an early trigger that emits the number of events count immediately
- after new element is processed in accumulating mode.
-
-
+Window Accumulation Mode
+------------------------
+
+When you specify a trigger, you must also set the window’s accumulation mode. When a trigger
+fires, it emits the current contents of the window as a pane. Since a trigger can fire multiple
+times, the accumulation mode determines whether the system accumulates the window panes as the
+trigger fires, or discards them.
+
+**Kata:** Given that events are being generated every second and a fixed window of 1-day duration,
+please implement an early trigger that emits the count of events immediately after a new
+element is processed in accumulating mode.
+
Use
accumulatingFiredPanes() to set a window to accumulate the panes that are produced when the
trigger fires.
-
diff --git a/learning/katas/java/Windowing/Adding Timestamp/ParDo/task-remote-info.yaml b/learning/katas/java/Windowing/Adding Timestamp/ParDo/task-remote-info.yaml
index 6541f3ccc803f..ffc33a03d3788 100644
--- a/learning/katas/java/Windowing/Adding Timestamp/ParDo/task-remote-info.yaml
+++ b/learning/katas/java/Windowing/Adding Timestamp/ParDo/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076170
-update_date: Fri, 07 Feb 2020 14:08:15 UTC
+update_date: Tue, 19 May 2020 07:02:35 UTC
diff --git a/learning/katas/java/Windowing/Adding Timestamp/ParDo/task.html b/learning/katas/java/Windowing/Adding Timestamp/ParDo/task.md
similarity index 76%
rename from learning/katas/java/Windowing/Adding Timestamp/ParDo/task.html
rename to learning/katas/java/Windowing/Adding Timestamp/ParDo/task.md
index 403fc11d0d0a2..c801480f5e652 100644
--- a/learning/katas/java/Windowing/Adding Timestamp/ParDo/task.html
+++ b/learning/katas/java/Windowing/Adding Timestamp/ParDo/task.md
@@ -16,33 +16,31 @@
~ limitations under the License.
-->
-
-
Adding Timestamp - ParDo
-
- Bounded sources (such as a file from TextIO) do not provide timestamps for elements. If you need
- timestamps, you must add them to your PCollection’s elements.
-
-
- You can assign new timestamps to the elements of a PCollection by applying a ParDo transform that
- outputs new elements with timestamps that you set.
-
-
- Kata: Please assign each element a timestamp based on the the Event.getDate().
-
-
+Adding Timestamp - ParDo
+------------------------
+
+Bounded sources (such as a file from TextIO) do not provide timestamps for elements. If you need
+timestamps, you must add them to your PCollection’s elements.
+
+You can assign new timestamps to the elements of a PCollection by applying a ParDo transform that
+outputs new elements with timestamps that you set.
+
+**Kata:** Please assign each element a timestamp based on the `Event.getDate()`.
+
-
diff --git a/learning/katas/java/Windowing/Adding Timestamp/WithTimestamps/task-remote-info.yaml b/learning/katas/java/Windowing/Adding Timestamp/WithTimestamps/task-remote-info.yaml
index 5e8431f094d31..c4ecf093ad696 100644
--- a/learning/katas/java/Windowing/Adding Timestamp/WithTimestamps/task-remote-info.yaml
+++ b/learning/katas/java/Windowing/Adding Timestamp/WithTimestamps/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076190
-update_date: Fri, 07 Feb 2020 14:28:17 UTC
+update_date: Tue, 19 May 2020 07:06:05 UTC
diff --git a/learning/katas/java/Windowing/Adding Timestamp/WithTimestamps/task.html b/learning/katas/java/Windowing/Adding Timestamp/WithTimestamps/task.md
similarity index 71%
rename from learning/katas/java/Windowing/Adding Timestamp/WithTimestamps/task.html
rename to learning/katas/java/Windowing/Adding Timestamp/WithTimestamps/task.md
index bd49a7424fb31..1fdede83349f0 100644
--- a/learning/katas/java/Windowing/Adding Timestamp/WithTimestamps/task.html
+++ b/learning/katas/java/Windowing/Adding Timestamp/WithTimestamps/task.md
@@ -16,27 +16,24 @@
~ limitations under the License.
-->
-
-
Adding Timestamp - WithTimestamps
-
- Bounded sources (such as a file from TextIO) do not provide timestamps for elements. If you need
- timestamps, you must add them to your PCollection’s elements.
-
-
- You can assign new timestamps to the elements of a PCollection by applying a ParDo transform that
- outputs new elements with timestamps that you set.
-
-
- Kata: Please assign each element a timestamp based on the the Event.getDate().
-
-
+Adding Timestamp - WithTimestamps
+---------------------------------
+
+Bounded sources (such as a file from TextIO) do not provide timestamps for elements. If you need
+timestamps, you must add them to your PCollection’s elements.
+
+You can assign new timestamps to the elements of a PCollection by applying a ParDo transform that
+outputs new elements with timestamps that you set.
+
+**Kata:** Please assign each element a timestamp based on the `Event.getDate()`.
+
-
\ No newline at end of file
diff --git a/learning/katas/java/Windowing/Fixed Time Window/Fixed Time Window/task-remote-info.yaml b/learning/katas/java/Windowing/Fixed Time Window/Fixed Time Window/task-remote-info.yaml
index 14e82e16ea427..574b554cfff00 100644
--- a/learning/katas/java/Windowing/Fixed Time Window/Fixed Time Window/task-remote-info.yaml
+++ b/learning/katas/java/Windowing/Fixed Time Window/Fixed Time Window/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076172
-update_date: Fri, 07 Feb 2020 14:08:23 UTC
+update_date: Tue, 19 May 2020 07:02:39 UTC
diff --git a/learning/katas/java/Windowing/Fixed Time Window/Fixed Time Window/task.html b/learning/katas/java/Windowing/Fixed Time Window/Fixed Time Window/task.html
deleted file mode 100644
index 7f010c79b63ed..0000000000000
--- a/learning/katas/java/Windowing/Fixed Time Window/Fixed Time Window/task.html
+++ /dev/null
@@ -1,61 +0,0 @@
-
-
-
-
Fixed Time Window
-
- Windowing subdivides a PCollection according to the timestamps of its individual elements.
- Transforms that aggregate multiple elements, such as GroupByKey and Combine, work implicitly on
- a per-window basis — they process each PCollection as a succession of multiple, finite windows,
- though the entire collection itself may be of unbounded size.
-
-
- In the Beam model, any PCollection (including unbounded PCollections) can be subdivided into
- logical windows. Each element in a PCollection is assigned to one or more windows according to
- the PCollection’s windowing function, and each individual window contains a finite number of
- elements. Grouping transforms then consider each PCollection’s elements on a per-window basis.
- GroupByKey, for example, implicitly groups the elements of a PCollection by key and window.
-
-
- Beam provides several windowing functions, including:
-
-
Fixed Time Windows
-
Sliding Time Windows
-
Per-Session Windows
-
Single Global Window
-
-
-
- The simplest form of windowing is using fixed time windows. A fixed time window represents a
- consistent duration, non overlapping time interval in the data stream.
-
-
- Kata: Please count the number of events that happened based on fixed window with
- 1-day duration.
-
- Refer to the Beam Programming Guide
-
- "Fixed time windows" section for more information.
-
-
\ No newline at end of file
diff --git a/learning/katas/java/Windowing/Fixed Time Window/Fixed Time Window/task.md b/learning/katas/java/Windowing/Fixed Time Window/Fixed Time Window/task.md
new file mode 100644
index 0000000000000..ff4c19c285a5e
--- /dev/null
+++ b/learning/katas/java/Windowing/Fixed Time Window/Fixed Time Window/task.md
@@ -0,0 +1,53 @@
+
+
+Fixed Time Window
+-----------------
+
+Windowing subdivides a PCollection according to the timestamps of its individual elements.
+Transforms that aggregate multiple elements, such as GroupByKey and Combine, work implicitly on a
+per-window basis — they process each PCollection as a succession of multiple, finite windows,
+though the entire collection itself may be of unbounded size.
+
+In the Beam model, any PCollection (including unbounded PCollections) can be subdivided into
+logical windows. Each element in a PCollection is assigned to one or more windows according to the
+PCollection’s windowing function, and each individual window contains a finite number of elements.
+Grouping transforms then consider each PCollection’s elements on a per-window basis. GroupByKey,
+for example, implicitly groups the elements of a PCollection by key and window.
+
+Beam provides several windowing functions, including:
+* Fixed Time Windows
+* Sliding Time Windows
+* Per-Session Windows
+* Single Global Window
+
+The simplest form of windowing is using fixed time windows. A fixed time window represents a
+consistent duration, non-overlapping time interval in the data stream.
+
+**Kata:** Please count the number of events that happened based on a fixed window with 1-day duration.
+
+
-
diff --git a/learning/katas/python/Common Transforms/Aggregation/Sum/tests.py b/learning/katas/python/Common Transforms/Aggregation/Sum/tests.py
index e761cdf5beab7..30964e7995613 100644
--- a/learning/katas/python/Common Transforms/Aggregation/Sum/tests.py
+++ b/learning/katas/python/Common Transforms/Aggregation/Sum/tests.py
@@ -14,8 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, get_file_output, \
- test_is_not_empty, test_answer_placeholders_text_deleted
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -31,5 +30,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
test_output()
diff --git a/learning/katas/python/Common Transforms/Filter/Filter/task-remote-info.yaml b/learning/katas/python/Common Transforms/Filter/Filter/task-remote-info.yaml
index c13409cde4e1a..b128f6e993e23 100644
--- a/learning/katas/python/Common Transforms/Filter/Filter/task-remote-info.yaml
+++ b/learning/katas/python/Common Transforms/Filter/Filter/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755596
-update_date: Fri, 07 Feb 2020 13:57:21 UTC
+update_date: Tue, 19 May 2020 03:05:30 UTC
diff --git a/learning/katas/python/Common Transforms/Filter/Filter/task.html b/learning/katas/python/Common Transforms/Filter/Filter/task.md
similarity index 72%
rename from learning/katas/python/Common Transforms/Filter/Filter/task.html
rename to learning/katas/python/Common Transforms/Filter/Filter/task.md
index 797f77c32ef90..2092263d6009a 100644
--- a/learning/katas/python/Common Transforms/Filter/Filter/task.html
+++ b/learning/katas/python/Common Transforms/Filter/Filter/task.md
@@ -16,19 +16,15 @@
~ limitations under the License.
-->
-
-
Filter
-
- The Beam SDKs provide language-specific ways to simplify how you provide your DoFn implementation.
-
-
- Kata: Implement a filter function that filters out the odd numbers by using
-
- Filter.
-
-
+Filter
+------
+
+The Beam SDKs provide language-specific ways to simplify how you provide your DoFn implementation.
+
+**Kata:** Implement a filter function that filters out the odd numbers by using
+[Filter](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.Filter).
+
-
diff --git a/learning/katas/python/Common Transforms/Filter/Filter/tests.py b/learning/katas/python/Common Transforms/Filter/Filter/tests.py
index 03487d2def126..da8cd880051fd 100644
--- a/learning/katas/python/Common Transforms/Filter/Filter/tests.py
+++ b/learning/katas/python/Common Transforms/Filter/Filter/tests.py
@@ -14,19 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_filter():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[0]
-
- if 'beam.Filter' in placeholder:
- passed()
- else:
- failed('Use beam.Filter')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -42,6 +30,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_filter()
test_output()
diff --git a/learning/katas/python/Common Transforms/Filter/ParDo/task-remote-info.yaml b/learning/katas/python/Common Transforms/Filter/ParDo/task-remote-info.yaml
index c897e3ac176bb..227501c0853f8 100644
--- a/learning/katas/python/Common Transforms/Filter/ParDo/task-remote-info.yaml
+++ b/learning/katas/python/Common Transforms/Filter/ParDo/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755595
-update_date: Fri, 07 Feb 2020 13:57:19 UTC
+update_date: Tue, 19 May 2020 03:05:27 UTC
diff --git a/learning/katas/python/Common Transforms/Filter/ParDo/task.html b/learning/katas/python/Common Transforms/Filter/ParDo/task.md
similarity index 79%
rename from learning/katas/python/Common Transforms/Filter/ParDo/task.html
rename to learning/katas/python/Common Transforms/Filter/ParDo/task.md
index 1c4ea1be03d5b..2204a8d88b835 100644
--- a/learning/katas/python/Common Transforms/Filter/ParDo/task.html
+++ b/learning/katas/python/Common Transforms/Filter/ParDo/task.md
@@ -16,16 +16,13 @@
~ limitations under the License.
-->
-
-
Filter using ParDo
-
- Kata: Implement a filter function that filters out the even numbers by using
-
- ParDo.
-
-
+Filter using ParDo
+------------------
+
+**Kata:** Implement a filter function that filters out the even numbers by using
+[ParDo](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.ParDo).
+
Override
process method. You can use "yield" for each intended element.
-
diff --git a/learning/katas/python/Common Transforms/Filter/ParDo/tests.py b/learning/katas/python/Common Transforms/Filter/ParDo/tests.py
index b1d475bd3bdcd..0a2cbba3efe7b 100644
--- a/learning/katas/python/Common Transforms/Filter/ParDo/tests.py
+++ b/learning/katas/python/Common Transforms/Filter/ParDo/tests.py
@@ -14,8 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, get_file_output, \
- test_is_not_empty, test_answer_placeholders_text_deleted
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -31,5 +30,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
test_output()
diff --git a/learning/katas/python/Common Transforms/WithKeys/WithKeys/task-remote-info.yaml b/learning/katas/python/Common Transforms/WithKeys/WithKeys/task-remote-info.yaml
index ceb1f3daea623..686b9b79ba0a8 100644
--- a/learning/katas/python/Common Transforms/WithKeys/WithKeys/task-remote-info.yaml
+++ b/learning/katas/python/Common Transforms/WithKeys/WithKeys/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1124221
-update_date: Mon, 09 Mar 2020 14:34:20 UTC
+update_date: Tue, 19 May 2020 03:05:49 UTC
diff --git a/learning/katas/python/Common Transforms/WithKeys/WithKeys/task.html b/learning/katas/python/Common Transforms/WithKeys/WithKeys/task.md
similarity index 85%
rename from learning/katas/python/Common Transforms/WithKeys/WithKeys/task.html
rename to learning/katas/python/Common Transforms/WithKeys/WithKeys/task.md
index d02b933591678..820e3a4b01a9b 100644
--- a/learning/katas/python/Common Transforms/WithKeys/WithKeys/task.html
+++ b/learning/katas/python/Common Transforms/WithKeys/WithKeys/task.md
@@ -16,15 +16,13 @@
~ limitations under the License.
-->
-
-
WithKeys
-
- Kata: Convert each fruit name into a KV of its first letter and itself, e.g.
- apple => ('a', 'apple')
-
-
+WithKeys
+--------
+
+**Kata:** Convert each fruit name into a KV of its first letter and itself, e.g.
+`apple => ('a', 'apple')`
+
-
diff --git a/learning/katas/python/Common Transforms/WithKeys/WithKeys/tests.py b/learning/katas/python/Common Transforms/WithKeys/WithKeys/tests.py
index a3059f27bfa2f..7b37e800894f5 100644
--- a/learning/katas/python/Common Transforms/WithKeys/WithKeys/tests.py
+++ b/learning/katas/python/Common Transforms/WithKeys/WithKeys/tests.py
@@ -14,19 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_filter():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[0]
-
- if 'beam.WithKeys' in placeholder:
- passed()
- else:
- failed('Use beam.WithKeys')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -44,6 +32,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_filter()
test_output()
diff --git a/learning/katas/python/Core Transforms/Branching/Branching/task-remote-info.yaml b/learning/katas/python/Core Transforms/Branching/Branching/task-remote-info.yaml
index 28b3f94853f64..bc28ecce41238 100644
--- a/learning/katas/python/Core Transforms/Branching/Branching/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Branching/Branching/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755592
-update_date: Fri, 07 Feb 2020 13:57:12 UTC
+update_date: Tue, 19 May 2020 03:05:20 UTC
diff --git a/learning/katas/python/Core Transforms/Branching/Branching/task.html b/learning/katas/python/Core Transforms/Branching/Branching/task.md
similarity index 76%
rename from learning/katas/python/Core Transforms/Branching/Branching/task.html
rename to learning/katas/python/Core Transforms/Branching/Branching/task.md
index 12d9645aa03a5..293dec70d4e04 100644
--- a/learning/katas/python/Core Transforms/Branching/Branching/task.html
+++ b/learning/katas/python/Core Transforms/Branching/Branching/task.md
@@ -16,20 +16,16 @@
~ limitations under the License.
-->
-
-
Branching
-
- You can use the same PCollection as input for multiple transforms without consuming the input
- or altering it.
-
-
- Kata: Branch out the numbers to two different transforms: one transform is multiplying
- each number by 5 and the other transform is multiplying each number by 10.
-
-
+Branching
+---------
+
+You can use the same PCollection as input for multiple transforms without consuming the input or
+altering it.
+
+**Kata:** Branch out the numbers to two different transforms: one transform is multiplying each
+number by 5 and the other transform is multiplying each number by 10.
-
diff --git a/learning/katas/python/Core Transforms/Branching/Branching/tests.py b/learning/katas/python/Core Transforms/Branching/Branching/tests.py
index de1fea6ac46a3..6df8cd5fdee77 100644
--- a/learning/katas/python/Core Transforms/Branching/Branching/tests.py
+++ b/learning/katas/python/Core Transforms/Branching/Branching/tests.py
@@ -14,8 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, get_file_output, \
- test_is_not_empty, test_answer_placeholders_text_deleted
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -41,5 +40,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
test_output()
diff --git a/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task-remote-info.yaml b/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task-remote-info.yaml
index cc40fdd3a88c2..43038c403b788 100644
--- a/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755583
-update_date: Fri, 07 Feb 2020 13:56:44 UTC
+update_date: Tue, 19 May 2020 03:04:56 UTC
diff --git a/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task.html b/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task.md
similarity index 69%
rename from learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task.html
rename to learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task.md
index 5c7ecf2a81a9a..dc2da5ffabfd2 100644
--- a/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task.html
+++ b/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/task.md
@@ -16,27 +16,25 @@
~ limitations under the License.
-->
-
-
CoGroupByKey
-
- CoGroupByKey performs a relational join of two or more key/value PCollections that have the same
- key type.
-
-
- Kata: Implement a
-
- CoGroupByKey transform that join words by its first alphabetical letter, and then produces
- the string representation of the WordsAlphabet model.
-
-
+CoGroupByKey
+------------
+
+CoGroupByKey performs a relational join of two or more key/value PCollections that have the same
+key type.
+
+**Kata:** Implement a
+[CoGroupByKey](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.util.html#apache_beam.transforms.util.CoGroupByKey)
+transform that joins words by its first alphabetical letter, and then produces the string
+representation of the WordsAlphabet model.
+
Refer to
- CoGroupByKeyto solve this problem.
+ CoGroupByKey to solve this problem.
+
Refer to the Beam Programming Guide
"CoGroupByKey" section for more information.
-
diff --git a/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/tests.py b/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/tests.py
index da12782656665..16e0501187ef4 100644
--- a/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/tests.py
+++ b/learning/katas/python/Core Transforms/CoGroupByKey/CoGroupByKey/tests.py
@@ -14,8 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, get_file_output, \
- test_is_not_empty, test_answer_placeholders_text_deleted
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -35,5 +34,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
test_output()
diff --git a/learning/katas/python/Core Transforms/Combine/Combine PerKey/task-remote-info.yaml b/learning/katas/python/Core Transforms/Combine/Combine PerKey/task-remote-info.yaml
index eafecc41c2316..c7f007a4f2e12 100644
--- a/learning/katas/python/Core Transforms/Combine/Combine PerKey/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Combine/Combine PerKey/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755587
-update_date: Fri, 07 Feb 2020 13:56:58 UTC
+update_date: Tue, 19 May 2020 03:05:05 UTC
diff --git a/learning/katas/python/Core Transforms/Combine/Combine PerKey/task.html b/learning/katas/python/Core Transforms/Combine/Combine PerKey/task.md
similarity index 67%
rename from learning/katas/python/Core Transforms/Combine/Combine PerKey/task.html
rename to learning/katas/python/Core Transforms/Combine/Combine PerKey/task.md
index 044aae7f9b1a2..c52512b823ab1 100644
--- a/learning/katas/python/Core Transforms/Combine/Combine PerKey/task.html
+++ b/learning/katas/python/Core Transforms/Combine/Combine PerKey/task.md
@@ -16,33 +16,31 @@
~ limitations under the License.
-->
-
-
Combine - Combine PerKey
-
- After creating a keyed PCollection (for example, by using a GroupByKey transform), a common
- pattern is to combine the collection of values associated with each key into a single, merged
- value. This pattern of a GroupByKey followed by merging the collection of values is equivalent to
- Combine PerKey transform. The combine function you supply to Combine PerKey must be an associative
- reduction function or a subclass of CombineFn.
-
-
- Kata: Implement the sum of scores per player using
-
- CombinePerKey.
-
-
+Combine - Combine PerKey
+------------------------
+
+After creating a keyed PCollection (for example, by using a GroupByKey transform), a common pattern
+is to combine the collection of values associated with each key into a single, merged value. This
+pattern of a GroupByKey followed by merging the collection of values is equivalent to Combine PerKey
+transform. The combine function you supply to Combine PerKey must be an associative reduction
+function or a subclass of CombineFn.
+
+**Kata:** Implement the sum of scores per player using
+[CombinePerKey](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.CombinePerKey).
+
-
diff --git a/learning/katas/python/Core Transforms/Combine/Combine PerKey/tests.py b/learning/katas/python/Core Transforms/Combine/Combine PerKey/tests.py
index e8042837ed227..cd6ab2e6638f6 100644
--- a/learning/katas/python/Core Transforms/Combine/Combine PerKey/tests.py
+++ b/learning/katas/python/Core Transforms/Combine/Combine PerKey/tests.py
@@ -14,19 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_combine_placeholders():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[0]
-
- if 'beam.CombinePerKey' in placeholder:
- passed()
- else:
- failed('Use beam.CombinePerKey')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -47,6 +35,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_combine_placeholders()
test_output()
diff --git a/learning/katas/python/Core Transforms/Combine/CombineFn/task-remote-info.yaml b/learning/katas/python/Core Transforms/Combine/CombineFn/task-remote-info.yaml
index 79a29f2a4048b..8330f0553036a 100644
--- a/learning/katas/python/Core Transforms/Combine/CombineFn/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Combine/CombineFn/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755585
-update_date: Fri, 07 Feb 2020 13:56:52 UTC
+update_date: Tue, 19 May 2020 03:06:40 UTC
diff --git a/learning/katas/python/Core Transforms/Combine/CombineFn/task.html b/learning/katas/python/Core Transforms/Combine/CombineFn/task.md
similarity index 51%
rename from learning/katas/python/Core Transforms/Combine/CombineFn/task.html
rename to learning/katas/python/Core Transforms/Combine/CombineFn/task.md
index 4828e0f6a1e9a..b083774a7558e 100644
--- a/learning/katas/python/Core Transforms/Combine/CombineFn/task.html
+++ b/learning/katas/python/Core Transforms/Combine/CombineFn/task.md
@@ -16,37 +16,32 @@
~ limitations under the License.
-->
-
-
Combine - CombineFn
-
- Combine is a Beam transform for combining collections of elements or values in your data.
- When you apply a Combine transform, you must provide the function that contains the logic for
- combining the elements or values. The combining function should be commutative and associative,
- as the function is not necessarily invoked exactly once on all values with a given key. Because
- the input data (including the value collection) may be distributed across multiple workers, the
- combining function might be called multiple times to perform partial combining on subsets of
- the value collection.
-
-
- Complex combination operations might require you to create a subclass of CombineFn that has an
- accumulation type distinct from the input/output type. You should use CombineFn if the combine
- function requires a more sophisticated accumulator, must perform additional pre- or
- post-processing, might change the output type, or takes the key into account.
-
-
+Combine - CombineFn
+-------------------
+
+Combine is a Beam transform for combining collections of elements or values in your data. When you
+apply a Combine transform, you must provide the function that contains the logic for combining the
+elements or values. The combining function should be commutative and associative, as the function
+is not necessarily invoked exactly once on all values with a given key. Because the input data
+(including the value collection) may be distributed across multiple workers, the combining function
+might be called multiple times to perform partial combining on subsets of the value collection.
+
+Complex combination operations might require you to create a subclass of CombineFn that has an
+accumulation type distinct from the input/output type. You should use CombineFn if the combine
+function requires a more sophisticated accumulator, must perform additional pre- or post-processing,
+might change the output type, or takes the key into account.
+
+**Kata:** Implement the average of numbers using
+[Combine.CombineFn](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.CombineFn).
+
Extend the
CombineFn class that counts the average of the number.
-
diff --git a/learning/katas/python/Core Transforms/Combine/CombineFn/tests.py b/learning/katas/python/Core Transforms/Combine/CombineFn/tests.py
index 656e5b38abed6..9883983df90f2 100644
--- a/learning/katas/python/Core Transforms/Combine/CombineFn/tests.py
+++ b/learning/katas/python/Core Transforms/Combine/CombineFn/tests.py
@@ -14,19 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_combine_placeholders():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[1]
-
- if 'beam.CombineGlobally' in placeholder:
- passed()
- else:
- failed('Use beam.CombineGlobally')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -42,6 +30,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_combine_placeholders()
test_output()
diff --git a/learning/katas/python/Core Transforms/Combine/Simple Function/task-remote-info.yaml b/learning/katas/python/Core Transforms/Combine/Simple Function/task-remote-info.yaml
index 9f6681d15dbf2..d61da2975c584 100644
--- a/learning/katas/python/Core Transforms/Combine/Simple Function/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Combine/Simple Function/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755584
-update_date: Fri, 07 Feb 2020 13:56:48 UTC
+update_date: Tue, 19 May 2020 03:05:00 UTC
diff --git a/learning/katas/python/Core Transforms/Combine/Simple Function/task.html b/learning/katas/python/Core Transforms/Combine/Simple Function/task.md
similarity index 54%
rename from learning/katas/python/Core Transforms/Combine/Simple Function/task.html
rename to learning/katas/python/Core Transforms/Combine/Simple Function/task.md
index 5e4bd02e2157b..47835894fe1cf 100644
--- a/learning/katas/python/Core Transforms/Combine/Simple Function/task.html
+++ b/learning/katas/python/Core Transforms/Combine/Simple Function/task.md
@@ -16,32 +16,27 @@
~ limitations under the License.
-->
-
-
Combine - Simple Function
-
- Combine is a Beam transform for combining collections of elements or values in your data.
- When you apply a Combine transform, you must provide the function that contains the logic for
- combining the elements or values. The combining function should be commutative and associative,
- as the function is not necessarily invoked exactly once on all values with a given key. Because
- the input data (including the value collection) may be distributed across multiple workers, the
- combining function might be called multiple times to perform partial combining on subsets of
- the value collection.
-
-
- Simple combine operations, such as sums, can usually be implemented as a simple function.
-
-
+Combine - Simple Function
+-------------------------
+
+Combine is a Beam transform for combining collections of elements or values in your data. When you
+apply a Combine transform, you must provide the function that contains the logic for combining the
+elements or values. The combining function should be commutative and associative, as the function
+is not necessarily invoked exactly once on all values with a given key. Because the input data
+(including the value collection) may be distributed across multiple workers, the combining function
+ might be called multiple times to perform partial combining on subsets of the value collection.
+
+Simple combine operations, such as sums, can usually be implemented as a simple function.
+
+**Kata:** Implement the summation of numbers using
+[CombineGlobally](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.CombineGlobally).
+
Implement a simple Python function that performs the summation of the values.
-
diff --git a/learning/katas/python/Core Transforms/Combine/Simple Function/tests.py b/learning/katas/python/Core Transforms/Combine/Simple Function/tests.py
index 2d740d8400e64..a5465af2d106d 100644
--- a/learning/katas/python/Core Transforms/Combine/Simple Function/tests.py
+++ b/learning/katas/python/Core Transforms/Combine/Simple Function/tests.py
@@ -14,19 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_combine_placeholders():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[1]
-
- if 'beam.CombineGlobally' in placeholder:
- passed()
- else:
- failed('Use beam.CombineGlobally')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -42,6 +30,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_combine_placeholders()
test_output()
diff --git a/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task-remote-info.yaml b/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task-remote-info.yaml
index 85d30168d3c27..c0a5566689c03 100644
--- a/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755593
-update_date: Fri, 07 Feb 2020 13:57:15 UTC
+update_date: Tue, 19 May 2020 03:05:23 UTC
diff --git a/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task.html b/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task.md
similarity index 51%
rename from learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task.html
rename to learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task.md
index 94c0e44e2ada2..b2d27ba89983e 100644
--- a/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task.html
+++ b/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/task.md
@@ -16,34 +16,31 @@
~ limitations under the License.
-->
-
-
Composite Transform
-
- Transforms can have a nested structure, where a complex transform performs multiple simpler
- transforms (such as more than one ParDo, Combine, GroupByKey, or even other composite transforms).
- These transforms are called composite transforms. Nesting multiple transforms inside a single
- composite transform can make your code more modular and easier to understand.
-
-
- To create your own composite transform, create a subclass of the PTransform class and override
- the expand method to specify the actual processing logic. You can then use this transform just as
- you would a built-in transform from the Beam SDK. Within your PTransform subclass, you’ll need to
- override the expand method. The expand method is where you add the processing logic for the
- PTransform. Your override of expand must accept the appropriate type of input PCollection as a
- parameter, and specify the output PCollection as the return value.
-
-
- Kata: Please implement a composite transform "ExtractAndMultiplyNumbers" that extracts
- numbers from comma separated line and then multiplies each number by 10.
-
-
+Composite Transform
+-------------------
+
+Transforms can have a nested structure, where a complex transform performs multiple simpler
+transforms (such as more than one ParDo, Combine, GroupByKey, or even other composite transforms).
+These transforms are called composite transforms. Nesting multiple transforms inside a single
+composite transform can make your code more modular and easier to understand.
+
+To create your own composite transform, create a subclass of the PTransform class and override the
+expand method to specify the actual processing logic. You can then use this transform just as you
+would a built-in transform from the Beam SDK. Within your PTransform subclass, you’ll need to
+override the expand method. The expand method is where you add the processing logic for the
+PTransform. Your override of expand must accept the appropriate type of input PCollection as a
+parameter, and specify the output PCollection as the return value.
+
+**Kata:** Please implement a composite transform "ExtractAndMultiplyNumbers" that extracts numbers
+from comma separated line and then multiplies each number by 10.
+
-
diff --git a/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/tests.py b/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/tests.py
index cc7db805461fe..f0fa900f8cbe9 100644
--- a/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/tests.py
+++ b/learning/katas/python/Core Transforms/Composite Transform/Composite Transform/tests.py
@@ -14,19 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_composite_expand_method():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[0]
-
- if 'def expand(' in placeholder:
- passed()
- else:
- failed('Override "expand" method')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -42,6 +30,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_composite_expand_method()
test_output()
diff --git a/learning/katas/python/Core Transforms/Flatten/Flatten/task-remote-info.yaml b/learning/katas/python/Core Transforms/Flatten/Flatten/task-remote-info.yaml
index 634212c1544c0..a64890a7e3633 100644
--- a/learning/katas/python/Core Transforms/Flatten/Flatten/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Flatten/Flatten/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755588
-update_date: Fri, 07 Feb 2020 13:57:02 UTC
+update_date: Tue, 19 May 2020 03:05:08 UTC
diff --git a/learning/katas/python/Core Transforms/Flatten/Flatten/task.html b/learning/katas/python/Core Transforms/Flatten/Flatten/task.md
similarity index 72%
rename from learning/katas/python/Core Transforms/Flatten/Flatten/task.html
rename to learning/katas/python/Core Transforms/Flatten/Flatten/task.md
index 488c139b37e65..1d52b8609cd32 100644
--- a/learning/katas/python/Core Transforms/Flatten/Flatten/task.html
+++ b/learning/katas/python/Core Transforms/Flatten/Flatten/task.md
@@ -16,25 +16,23 @@
~ limitations under the License.
-->
-
-
Flatten
-
- Flatten is a Beam transform for PCollection objects that store the same data type.
- Flatten merges multiple PCollection objects into a single logical PCollection.
-
-
- Kata: Implement a
-
- Flatten transform that merges two PCollection of words into a single PCollection.
-
-
+Flatten
+-------
+
+Flatten is a Beam transform for PCollection objects that store the same data type. Flatten merges
+multiple PCollection objects into a single logical PCollection.
+
+**Kata:** Implement a
+[Flatten](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.Flatten)
+transform that merges two PCollection of words into a single PCollection.
+
Refer to the Beam Programming Guide
"Flatten" section for more information.
-
diff --git a/learning/katas/python/Core Transforms/Flatten/Flatten/tests.py b/learning/katas/python/Core Transforms/Flatten/Flatten/tests.py
index c2caa2ee87db1..db32bc65ee974 100644
--- a/learning/katas/python/Core Transforms/Flatten/Flatten/tests.py
+++ b/learning/katas/python/Core Transforms/Flatten/Flatten/tests.py
@@ -14,19 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_flatten():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[0]
-
- if 'beam.Flatten' in placeholder:
- passed()
- else:
- failed('Use beam.Flatten')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -42,6 +30,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_flatten()
test_output()
diff --git a/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task-remote-info.yaml b/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task-remote-info.yaml
index 23d7d408b2188..6a23287105cf2 100644
--- a/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755582
-update_date: Fri, 07 Feb 2020 13:56:41 UTC
+update_date: Tue, 19 May 2020 03:04:53 UTC
diff --git a/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task.html b/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task.md
similarity index 61%
rename from learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task.html
rename to learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task.md
index 042912afb48f6..fb32244fff6a2 100644
--- a/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task.html
+++ b/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/task.md
@@ -16,29 +16,27 @@
~ limitations under the License.
-->
-
-
GroupByKey
-
- GroupByKey is a Beam transform for processing collections of key/value pairs. It’s a parallel
- reduction operation, analogous to the Shuffle phase of a Map/Shuffle/Reduce-style algorithm. The
- input to GroupByKey is a collection of key/value pairs that represents a multimap, where the
- collection contains multiple pairs that have the same key, but different values. Given such a
- collection, you use GroupByKey to collect all of the values associated with each unique key.
-
-
- Kata: Implement a
-
- GroupByKey transform that groups words by its first letter.
-
-
+GroupByKey
+----------
+
+GroupByKey is a Beam transform for processing collections of key/value pairs. It’s a parallel
+reduction operation, analogous to the Shuffle phase of a Map/Shuffle/Reduce-style algorithm.
+The input to GroupByKey is a collection of key/value pairs that represents a multimap, where the
+collection contains multiple pairs that have the same key, but different values. Given such a
+collection, you use GroupByKey to collect all of the values associated with each unique key.
+
+**Kata:** Implement a
+[GroupByKey](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.GroupByKey)
+transform that groups words by its first letter.
+
Refer to the Beam Programming Guide
"GroupByKey" section for more information.
-
diff --git a/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/tests.py b/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/tests.py
index 8f9ffd57f98d0..e16fb6cb48832 100644
--- a/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/tests.py
+++ b/learning/katas/python/Core Transforms/GroupByKey/GroupByKey/tests.py
@@ -14,8 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, get_file_output, \
- test_is_not_empty, test_answer_placeholders_text_deleted
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -33,5 +32,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
test_output()
diff --git a/learning/katas/python/Core Transforms/Map/FlatMap/task-remote-info.yaml b/learning/katas/python/Core Transforms/Map/FlatMap/task-remote-info.yaml
index 4911596641ba3..f98961e93e7b5 100644
--- a/learning/katas/python/Core Transforms/Map/FlatMap/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Map/FlatMap/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755580
-update_date: Fri, 07 Feb 2020 13:56:38 UTC
+update_date: Tue, 19 May 2020 03:04:50 UTC
diff --git a/learning/katas/python/Core Transforms/Map/FlatMap/task.html b/learning/katas/python/Core Transforms/Map/FlatMap/task.md
similarity index 70%
rename from learning/katas/python/Core Transforms/Map/FlatMap/task.html
rename to learning/katas/python/Core Transforms/Map/FlatMap/task.md
index f69fffd1f3097..7c6aadec400a6 100644
--- a/learning/katas/python/Core Transforms/Map/FlatMap/task.html
+++ b/learning/katas/python/Core Transforms/Map/FlatMap/task.md
@@ -16,28 +16,24 @@
~ limitations under the License.
-->
-
-
FlatMapElements
-
- The Beam SDKs provide language-specific ways to simplify how you provide your DoFn implementation.
-
-
- FlatMap can be used to simplify DoFn that maps an element to multiple elements (one to many).
-
-
- Kata: Implement a function that maps each input sentence into words tokenized by whitespace
- (" ") using
-
- FlatMap.
-
-
+FlatMapElements
+---------------
+
+The Beam SDKs provide language-specific ways to simplify how you provide your DoFn implementation.
+
+FlatMap can be used to simplify DoFn that maps an element to multiple elements (one to many).
+
+**Kata:** Implement a function that maps each input sentence into words tokenized by
+whitespace (" ") using
+[FlatMap](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.FlatMap).
+
-
diff --git a/learning/katas/python/Core Transforms/Map/FlatMap/tests.py b/learning/katas/python/Core Transforms/Map/FlatMap/tests.py
index 6eaaa643e9388..e166eae902a1b 100644
--- a/learning/katas/python/Core Transforms/Map/FlatMap/tests.py
+++ b/learning/katas/python/Core Transforms/Map/FlatMap/tests.py
@@ -14,19 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_flatmap():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[0]
-
- if 'beam.FlatMap' in placeholder:
- passed()
- else:
- failed('Use beam.FlatMap')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -42,6 +30,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_flatmap()
test_output()
diff --git a/learning/katas/python/Core Transforms/Map/Map/task-remote-info.yaml b/learning/katas/python/Core Transforms/Map/Map/task-remote-info.yaml
index a9505b797650c..66446ef58a854 100644
--- a/learning/katas/python/Core Transforms/Map/Map/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Map/Map/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755579
-update_date: Fri, 07 Feb 2020 13:56:35 UTC
+update_date: Tue, 19 May 2020 03:04:48 UTC
diff --git a/learning/katas/python/Core Transforms/Map/Map/task.html b/learning/katas/python/Core Transforms/Map/Map/task.md
similarity index 76%
rename from learning/katas/python/Core Transforms/Map/Map/task.html
rename to learning/katas/python/Core Transforms/Map/Map/task.md
index fee1a4bc1d060..46694d948e22f 100644
--- a/learning/katas/python/Core Transforms/Map/Map/task.html
+++ b/learning/katas/python/Core Transforms/Map/Map/task.md
@@ -16,24 +16,21 @@
~ limitations under the License.
-->
-
-
MapElements
-
- The Beam SDKs provide language-specific ways to simplify how you provide your DoFn implementation.
-
-
- Kata: Implement a simple map function that multiplies all input elements by 5 using
-
- Map.
-
-
+MapElements
+-----------
+
+The Beam SDKs provide language-specific ways to simplify how you provide your DoFn implementation.
+
+**Kata:** Implement a simple map function that multiplies all input elements by 5 using
+[Map](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.Map).
+
Refer to the Beam Programming Guide
"ParDo" section for
more information.
-
diff --git a/learning/katas/python/Core Transforms/Map/ParDo OneToMany/tests.py b/learning/katas/python/Core Transforms/Map/ParDo OneToMany/tests.py
index b934821e35660..c83a7dd7b0cb2 100644
--- a/learning/katas/python/Core Transforms/Map/ParDo OneToMany/tests.py
+++ b/learning/katas/python/Core Transforms/Map/ParDo OneToMany/tests.py
@@ -14,29 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_dofn_process_method():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[0]
-
- if 'def process(self, element' in placeholder:
- passed()
- else:
- failed('Override "process" method')
-
-
-def test_pardo():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[1]
-
- if 'beam.ParDo(BreakIntoWordsDoFn())' in placeholder:
- passed()
- else:
- failed('Use beam.ParDo')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -52,7 +30,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_dofn_process_method()
- test_pardo()
test_output()
diff --git a/learning/katas/python/Core Transforms/Map/ParDo/task-remote-info.yaml b/learning/katas/python/Core Transforms/Map/ParDo/task-remote-info.yaml
index 8a43bcd9ebc69..97b55d7821e72 100644
--- a/learning/katas/python/Core Transforms/Map/ParDo/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Map/ParDo/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755577
-update_date: Fri, 07 Feb 2020 13:56:29 UTC
+update_date: Tue, 19 May 2020 03:04:42 UTC
diff --git a/learning/katas/python/Core Transforms/Map/ParDo/task.html b/learning/katas/python/Core Transforms/Map/ParDo/task.md
similarity index 74%
rename from learning/katas/python/Core Transforms/Map/ParDo/task.html
rename to learning/katas/python/Core Transforms/Map/ParDo/task.md
index e6eab7bbd5fba..bbb52a54f52b9 100644
--- a/learning/katas/python/Core Transforms/Map/ParDo/task.html
+++ b/learning/katas/python/Core Transforms/Map/ParDo/task.md
@@ -16,30 +16,29 @@
~ limitations under the License.
-->
-
-
ParDo
-
- ParDo is a Beam transform for generic parallel processing. The ParDo processing paradigm is
- similar to the “Map” phase of a Map/Shuffle/Reduce-style algorithm: a ParDo transform considers
- each element in the input PCollection, performs some processing function (your user code) on
- that element, and emits zero, one, or multiple elements to an output PCollection.
-
-
- Kata: Please write a simple ParDo that maps the input element by multiplying it by 10.
-
-
+ParDo
+-----
+
+ParDo is a Beam transform for generic parallel processing. The ParDo processing paradigm is similar
+to the “Map” phase of a Map/Shuffle/Reduce-style algorithm: a ParDo transform considers each element
+in the input PCollection, performs some processing function (your user code) on that element, and
+emits zero, one, or multiple elements to an output PCollection.
+
+**Kata:** Please write a simple ParDo that maps the input element by multiplying it by 10.
+
Refer to the Beam Programming Guide
"ParDo" section for
more information.
-
diff --git a/learning/katas/python/Core Transforms/Map/ParDo/tests.py b/learning/katas/python/Core Transforms/Map/ParDo/tests.py
index 55913188a937b..a274e68cd3534 100644
--- a/learning/katas/python/Core Transforms/Map/ParDo/tests.py
+++ b/learning/katas/python/Core Transforms/Map/ParDo/tests.py
@@ -14,29 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_dofn_process_method():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[0]
-
- if 'def process(self, element' in placeholder:
- passed()
- else:
- failed('Override "process" method')
-
-
-def test_pardo():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[1]
-
- if 'beam.ParDo(MultiplyByTenDoFn())' in placeholder:
- passed()
- else:
- failed('Use beam.ParDo')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -52,7 +30,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_dofn_process_method()
- test_pardo()
test_output()
diff --git a/learning/katas/python/Core Transforms/Partition/Partition/task-remote-info.yaml b/learning/katas/python/Core Transforms/Partition/Partition/task-remote-info.yaml
index 948872272133b..3a551d6f8fa12 100644
--- a/learning/katas/python/Core Transforms/Partition/Partition/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Partition/Partition/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755589
-update_date: Fri, 07 Feb 2020 13:59:37 UTC
+update_date: Tue, 19 May 2020 03:05:12 UTC
diff --git a/learning/katas/python/Core Transforms/Partition/Partition/task.html b/learning/katas/python/Core Transforms/Partition/Partition/task.md
similarity index 58%
rename from learning/katas/python/Core Transforms/Partition/Partition/task.html
rename to learning/katas/python/Core Transforms/Partition/Partition/task.md
index 513fd3a82267b..62a244d4752dc 100644
--- a/learning/katas/python/Core Transforms/Partition/Partition/task.html
+++ b/learning/katas/python/Core Transforms/Partition/Partition/task.md
@@ -16,32 +16,28 @@
~ limitations under the License.
-->
-
-
Partition
-
- Partition is a Beam transform for PCollection objects that store the same data type.
- Partition splits a single PCollection into a fixed number of smaller collections.
-
-
- Partition divides the elements of a PCollection according to a partitioning function
- that you provide. The partitioning function contains the logic that determines how to split up
- the elements of the input PCollection into each resulting partition PCollection.
-
-
- Kata: Implement a
-
- Partition transform that splits a PCollection of numbers into two PCollections.
- The first PCollection contains numbers greater than 100, and the second PCollection contains
- the remaining numbers.
-
-
+Partition
+---------
+
+Partition is a Beam transform for PCollection objects that store the same data type. Partition
+splits a single PCollection into a fixed number of smaller collections.
+
+Partition divides the elements of a PCollection according to a partitioning function that you
+provide. The partitioning function contains the logic that determines how to split up the elements
+of the input PCollection into each resulting partition PCollection.
+
+**Kata:** Implement a
+[Partition](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.Partition)
+transform that splits a PCollection of numbers into two PCollections. The first PCollection
+contains numbers greater than 100, and the second PCollection contains the remaining numbers.
+
Refer to the Beam Programming Guide
"Partition" section for more information.
-
diff --git a/learning/katas/python/Core Transforms/Partition/Partition/tests.py b/learning/katas/python/Core Transforms/Partition/Partition/tests.py
index bbeeaf7c196e8..d8285aede76f6 100644
--- a/learning/katas/python/Core Transforms/Partition/Partition/tests.py
+++ b/learning/katas/python/Core Transforms/Partition/Partition/tests.py
@@ -14,19 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_partition():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[1]
-
- if 'beam.Partition' in placeholder:
- passed()
- else:
- failed('Use beam.Partition')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -51,6 +39,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_partition()
test_output()
diff --git a/learning/katas/python/Core Transforms/Side Input/Side Input/task-remote-info.yaml b/learning/katas/python/Core Transforms/Side Input/Side Input/task-remote-info.yaml
index 338c410d5074f..4957ecec404c1 100644
--- a/learning/katas/python/Core Transforms/Side Input/Side Input/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Side Input/Side Input/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755590
-update_date: Fri, 07 Feb 2020 13:57:06 UTC
+update_date: Tue, 19 May 2020 03:05:14 UTC
diff --git a/learning/katas/python/Core Transforms/Side Input/Side Input/task.html b/learning/katas/python/Core Transforms/Side Input/Side Input/task.md
similarity index 64%
rename from learning/katas/python/Core Transforms/Side Input/Side Input/task.html
rename to learning/katas/python/Core Transforms/Side Input/Side Input/task.md
index b9136274c102f..5d67a0beae287 100644
--- a/learning/katas/python/Core Transforms/Side Input/Side Input/task.html
+++ b/learning/katas/python/Core Transforms/Side Input/Side Input/task.md
@@ -16,38 +16,36 @@
~ limitations under the License.
-->
-
-
Side Input
-
- In addition to the main input PCollection, you can provide additional inputs to a ParDo transform
- in the form of side inputs. A side input is an additional input that your DoFn can access each
- time it processes an element in the input PCollection. When you specify a side input, you create
- a view of some other data that can be read from within the ParDo transform’s DoFn while
- processing each element.
-
-
- Side inputs are useful if your ParDo needs to inject additional data when processing each element
- in the input PCollection, but the additional data needs to be determined at runtime (and not
- hard-coded). Such values might be determined by the input data, or depend on a different branch
- of your pipeline.
-
-
- Kata: Please enrich each Person with the country based on the city he/she lives in.
-
-
+Side Input
+----------
+
+In addition to the main input PCollection, you can provide additional inputs to a ParDo transform
+in the form of side inputs. A side input is an additional input that your DoFn can access each time
+it processes an element in the input PCollection. When you specify a side input, you create a view
+of some other data that can be read from within the ParDo transform’s DoFn while processing each
+element.
+
+Side inputs are useful if your ParDo needs to inject additional data when processing each element
+in the input PCollection, but the additional data needs to be determined at runtime (and not
+hard-coded). Such values might be determined by the input data, or depend on a different branch of
+your pipeline.
+
+**Kata:** Please enrich each Person with the country based on the city he/she lives in.
+
Override
process method that also accepts side input argument.
Refer to the Beam Programming Guide
"Side inputs"
section for more information.
-
diff --git a/learning/katas/python/Core Transforms/Side Input/Side Input/tests.py b/learning/katas/python/Core Transforms/Side Input/Side Input/tests.py
index 8fdd7da55e207..6171323e0bbb7 100644
--- a/learning/katas/python/Core Transforms/Side Input/Side Input/tests.py
+++ b/learning/katas/python/Core Transforms/Side Input/Side Input/tests.py
@@ -22,29 +22,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_dofn_process_method():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[0]
-
- if 'def process(self, element' in placeholder:
- passed()
- else:
- failed('Override "process" method')
-
-
-def test_pardo():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[1]
-
- if 'beam.ParDo(EnrichCountryDoFn(),' in placeholder:
- passed()
- else:
- failed('Use beam.ParDo that accepts side input')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -66,7 +44,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_dofn_process_method()
- test_pardo()
test_output()
diff --git a/learning/katas/python/Core Transforms/Side Output/Side Output/task-remote-info.yaml b/learning/katas/python/Core Transforms/Side Output/Side Output/task-remote-info.yaml
index 74de155ae7cc6..158110e2e711f 100644
--- a/learning/katas/python/Core Transforms/Side Output/Side Output/task-remote-info.yaml
+++ b/learning/katas/python/Core Transforms/Side Output/Side Output/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755591
-update_date: Fri, 07 Feb 2020 13:57:09 UTC
+update_date: Tue, 19 May 2020 03:05:17 UTC
diff --git a/learning/katas/python/Core Transforms/Side Output/Side Output/task.html b/learning/katas/python/Core Transforms/Side Output/Side Output/task.md
similarity index 77%
rename from learning/katas/python/Core Transforms/Side Output/Side Output/task.html
rename to learning/katas/python/Core Transforms/Side Output/Side Output/task.md
index b6e05431ea10b..5f9ee5d781f8d 100644
--- a/learning/katas/python/Core Transforms/Side Output/Side Output/task.html
+++ b/learning/katas/python/Core Transforms/Side Output/Side Output/task.md
@@ -16,18 +16,16 @@
~ limitations under the License.
-->
-
-
Side Output
-
- While ParDo always produces a main output PCollection (as the return value from apply), you can
- also have your ParDo produce any number of additional output PCollections. If you choose to have
- multiple outputs, your ParDo returns all of the output PCollections (including the main output)
- bundled together.
-
-
- Kata: Implement additional output to your ParDo for numbers bigger than 100.
-
-
+Side Output
+-----------
+
+While ParDo always produces a main output PCollection (as the return value from apply), you can
+also have your ParDo produce any number of additional output PCollections. If you choose to have
+multiple outputs, your ParDo returns all of the output PCollections (including the main output)
+bundled together.
+
+**Kata:** Implement additional output to your ParDo for numbers bigger than 100.
+
Refer to the Beam Programming Guide
"Additional outputs" section for more information.
-
diff --git a/learning/katas/python/Core Transforms/Side Output/Side Output/tests.py b/learning/katas/python/Core Transforms/Side Output/Side Output/tests.py
index 1af84398a91fe..89b299b40744e 100644
--- a/learning/katas/python/Core Transforms/Side Output/Side Output/tests.py
+++ b/learning/katas/python/Core Transforms/Side Output/Side Output/tests.py
@@ -14,29 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_dofn_process_method():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[0]
-
- if 'pvalue.TaggedOutput' in placeholder:
- passed()
- else:
- failed('Use pvalue.TaggedOutput')
-
-
-def test_pardo():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[1]
-
- if all(['beam.ParDo(ProcessNumbersDoFn())', '.with_outputs,']) in placeholder:
- passed()
- else:
- failed('Use beam.ParDo that outputs multiple outputs')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -61,7 +39,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_dofn_process_method()
- test_pardo()
test_output()
diff --git a/learning/katas/python/Examples/Word Count/Word Count/task-remote-info.yaml b/learning/katas/python/Examples/Word Count/Word Count/task-remote-info.yaml
index b1f7f2c9d8a9d..eec4604c1c247 100644
--- a/learning/katas/python/Examples/Word Count/Word Count/task-remote-info.yaml
+++ b/learning/katas/python/Examples/Word Count/Word Count/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755604
-update_date: Fri, 07 Feb 2020 13:57:43 UTC
+update_date: Tue, 19 May 2020 03:06:04 UTC
diff --git a/learning/katas/python/Examples/Word Count/Word Count/task.html b/learning/katas/python/Examples/Word Count/Word Count/task.md
similarity index 84%
rename from learning/katas/python/Examples/Word Count/Word Count/task.html
rename to learning/katas/python/Examples/Word Count/Word Count/task.md
index 82ce81cbf8f35..b1bb44c2ced90 100644
--- a/learning/katas/python/Examples/Word Count/Word Count/task.html
+++ b/learning/katas/python/Examples/Word Count/Word Count/task.md
@@ -16,25 +16,23 @@
~ limitations under the License.
-->
-
-
Word Count Pipeline
-
- Kata: Create a pipeline that counts the number of words.
-
-
- Please output the count of each word in the following format:
-
-
+Word Count Pipeline
+-------------------
+
+**Kata:** Create a pipeline that counts the number of words.
+
+Please output the count of each word in the following format:
+```text
word:count
ball:5
book:3
-
-
+```
+
Refer to your katas above.
+
Use
MapTuple to unpack key-value pair into different function arguments.
-
diff --git a/learning/katas/python/Examples/Word Count/Word Count/tests.py b/learning/katas/python/Examples/Word Count/Word Count/tests.py
index 16b7bf511c0ae..50a2bdcc3e614 100644
--- a/learning/katas/python/Examples/Word Count/Word Count/tests.py
+++ b/learning/katas/python/Examples/Word Count/Word Count/tests.py
@@ -14,8 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, get_file_output, \
- test_is_not_empty, test_answer_placeholders_text_deleted
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -37,5 +36,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
test_output()
diff --git a/learning/katas/python/IO/Built-in IOs/Built-in IOs/task-remote-info.yaml b/learning/katas/python/IO/Built-in IOs/Built-in IOs/task-remote-info.yaml
index 3dfb10425346d..a5130d4e01378 100644
--- a/learning/katas/python/IO/Built-in IOs/Built-in IOs/task-remote-info.yaml
+++ b/learning/katas/python/IO/Built-in IOs/Built-in IOs/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1076138
-update_date: Fri, 07 Feb 2020 13:56:24 UTC
+update_date: Tue, 19 May 2020 03:05:56 UTC
diff --git a/learning/katas/java/IO/Built-in IOs/Built-in IOs/task.html b/learning/katas/python/IO/Built-in IOs/Built-in IOs/task.md
similarity index 63%
rename from learning/katas/java/IO/Built-in IOs/Built-in IOs/task.html
rename to learning/katas/python/IO/Built-in IOs/Built-in IOs/task.md
index 447dfa3141cff..df4df796ebce2 100644
--- a/learning/katas/java/IO/Built-in IOs/Built-in IOs/task.html
+++ b/learning/katas/python/IO/Built-in IOs/Built-in IOs/task.md
@@ -16,18 +16,13 @@
~ limitations under the License.
-->
-
-
Built-in I/Os
-
- Beam SDKs provide many out of the box I/O transforms that can be used to read from many
- different sources and write to many different sinks.
-
- Note: There is no kata for this task. Please click the "Check" button and
- proceed to the next task.
-
-
\ No newline at end of file
+Built-in I/Os
+-------------
+
+Beam SDKs provide many out of the box I/O transforms that can be used to read from many different
+sources and write to many different sinks.
+
+See the [Beam-provided I/O Transforms](https://beam.apache.org/documentation/io/built-in/) page
+for a list of the currently available I/O transforms.
+
+**Note:** There is no kata for this task. Please proceed to the next task.
diff --git a/learning/katas/python/IO/TextIO/ReadFromText/task-remote-info.yaml b/learning/katas/python/IO/TextIO/ReadFromText/task-remote-info.yaml
index a6a6ee6702f68..9afea636a85e9 100644
--- a/learning/katas/python/IO/TextIO/ReadFromText/task-remote-info.yaml
+++ b/learning/katas/python/IO/TextIO/ReadFromText/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755602
-update_date: Fri, 07 Feb 2020 13:57:40 UTC
+update_date: Tue, 19 May 2020 03:05:52 UTC
diff --git a/learning/katas/python/IO/TextIO/ReadFromText/task.html b/learning/katas/python/IO/TextIO/ReadFromText/task.md
similarity index 60%
rename from learning/katas/python/IO/TextIO/ReadFromText/task.html
rename to learning/katas/python/IO/TextIO/ReadFromText/task.md
index c4fc0bde6967a..4f3a7cfc5842f 100644
--- a/learning/katas/python/IO/TextIO/ReadFromText/task.html
+++ b/learning/katas/python/IO/TextIO/ReadFromText/task.md
@@ -16,30 +16,27 @@
~ limitations under the License.
-->
-
-
ReadFromText
-
- When you create a pipeline, you often need to read data from some external source, such as a file
- or a database. Likewise, you may want your pipeline to output its result data to an external
- storage system. Beam provides read and write transforms for a number of common data storage types.
- If you want your pipeline to read from or write to a data storage format that isn’t supported by
- the built-in transforms, you can implement your own read and write transforms.
-
-
- To read a PCollection from one or more text files, use beam.io.ReadFromText to instantiate a
- transform and specify the path of the file(s) to be read.
-
-
- Kata: Read the 'countries.txt' file and convert each country name into uppercase.
-
-
+ReadFromText
+------------
+
+When you create a pipeline, you often need to read data from some external source, such as a file
+or a database. Likewise, you may want your pipeline to output its result data to an external
+storage system. Beam provides read and write transforms for a number of common data storage types.
+If you want your pipeline to read from or write to a data storage format that isn’t supported by
+the built-in transforms, you can implement your own read and write transforms.
+
+To read a PCollection from one or more text files, use beam.io.ReadFromText to instantiate a
+transform and specify the path of the file(s) to be read.
+
+**Kata:** Read the 'countries.txt' file and convert each country name into uppercase.
+
Refer to the Beam Programming Guide
"Reading input data" section for more information.
-
diff --git a/learning/katas/python/IO/TextIO/ReadFromText/tests.py b/learning/katas/python/IO/TextIO/ReadFromText/tests.py
index 273aada623974..5a29e43d8797f 100644
--- a/learning/katas/python/IO/TextIO/ReadFromText/tests.py
+++ b/learning/katas/python/IO/TextIO/ReadFromText/tests.py
@@ -14,19 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_readfromtext_method():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[0]
-
- if 'ReadFromText(' in placeholder:
- passed()
- else:
- failed('Use beam.io.ReadFromText')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -53,6 +41,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_readfromtext_method()
test_output()
diff --git a/learning/katas/python/Introduction/Hello Beam/Hello Beam/task-remote-info.yaml b/learning/katas/python/Introduction/Hello Beam/Hello Beam/task-remote-info.yaml
index 800507e89f534..d4953da14b5a6 100644
--- a/learning/katas/python/Introduction/Hello Beam/Hello Beam/task-remote-info.yaml
+++ b/learning/katas/python/Introduction/Hello Beam/Hello Beam/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 755575
-update_date: Fri, 07 Feb 2020 13:56:26 UTC
+update_date: Tue, 19 May 2020 03:04:39 UTC
diff --git a/learning/katas/python/Introduction/Hello Beam/Hello Beam/task.html b/learning/katas/python/Introduction/Hello Beam/Hello Beam/task.md
similarity index 51%
rename from learning/katas/python/Introduction/Hello Beam/Hello Beam/task.html
rename to learning/katas/python/Introduction/Hello Beam/Hello Beam/task.md
index e71982d045f18..b6df12c28d862 100644
--- a/learning/katas/python/Introduction/Hello Beam/Hello Beam/task.html
+++ b/learning/katas/python/Introduction/Hello Beam/Hello Beam/task.md
@@ -16,38 +16,34 @@
~ limitations under the License.
-->
-
-
Hello Beam Pipeline
-
- Apache Beam is an open source, unified model for defining both batch and streaming data-parallel
- processing pipelines. Using one of the open source Beam SDKs, you build a program that defines the
- pipeline. The pipeline is then executed by one of Beam’s supported distributed processing
- back-ends, which include Apache Apex, Apache Flink, Apache Spark, and Google Cloud Dataflow.
-
-
- Beam is particularly useful for Embarrassingly Parallel data processing tasks, in which the
- problem can be decomposed into many smaller bundles of data that can be processed independently
- and in parallel. You can also use Beam for Extract, Transform, and Load (ETL) tasks and pure data
- integration. These tasks are useful for moving data between different storage media and data
- sources, transforming data into a more desirable format, or loading data onto a new system.
-
- Kata: Your first kata is to create a simple pipeline that takes a hardcoded input element
- "Hello Beam".
-
-
+Welcome To Apache Beam
+----------------------
+
+Apache Beam is an open source, unified model for defining both batch and streaming data-parallel
+processing pipelines. Using one of the open source Beam SDKs, you build a program that defines the
+pipeline. The pipeline is then executed by one of Beam’s supported distributed processing back-ends,
+which include Apache Apex, Apache Flink, Apache Spark, and Google Cloud Dataflow.
+
+Beam is particularly useful for Embarrassingly Parallel data processing tasks, in which the problem
+can be decomposed into many smaller bundles of data that can be processed independently and in
+parallel. You can also use Beam for Extract, Transform, and Load (ETL) tasks and pure data
+integration. These tasks are useful for moving data between different storage media and data
+sources, transforming data into a more desirable format, or loading data onto a new system.
+
+To learn more about Apache Beam, refer to
+[Apache Beam Overview](https://beam.apache.org/get-started/beam-overview/).
+
+**Kata:** Your first kata is to create a simple pipeline that takes a hardcoded input element
+"Hello Beam".
+
-
diff --git a/learning/katas/python/Introduction/Hello Beam/Hello Beam/tests.py b/learning/katas/python/Introduction/Hello Beam/Hello Beam/tests.py
index 33d45a642c03a..d0e9098678581 100644
--- a/learning/katas/python/Introduction/Hello Beam/Hello Beam/tests.py
+++ b/learning/katas/python/Introduction/Hello Beam/Hello Beam/tests.py
@@ -14,18 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_answer_placeholders():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[0]
- if 'beam.Create' in placeholder:
- passed()
- else:
- failed('Use beam.Create')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -39,6 +28,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_answer_placeholders()
test_output()
diff --git a/learning/katas/python/Windowing/Adding Timestamp/ParDo/task-info.yaml b/learning/katas/python/Windowing/Adding Timestamp/ParDo/task-info.yaml
index be661cb1758a4..bbdc8d0177958 100644
--- a/learning/katas/python/Windowing/Adding Timestamp/ParDo/task-info.yaml
+++ b/learning/katas/python/Windowing/Adding Timestamp/ParDo/task-info.yaml
@@ -22,10 +22,10 @@ files:
- name: task.py
visible: true
placeholders:
- - offset: 1211
- length: 163
+ - offset: 1231
+ length: 155
placeholder_text: TODO()
- - offset: 1740
+ - offset: 1917
length: 30
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Windowing/Adding Timestamp/ParDo/task-remote-info.yaml b/learning/katas/python/Windowing/Adding Timestamp/ParDo/task-remote-info.yaml
index d65ccb3f218e1..3eafb58a76137 100644
--- a/learning/katas/python/Windowing/Adding Timestamp/ParDo/task-remote-info.yaml
+++ b/learning/katas/python/Windowing/Adding Timestamp/ParDo/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1124219
-update_date: Mon, 09 Mar 2020 14:33:58 UTC
+update_date: Tue, 19 May 2020 03:06:43 UTC
diff --git a/learning/katas/python/Windowing/Adding Timestamp/ParDo/task.html b/learning/katas/python/Windowing/Adding Timestamp/ParDo/task.md
similarity index 76%
rename from learning/katas/python/Windowing/Adding Timestamp/ParDo/task.html
rename to learning/katas/python/Windowing/Adding Timestamp/ParDo/task.md
index 2e93a0c9bb215..f664a7b2306bc 100644
--- a/learning/katas/python/Windowing/Adding Timestamp/ParDo/task.html
+++ b/learning/katas/python/Windowing/Adding Timestamp/ParDo/task.md
@@ -16,32 +16,30 @@
~ limitations under the License.
-->
-
-
Adding Timestamp - ParDo
-
- Bounded sources (such as a file from TextIO) do not provide timestamps for elements. If you need
- timestamps, you must add them to your PCollection’s elements.
-
-
- You can assign new timestamps to the elements of a PCollection by applying a ParDo transform that
- outputs new elements with timestamps that you set.
-
-
- Kata: Please assign each element a timestamp based on the the Event.date.
-
-
+Adding Timestamp - ParDo
+------------------------
+
+Bounded sources (such as a file from TextIO) do not provide timestamps for elements. If you need
+timestamps, you must add them to your PCollection’s elements.
+
+You can assign new timestamps to the elements of a PCollection by applying a ParDo transform that
+outputs new elements with timestamps that you set.
+
+**Kata:** Please assign each element a timestamp based on the `Event.timestamp`.
+
-
diff --git a/learning/katas/python/Windowing/Adding Timestamp/ParDo/task.py b/learning/katas/python/Windowing/Adding Timestamp/ParDo/task.py
index f2e6ce6e7460c..aba4f6eedf313 100644
--- a/learning/katas/python/Windowing/Adding Timestamp/ParDo/task.py
+++ b/learning/katas/python/Windowing/Adding Timestamp/ParDo/task.py
@@ -15,7 +15,7 @@
# limitations under the License.
import datetime
-import time
+import pytz
import apache_beam as beam
from apache_beam.transforms import window
@@ -24,30 +24,30 @@
class Event:
- def __init__(self, id, event, date):
+ def __init__(self, id, event, timestamp):
self.id = id
self.event = event
- self.date = date
+ self.timestamp = timestamp
def __str__(self) -> str:
- return f'Event({self.id}, {self.event}, {self.date})'
+ return f'Event({self.id}, {self.event}, {self.timestamp})'
class AddTimestampDoFn(beam.DoFn):
def process(self, element, **kwargs):
- unix_timestamp = time.mktime(element.date.timetuple())
+ unix_timestamp = element.timestamp.timestamp()
yield window.TimestampedValue(element, unix_timestamp)
p = beam.Pipeline()
(p | beam.Create([
- Event('1', 'book-order', datetime.date(2020, 3, 4)),
- Event('2', 'pencil-order', datetime.date(2020, 3, 5)),
- Event('3', 'paper-order', datetime.date(2020, 3, 6)),
- Event('4', 'pencil-order', datetime.date(2020, 3, 7)),
- Event('5', 'book-order', datetime.date(2020, 3, 8)),
+ Event('1', 'book-order', datetime.datetime(2020, 3, 4, 0, 0, 0, 0, tzinfo=pytz.UTC)),
+ Event('2', 'pencil-order', datetime.datetime(2020, 3, 5, 0, 0, 0, 0, tzinfo=pytz.UTC)),
+ Event('3', 'paper-order', datetime.datetime(2020, 3, 6, 0, 0, 0, 0, tzinfo=pytz.UTC)),
+ Event('4', 'pencil-order', datetime.datetime(2020, 3, 7, 0, 0, 0, 0, tzinfo=pytz.UTC)),
+ Event('5', 'book-order', datetime.datetime(2020, 3, 8, 0, 0, 0, 0, tzinfo=pytz.UTC)),
])
| beam.ParDo(AddTimestampDoFn())
| LogElements(with_timestamp=True))
diff --git a/learning/katas/python/Windowing/Adding Timestamp/ParDo/tests.py b/learning/katas/python/Windowing/Adding Timestamp/ParDo/tests.py
index 1db5ba142f2b7..2a2011f707c4a 100644
--- a/learning/katas/python/Windowing/Adding Timestamp/ParDo/tests.py
+++ b/learning/katas/python/Windowing/Adding Timestamp/ParDo/tests.py
@@ -14,40 +14,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_dofn_process_method():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[0]
-
- if 'def process(self,' in placeholder:
- passed()
- else:
- failed('Override "process" method')
-
-
-def test_pardo():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[1]
-
- if 'beam.ParDo(AddTimestampDoFn())' in placeholder:
- passed()
- else:
- failed('Use beam.ParDo')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
output = get_file_output()
answers = [
- "Event(1, book-order, 2020-03-04), timestamp='2020-03-03T16:00:00Z'",
- "Event(2, pencil-order, 2020-03-05), timestamp='2020-03-04T16:00:00Z'",
- "Event(3, paper-order, 2020-03-06), timestamp='2020-03-05T16:00:00Z'",
- "Event(4, pencil-order, 2020-03-07), timestamp='2020-03-06T16:00:00Z'",
- "Event(5, book-order, 2020-03-08), timestamp='2020-03-07T16:00:00Z'"
+ "Event(1, book-order, 2020-03-04 00:00:00+00:00), timestamp='2020-03-04T00:00:00Z'",
+ "Event(2, pencil-order, 2020-03-05 00:00:00+00:00), timestamp='2020-03-05T00:00:00Z'",
+ "Event(3, paper-order, 2020-03-06 00:00:00+00:00), timestamp='2020-03-06T00:00:00Z'",
+ "Event(4, pencil-order, 2020-03-07 00:00:00+00:00), timestamp='2020-03-07T00:00:00Z'",
+ "Event(5, book-order, 2020-03-08 00:00:00+00:00), timestamp='2020-03-08T00:00:00Z'"
]
if all(line in output for line in answers):
@@ -58,7 +36,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_dofn_process_method()
- test_pardo()
test_output()
diff --git a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task-info.yaml b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task-info.yaml
index 289777f8f57c4..5c91b23e68436 100644
--- a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task-info.yaml
+++ b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task-info.yaml
@@ -22,7 +22,7 @@ files:
- name: task.py
visible: true
placeholders:
- - offset: 2074
+ - offset: 2067
length: 85
placeholder_text: TODO()
- name: tests.py
diff --git a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task-remote-info.yaml b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task-remote-info.yaml
index c826ee1d0de75..70623269a5d5b 100644
--- a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task-remote-info.yaml
+++ b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task-remote-info.yaml
@@ -1,2 +1,2 @@
id: 1124220
-update_date: Mon, 09 Mar 2020 14:34:10 UTC
+update_date: Tue, 19 May 2020 03:06:01 UTC
diff --git a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task.html b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task.html
deleted file mode 100644
index 2709b677aef2b..0000000000000
--- a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task.html
+++ /dev/null
@@ -1,61 +0,0 @@
-
-
-
-
Fixed Time Window
-
- Windowing subdivides a PCollection according to the timestamps of its individual elements.
- Transforms that aggregate multiple elements, such as GroupByKey and Combine, work implicitly on
- a per-window basis — they process each PCollection as a succession of multiple, finite windows,
- though the entire collection itself may be of unbounded size.
-
-
- In the Beam model, any PCollection (including unbounded PCollections) can be subdivided into
- logical windows. Each element in a PCollection is assigned to one or more windows according to
- the PCollection’s windowing function, and each individual window contains a finite number of
- elements. Grouping transforms then consider each PCollection’s elements on a per-window basis.
- GroupByKey, for example, implicitly groups the elements of a PCollection by key and window.
-
-
- Beam provides several windowing functions, including:
-
-
Fixed Time Windows
-
Sliding Time Windows
-
Per-Session Windows
-
Single Global Window
-
-
-
- The simplest form of windowing is using fixed time windows. A fixed time window represents a
- consistent duration, non overlapping time interval in the data stream.
-
-
- Kata: Please count the number of events that happened based on fixed window with
- 1-day duration.
-
- Refer to the Beam Programming Guide
-
- "Fixed time windows" section for more information.
-
-
diff --git a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task.md b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task.md
new file mode 100644
index 0000000000000..cbff325153905
--- /dev/null
+++ b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task.md
@@ -0,0 +1,54 @@
+
+
+Fixed Time Window
+-----------------
+
+Windowing subdivides a PCollection according to the timestamps of its individual elements.
+Transforms that aggregate multiple elements, such as GroupByKey and Combine, work implicitly on a
+per-window basis — they process each PCollection as a succession of multiple, finite windows,
+though the entire collection itself may be of unbounded size.
+
+In the Beam model, any PCollection (including unbounded PCollections) can be subdivided into
+logical windows. Each element in a PCollection is assigned to one or more windows according to the
+PCollection’s windowing function, and each individual window contains a finite number of elements.
+Grouping transforms then consider each PCollection’s elements on a per-window basis. GroupByKey,
+for example, implicitly groups the elements of a PCollection by key and window.
+
+Beam provides several windowing functions, including:
+
+* Fixed Time Windows
+* Sliding Time Windows
+* Per-Session Windows
+* Single Global Window
+
+The simplest form of windowing is using fixed time windows. A fixed time window represents a
+consistent duration, non overlapping time interval in the data stream.
+
+**Kata:** Please count the number of events that happened based on a fixed window with 1-day duration.
+
+
+ Refer to the Beam Programming Guide
+
+ "Fixed time windows" section for more information.
+
diff --git a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task.py b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task.py
index 384eb1a97fdd3..0444becb41fe8 100644
--- a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task.py
+++ b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/task.py
@@ -15,6 +15,7 @@
# limitations under the License.
from datetime import datetime
+import pytz
import apache_beam as beam
from apache_beam.transforms import window
@@ -25,16 +26,16 @@
p = beam.Pipeline()
(p | beam.Create([
- window.TimestampedValue("event", datetime.fromisoformat('2020-03-01T00:00:00+00:00').timestamp()),
- window.TimestampedValue("event", datetime.fromisoformat('2020-03-01T00:00:00+00:00').timestamp()),
- window.TimestampedValue("event", datetime.fromisoformat('2020-03-01T00:00:00+00:00').timestamp()),
- window.TimestampedValue("event", datetime.fromisoformat('2020-03-01T00:00:00+00:00').timestamp()),
- window.TimestampedValue("event", datetime.fromisoformat('2020-03-05T00:00:00+00:00').timestamp()),
- window.TimestampedValue("event", datetime.fromisoformat('2020-03-05T00:00:00+00:00').timestamp()),
- window.TimestampedValue("event", datetime.fromisoformat('2020-03-08T00:00:00+00:00').timestamp()),
- window.TimestampedValue("event", datetime.fromisoformat('2020-03-08T00:00:00+00:00').timestamp()),
- window.TimestampedValue("event", datetime.fromisoformat('2020-03-08T00:00:00+00:00').timestamp()),
- window.TimestampedValue("event", datetime.fromisoformat('2020-03-10T00:00:00+00:00').timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 1, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 1, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 1, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 1, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 5, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 5, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 8, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 8, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 8, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
+ window.TimestampedValue("event", datetime(2020, 3, 10, 0, 0, 0, 0, tzinfo=pytz.UTC).timestamp()),
])
| beam.WindowInto(window.FixedWindows(24*60*60))
| beam.combiners.Count.PerElement()
diff --git a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/tests.py b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/tests.py
index ee5b2ab3e1f28..e627fb5f4ec44 100644
--- a/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/tests.py
+++ b/learning/katas/python/Windowing/Fixed Time Window/Fixed Time Window/tests.py
@@ -14,19 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from test_helper import failed, passed, \
- get_answer_placeholders, get_file_output, test_is_not_empty, \
- test_answer_placeholders_text_deleted
-
-
-def test_fixedwindows():
- placeholders = get_answer_placeholders()
- placeholder = placeholders[0]
-
- if 'FixedWindows' in placeholder:
- passed()
- else:
- failed('Use FixedWindows')
+from test_helper import failed, passed, get_file_output, test_is_not_empty
def test_output():
@@ -47,6 +35,4 @@ def test_output():
if __name__ == '__main__':
test_is_not_empty()
- test_answer_placeholders_text_deleted()
- test_fixedwindows()
test_output()
diff --git a/learning/katas/python/course-remote-info.yaml b/learning/katas/python/course-remote-info.yaml
index 38b403da0f4b4..aa03268344306 100644
--- a/learning/katas/python/course-remote-info.yaml
+++ b/learning/katas/python/course-remote-info.yaml
@@ -1,2 +1,2 @@
id: 54532
-update_date: Mon, 09 Mar 2020 14:33:44 UTC
+update_date: Tue, 19 May 2020 03:04:36 UTC
diff --git a/learning/katas/python/requirements.txt b/learning/katas/python/requirements.txt
index 94d77ad88f2f0..d3b069b0b463d 100644
--- a/learning/katas/python/requirements.txt
+++ b/learning/katas/python/requirements.txt
@@ -16,3 +16,5 @@
apache-beam==2.19.0
apache-beam[test]==2.19.0
+
+pytz~=2019.3
\ No newline at end of file
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java
index 9ed686e0ab499..22bff18dbdc21 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java
@@ -126,6 +126,15 @@ public interface FlinkPipelineOptions
void setFailOnCheckpointingErrors(Boolean failOnCheckpointingErrors);
+ @Description(
+ "If set, finishes the current bundle and flushes all output before checkpointing the state of the operators. "
+ + "By default, starts checkpointing immediately and buffers any remaining bundle output as part of the checkpoint. "
+ + "The setting may affect the checkpoint alignment.")
+ @Default.Boolean(false)
+ boolean getFinishBundleBeforeCheckpointing();
+
+ void setFinishBundleBeforeCheckpointing(boolean finishBundleBeforeCheckpointing);
+
@Description(
"Shuts down sources which have been idle for the configured time of milliseconds. Once a source has been "
+ "shut down, checkpointing is not possible anymore. Shutting down the sources eventually leads to pipeline "
@@ -175,7 +184,7 @@ public interface FlinkPipelineOptions
@Default.Boolean(false)
Boolean getDisableMetrics();
- void setDisableMetrics(Boolean enableMetrics);
+ void setDisableMetrics(Boolean disableMetrics);
/** Enables or disables externalized checkpoints. */
@Description(
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
index 51cd98d359128..f800bc20e5125 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
@@ -189,6 +189,8 @@ public class DoFnOperator extends AbstractStreamOperator timerService;
private transient PushedBackElementsHandler> pushedBackElementsHandler;
@@ -294,6 +296,8 @@ public DoFnOperator(
flinkOptions.getCheckpointingInterval()
+ Math.max(0, flinkOptions.getMinPauseBetweenCheckpoints()));
}
+
+ this.finishBundleBeforeCheckpointing = flinkOptions.getFinishBundleBeforeCheckpointing();
}
// allow overriding this in WindowDoFnOperator because this one dynamically creates
@@ -843,6 +847,17 @@ protected final void invokeFinishBundle() {
}
}
+ @Override
+ public void prepareSnapshotPreBarrier(long checkpointId) {
+ if (finishBundleBeforeCheckpointing) {
+ // We finish the bundle and flush any pending data.
+ // This avoids buffering any data as part of snapshotState() below.
+ while (bundleStarted) {
+ invokeFinishBundle();
+ }
+ }
+ }
+
@Override
public final void snapshotState(StateSnapshotContext context) throws Exception {
if (checkpointStats != null) {
@@ -855,8 +870,6 @@ public final void snapshotState(StateSnapshotContext context) throws Exception {
bufferingDoFnRunner.checkpoint(context.getCheckpointId());
}
- // We can't output here anymore because the checkpoint barrier has already been
- // sent downstream. This is going to change with 1.6/1.7's prepareSnapshotBarrier.
try {
outputManager.openBuffer();
// Ensure that no new bundle gets started as part of finishing a bundle
@@ -946,8 +959,15 @@ BufferedOutputManager create(
/**
* A {@link DoFnRunners.OutputManager} that can buffer its outputs. Uses {@link
* PushedBackElementsHandler} to buffer the data. Buffering data is necessary because no elements
- * can be emitted during {@code snapshotState}. This can be removed once we upgrade Flink to >=
- * 1.6 which allows us to finish the bundle before the checkpoint barriers have been emitted.
+ * can be emitted during {@code snapshotState}, which is called after the checkpoint barrier has
+ * already been sent downstream. Emitting elements would break the flow of checkpoint barriers and
+ * violate exactly-once semantics.
+ *
+ *
+ * <p>This buffering can be deactivated using {@code
+ * FlinkPipelineOptions#setFinishBundleBeforeCheckpointing(true)}. If activated, we flush out
+ * bundle data before the barrier is sent downstream. This is done via {@code
+ * prepareSnapshotPreBarrier}. When Flink supports unaligned checkpoints, this should become the
+ * default and this class should be removed as in https://github.com/apache/beam/pull/9652.
*/
public static class BufferedOutputManager implements DoFnRunners.OutputManager {
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkPipelineOptionsTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkPipelineOptionsTest.java
index 897979abc464e..13c04aa47ed68 100644
--- a/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkPipelineOptionsTest.java
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkPipelineOptionsTest.java
@@ -83,6 +83,7 @@ public void testDefaults() {
assertThat(options.getCheckpointTimeoutMillis(), is(-1L));
assertThat(options.getNumConcurrentCheckpoints(), is(1));
assertThat(options.getFailOnCheckpointingErrors(), is(true));
+ assertThat(options.getFinishBundleBeforeCheckpointing(), is(false));
assertThat(options.getNumberOfExecutionRetries(), is(-1));
assertThat(options.getExecutionRetryDelay(), is(-1L));
assertThat(options.getRetainExternalizedCheckpointsOnCancellation(), is(false));
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/website/PipelineOptionsTableGenerator.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/website/PipelineOptionsTableGenerator.java
index 8ca490e816518..7ef13601edea5 100644
--- a/runners/flink/src/test/java/org/apache/beam/runners/flink/website/PipelineOptionsTableGenerator.java
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/website/PipelineOptionsTableGenerator.java
@@ -84,8 +84,8 @@ private static void printHeader() {
System.out.println(
"");
}
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/ClassLoaderFileSystem.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/ClassLoaderFileSystem.java
new file mode 100644
index 0000000000000..8437ccfdf5fa1
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/ClassLoaderFileSystem.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io;
+
+import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument;
+
+import com.google.auto.service.AutoService;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.channels.Channels;
+import java.nio.channels.ReadableByteChannel;
+import java.nio.channels.WritableByteChannel;
+import java.util.Collection;
+import java.util.List;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.annotations.Experimental;
+import org.apache.beam.sdk.io.fs.CreateOptions;
+import org.apache.beam.sdk.io.fs.MatchResult;
+import org.apache.beam.sdk.io.fs.ResolveOptions;
+import org.apache.beam.sdk.io.fs.ResourceId;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
+
+/** A read-only {@link FileSystem} implementation looking up resources using a ClassLoader. */
+public class ClassLoaderFileSystem extends FileSystem {
+
+ public static final String SCHEMA = "classpath";
+ private static final String PREFIX = SCHEMA + "://";
+
+ ClassLoaderFileSystem() {}
+
+ @Override
+ protected List match(List specs) throws IOException {
+ throw new UnsupportedOperationException("Un-globbable filesystem.");
+ }
+
+ @Override
+ protected WritableByteChannel create(
+ ClassLoaderResourceId resourceId, CreateOptions createOptions) throws IOException {
+ throw new UnsupportedOperationException("Read-only filesystem.");
+ }
+
+ @Override
+ protected ReadableByteChannel open(ClassLoaderResourceId resourceId) throws IOException {
+ ClassLoader classLoader = getClass().getClassLoader();
+ InputStream inputStream =
+ classLoader.getResourceAsStream(resourceId.path.substring(PREFIX.length()));
+ if (inputStream == null) {
+
+ throw new IOException(
+ "Unable to load "
+ + resourceId.path
+ + " with "
+ + classLoader
+ + " URL "
+ + classLoader.getResource(resourceId.path.substring(PREFIX.length())));
+ }
+ return Channels.newChannel(inputStream);
+ }
+
+ @Override
+ protected void copy(
+ List srcResourceIds, List destResourceIds)
+ throws IOException {
+ throw new UnsupportedOperationException("Read-only filesystem.");
+ }
+
+ @Override
+ protected void rename(
+ List srcResourceIds, List destResourceIds)
+ throws IOException {
+ throw new UnsupportedOperationException("Read-only filesystem.");
+ }
+
+ @Override
+ protected void delete(Collection resourceIds) throws IOException {
+ throw new UnsupportedOperationException("Read-only filesystem.");
+ }
+
+ @Override
+ protected ClassLoaderResourceId matchNewResource(String path, boolean isDirectory) {
+ return new ClassLoaderResourceId(path);
+ }
+
+ @Override
+ protected String getScheme() {
+ return SCHEMA;
+ }
+
+ public static class ClassLoaderResourceId implements ResourceId {
+
+ private final String path;
+
+ private ClassLoaderResourceId(String path) {
+ checkArgument(path.startsWith(PREFIX), path);
+ this.path = path;
+ }
+
+ @Override
+ public ClassLoaderResourceId resolve(String other, ResolveOptions resolveOptions) {
+ if (other.startsWith(PREFIX)) {
+ return new ClassLoaderResourceId(other);
+ } else if (other.startsWith("/")) {
+ return new ClassLoaderResourceId(SCHEMA + ":/" + other);
+ } else {
+ return new ClassLoaderResourceId(path + "/" + other);
+ }
+ }
+
+ @Override
+ public ClassLoaderResourceId getCurrentDirectory() {
+ int ix = path.lastIndexOf('/');
+ if (ix <= PREFIX.length()) {
+ return new ClassLoaderResourceId(PREFIX);
+ } else {
+ return new ClassLoaderResourceId(path.substring(0, ix));
+ }
+ }
+
+ @Override
+ public String getScheme() {
+ return SCHEMA;
+ }
+
+ @Nullable
+ @Override
+ public String getFilename() {
+ return path;
+ }
+
+ @Override
+ public boolean isDirectory() {
+ return false;
+ }
+ }
+
+ /** {@link AutoService} registrar for the {@link ClassLoaderFileSystem}. */
+ @AutoService(FileSystemRegistrar.class)
+ @Experimental(Experimental.Kind.FILESYSTEM)
+ public static class ClassLoaderFileSystemRegistrar implements FileSystemRegistrar {
+ @Override
+ public Iterable fromOptions(@Nullable PipelineOptions options) {
+ return ImmutableList.of(new ClassLoaderFileSystem());
+ }
+ }
+}
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/Date.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/Date.java
new file mode 100644
index 0000000000000..d942340db5d27
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/Date.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.schemas.logicaltypes;
+
+import java.time.LocalDate;
+import org.apache.beam.sdk.schemas.Schema;
+
+/**
+ * A date without a time-zone.
+ *
+ *
+ * <p>It cannot represent an instant on the time-line without additional information such as an
+ * offset or time-zone.
+ *
+ *
+ * <p>Its input type is a {@link LocalDate}, and base type is a {@link Long} that represents an
+ * incrementing count of days where day 0 is 1970-01-01 (ISO).
+ */
+public class Date implements Schema.LogicalType {
+
+ @Override
+ public String getIdentifier() {
+ return "beam:logical_type:date:v1";
+ }
+
+ // unused
+ @Override
+ public Schema.FieldType getArgumentType() {
+ return Schema.FieldType.STRING;
+ }
+
+ // unused
+ @Override
+ public String getArgument() {
+ return "";
+ }
+
+ @Override
+ public Schema.FieldType getBaseType() {
+ return Schema.FieldType.INT64;
+ }
+
+ @Override
+ public Long toBaseType(LocalDate input) {
+ return input == null ? null : input.toEpochDay();
+ }
+
+ @Override
+ public LocalDate toInputType(Long base) {
+ return base == null ? null : LocalDate.ofEpochDay(base);
+ }
+}
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/SqlTypes.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/SqlTypes.java
new file mode 100644
index 0000000000000..4dbab48251596
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/logicaltypes/SqlTypes.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.schemas.logicaltypes;
+
+import java.time.LocalDate;
+import org.apache.beam.sdk.schemas.Schema.LogicalType;
+
+/** Beam {@link org.apache.beam.sdk.schemas.Schema.LogicalType}s corresponding to SQL data types. */
+public class SqlTypes {
+
+ private SqlTypes() {}
+
+ /** Beam LogicalType corresponding to ZetaSQL/CalciteSQL DATE type. */
+ public static final LogicalType DATE = new Date();
+}
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/ClassLoaderFileSystemTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/ClassLoaderFileSystemTest.java
new file mode 100644
index 0000000000000..2667196fb3383
--- /dev/null
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/ClassLoaderFileSystemTest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io;
+
+import static java.nio.channels.Channels.newInputStream;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.channels.ReadableByteChannel;
+import org.apache.beam.sdk.io.fs.ResolveOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class ClassLoaderFileSystemTest {
+
+ private static final String SOME_CLASS =
+ "classpath://org/apache/beam/sdk/io/ClassLoaderFileSystem.class";
+
+ @Test
+ public void testOpen() throws IOException {
+ ClassLoaderFileSystem filesystem = new ClassLoaderFileSystem();
+ ReadableByteChannel channel = filesystem.open(filesystem.matchNewResource(SOME_CLASS, false));
+ checkIsClass(channel);
+ }
+
+ @Test
+ public void testRegistrar() throws IOException {
+ ReadableByteChannel channel = FileSystems.open(FileSystems.matchNewResource(SOME_CLASS, false));
+ checkIsClass(channel);
+ }
+
+ @Test
+ public void testResolve() throws IOException {
+ ClassLoaderFileSystem filesystem = new ClassLoaderFileSystem();
+ ClassLoaderFileSystem.ClassLoaderResourceId original =
+ filesystem.matchNewResource(SOME_CLASS, false);
+ ClassLoaderFileSystem.ClassLoaderResourceId parent = original.getCurrentDirectory();
+ ClassLoaderFileSystem.ClassLoaderResourceId grandparent = parent.getCurrentDirectory();
+ assertEquals("classpath://org/apache/beam/sdk", grandparent.getFilename());
+ ClassLoaderFileSystem.ClassLoaderResourceId resource =
+ grandparent
+ .resolve("io", ResolveOptions.StandardResolveOptions.RESOLVE_DIRECTORY)
+ .resolve(
+ "ClassLoaderFileSystem.class", ResolveOptions.StandardResolveOptions.RESOLVE_FILE);
+ ReadableByteChannel channel = filesystem.open(resource);
+ checkIsClass(channel);
+ }
+
+ public void checkIsClass(ReadableByteChannel channel) throws IOException {
+ FileSystems.setDefaultPipelineOptions(PipelineOptionsFactory.create());
+ InputStream inputStream = newInputStream(channel);
+ byte[] magic = new byte[4];
+ inputStream.read(magic);
+ assertArrayEquals(magic, new byte[] {(byte) 0xCA, (byte) 0xFE, (byte) 0xBA, (byte) 0xBE});
+ }
+}
diff --git a/sdks/java/extensions/sql/datacatalog/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/datacatalog/SchemaUtils.java b/sdks/java/extensions/sql/datacatalog/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/datacatalog/SchemaUtils.java
index 8e76f4cc7bb1e..0ce9f335bc895 100644
--- a/sdks/java/extensions/sql/datacatalog/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/datacatalog/SchemaUtils.java
+++ b/sdks/java/extensions/sql/datacatalog/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/datacatalog/SchemaUtils.java
@@ -28,6 +28,7 @@
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.Schema.Field;
import org.apache.beam.sdk.schemas.Schema.FieldType;
+import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes;
import org.apache.beam.vendor.calcite.v1_20_0.com.google.common.base.Strings;
import org.apache.beam.vendor.calcite.v1_20_0.com.google.common.collect.ImmutableMap;
@@ -38,7 +39,7 @@ class SchemaUtils {
ImmutableMap.builder()
.put("BOOL", FieldType.BOOLEAN)
.put("BYTES", FieldType.BYTES)
- .put("DATE", FieldType.logicalType(new CalciteUtils.DateType()))
+ .put("DATE", FieldType.logicalType(SqlTypes.DATE))
.put("DATETIME", FieldType.DATETIME)
.put("DOUBLE", FieldType.DOUBLE)
.put("FLOAT", FieldType.DOUBLE)
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRel.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRel.java
index b9821aa1f237c..25392ff7cc5ee 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRel.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRel.java
@@ -20,13 +20,13 @@
import static org.apache.beam.sdk.schemas.Schema.FieldType;
import static org.apache.beam.sdk.schemas.Schema.TypeName;
import static org.apache.beam.vendor.calcite.v1_20_0.com.google.common.base.Preconditions.checkArgument;
-import static org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.avatica.util.DateTimeUtils.MILLIS_PER_DAY;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.lang.reflect.Type;
import java.math.BigDecimal;
+import java.time.LocalDate;
import java.util.AbstractList;
import java.util.AbstractMap;
import java.util.Arrays;
@@ -39,11 +39,11 @@
import org.apache.beam.sdk.extensions.sql.impl.planner.BeamJavaTypeFactory;
import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils;
import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.CharType;
-import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.DateType;
import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.TimeType;
import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.TimeWithLocalTzType;
import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.TimestampWithLocalTzType;
import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
@@ -315,7 +315,7 @@ private static Expression castOutput(Expression value, FieldType toType) {
private static Expression castOutputTime(Expression value, FieldType toType) {
Expression valueDateTime = value;
- // First, convert to millis
+ // First, convert to millis (except for DATE type)
if (CalciteUtils.TIMESTAMP.typesEqual(toType)
|| CalciteUtils.NULLABLE_TIMESTAMP.typesEqual(toType)) {
if (value.getType() == java.sql.Timestamp.class) {
@@ -331,13 +331,16 @@ private static Expression castOutputTime(Expression value, FieldType toType) {
if (value.getType() == java.sql.Date.class) {
valueDateTime = Expressions.call(BuiltInMethod.DATE_TO_INT.method, valueDateTime);
}
- valueDateTime = Expressions.multiply(valueDateTime, Expressions.constant(MILLIS_PER_DAY));
} else {
throw new UnsupportedOperationException("Unknown DateTime type " + toType);
}
- // Second, convert to joda Instant
- valueDateTime = Expressions.new_(Instant.class, valueDateTime);
+ // Second, convert to joda Instant (or LocalDate for DATE type)
+ if (CalciteUtils.DATE.typesEqual(toType) || CalciteUtils.NULLABLE_DATE.typesEqual(toType)) {
+ valueDateTime = Expressions.call(LocalDate.class, "ofEpochDay", valueDateTime);
+ } else {
+ valueDateTime = Expressions.new_(Instant.class, valueDateTime);
+ }
// Third, make conversion conditional on non-null input.
if (!((Class) value.getType()).isPrimitive()) {
@@ -371,9 +374,9 @@ private static class InputGetterImpl implements RexToLixTranslator.InputGetter {
.put(TypeName.ROW, Row.class)
.build();
- private static final Map LOGICAL_TYPE_CONVERSION_MAP =
+ private static final Map LOGICAL_TYPE_TO_BASE_TYPE_MAP =
ImmutableMap.builder()
- .put(DateType.IDENTIFIER, ReadableInstant.class)
+ .put(SqlTypes.DATE.getIdentifier(), Long.class)
.put(TimeType.IDENTIFIER, ReadableInstant.class)
.put(TimeWithLocalTzType.IDENTIFIER, ReadableInstant.class)
.put(TimestampWithLocalTzType.IDENTIFIER, ReadableInstant.class)
@@ -406,7 +409,7 @@ private static Expression value(
if (storageType == Object.class) {
convertTo = Object.class;
} else if (fromType.getTypeName().isLogicalType()) {
- convertTo = LOGICAL_TYPE_CONVERSION_MAP.get(fromType.getLogicalType().getIdentifier());
+ convertTo = LOGICAL_TYPE_TO_BASE_TYPE_MAP.get(fromType.getLogicalType().getIdentifier());
} else {
convertTo = TYPE_CONVERSION_MAP.get(fromType.getTypeName());
}
@@ -427,18 +430,13 @@ private static Expression value(
private static Expression value(Expression value, Schema.FieldType type) {
if (type.getTypeName().isLogicalType()) {
- Expression millisField = Expressions.call(value, "getMillis");
String logicalId = type.getLogicalType().getIdentifier();
- if (logicalId.equals(TimeType.IDENTIFIER)) {
- return nullOr(value, Expressions.convert_(millisField, int.class));
- } else if (logicalId.equals(DateType.IDENTIFIER)) {
- value =
- nullOr(
- value,
- Expressions.convert_(
- Expressions.divide(millisField, Expressions.constant(MILLIS_PER_DAY)),
- int.class));
- } else if (!logicalId.equals(CharType.IDENTIFIER)) {
+ if (TimeType.IDENTIFIER.equals(logicalId)) {
+ return nullOr(
+ value, Expressions.convert_(Expressions.call(value, "getMillis"), int.class));
+ } else if (SqlTypes.DATE.getIdentifier().equals(logicalId)) {
+ value = nullOr(value, value);
+ } else if (!CharType.IDENTIFIER.equals(logicalId)) {
throw new UnsupportedOperationException(
"Unknown LogicalType " + type.getLogicalType().getIdentifier());
}
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamEnumerableConverter.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamEnumerableConverter.java
index 11820a0df9439..1a688df933307 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamEnumerableConverter.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamEnumerableConverter.java
@@ -18,9 +18,9 @@
package org.apache.beam.sdk.extensions.sql.impl.rel;
import static org.apache.beam.vendor.calcite.v1_20_0.com.google.common.base.Preconditions.checkArgument;
-import static org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.avatica.util.DateTimeUtils.MILLIS_PER_DAY;
import java.io.IOException;
+import java.time.LocalDate;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -36,7 +36,6 @@
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.PipelineResult.State;
import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.CharType;
-import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.DateType;
import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.TimeType;
import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.metrics.Counter;
@@ -50,6 +49,7 @@
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.runners.TransformHierarchy.Node;
import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;
@@ -303,11 +303,15 @@ private static Object fieldToAvatica(Schema.FieldType type, Object beamValue) {
switch (type.getTypeName()) {
case LOGICAL_TYPE:
String logicalId = type.getLogicalType().getIdentifier();
- if (logicalId.equals(TimeType.IDENTIFIER)) {
+ if (TimeType.IDENTIFIER.equals(logicalId)) {
return (int) ((ReadableInstant) beamValue).getMillis();
- } else if (logicalId.equals(DateType.IDENTIFIER)) {
- return (int) (((ReadableInstant) beamValue).getMillis() / MILLIS_PER_DAY);
- } else if (logicalId.equals(CharType.IDENTIFIER)) {
+ } else if (SqlTypes.DATE.getIdentifier().equals(logicalId)) {
+ if (beamValue instanceof Long) { // base type
+ return ((Long) beamValue).intValue();
+ } else { // input type
+ return (int) (((LocalDate) beamValue).toEpochDay());
+ }
+ } else if (CharType.IDENTIFIER.equals(logicalId)) {
return beamValue;
} else {
throw new UnsupportedOperationException("Unknown DateTime type " + logicalId);
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/schema/BeamTableUtils.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/schema/BeamTableUtils.java
index f3c2704cdbc4a..aa8767464ef78 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/schema/BeamTableUtils.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/schema/BeamTableUtils.java
@@ -22,7 +22,10 @@
import java.io.IOException;
import java.io.StringWriter;
import java.math.BigDecimal;
+import java.time.Instant;
+import java.time.LocalDate;
import java.util.ArrayList;
+import java.util.GregorianCalendar;
import java.util.List;
import java.util.stream.IntStream;
import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils;
@@ -114,8 +117,17 @@ public static Object autoCastField(Schema.Field field, Object rawObj) {
} else {
return rawObj;
}
+ } else if (CalciteUtils.DATE.typesEqual(type) || CalciteUtils.NULLABLE_DATE.typesEqual(type)) {
+ if (rawObj instanceof GregorianCalendar) { // used by the SQL CLI
+ GregorianCalendar calendar = (GregorianCalendar) rawObj;
+ return Instant.ofEpochMilli(calendar.getTimeInMillis())
+ .atZone(calendar.getTimeZone().toZoneId())
+ .toLocalDate();
+ } else {
+ return LocalDate.ofEpochDay((Integer) rawObj);
+ }
} else if (CalciteUtils.isDateTimeType(type)) {
- // Internal representation of DateType in Calcite is convertible to Joda's Datetime.
+ // Internal representation of Date in Calcite is convertible to Joda's Datetime.
return new DateTime(rawObj);
} else if (type.getTypeName().isNumericType()
&& ((rawObj instanceof String)
diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/utils/CalciteUtils.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/utils/CalciteUtils.java
index e0b994d5815f7..8326567377548 100644
--- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/utils/CalciteUtils.java
+++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/utils/CalciteUtils.java
@@ -25,6 +25,7 @@
import org.apache.beam.sdk.schemas.Schema.FieldType;
import org.apache.beam.sdk.schemas.Schema.TypeName;
import org.apache.beam.sdk.schemas.logicaltypes.PassThroughLogicalType;
+import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes;
import org.apache.beam.vendor.calcite.v1_20_0.com.google.common.collect.BiMap;
import org.apache.beam.vendor.calcite.v1_20_0.com.google.common.collect.ImmutableBiMap;
import org.apache.beam.vendor.calcite.v1_20_0.com.google.common.collect.ImmutableMap;
@@ -43,15 +44,6 @@ public class CalciteUtils {
// SQL has schema types that do not directly correspond to Beam Schema types. We define
// LogicalTypes to represent each of these types.
- /** A LogicalType corresponding to DATE. */
- public static class DateType extends PassThroughLogicalType {
- public static final String IDENTIFIER = "SqlDateType";
-
- public DateType() {
- super(IDENTIFIER, FieldType.STRING, "", FieldType.DATETIME);
- }
- }
-
/** A LogicalType corresponding to TIME. */
public static class TimeType extends PassThroughLogicalType {
public static final String IDENTIFIER = "SqlTimeType";
@@ -96,7 +88,7 @@ public static boolean isDateTimeType(FieldType fieldType) {
if (fieldType.getTypeName().isLogicalType()) {
String logicalId = fieldType.getLogicalType().getIdentifier();
- return logicalId.equals(DateType.IDENTIFIER)
+ return logicalId.equals(SqlTypes.DATE.getIdentifier())
|| logicalId.equals(TimeType.IDENTIFIER)
|| logicalId.equals(TimeWithLocalTzType.IDENTIFIER)
|| logicalId.equals(TimestampWithLocalTzType.IDENTIFIER);
@@ -128,9 +120,9 @@ public static boolean isStringType(FieldType fieldType) {
public static final FieldType VARBINARY = FieldType.BYTES;
public static final FieldType VARCHAR = FieldType.STRING;
public static final FieldType CHAR = FieldType.logicalType(new CharType());
- public static final FieldType DATE = FieldType.logicalType(new DateType());
+ public static final FieldType DATE = FieldType.logicalType(SqlTypes.DATE);
public static final FieldType NULLABLE_DATE =
- FieldType.logicalType(new DateType()).withNullable(true);
+ FieldType.logicalType(SqlTypes.DATE).withNullable(true);
public static final FieldType TIME = FieldType.logicalType(new TimeType());
public static final FieldType NULLABLE_TIME =
FieldType.logicalType(new TimeType()).withNullable(true);
@@ -205,12 +197,16 @@ public static SqlTypeName toSqlTypeName(FieldType type) {
return SqlTypeName.MAP;
default:
SqlTypeName typeName = BEAM_TO_CALCITE_TYPE_MAPPING.get(type.withNullable(false));
- if (typeName != null) {
- return typeName;
- } else {
+ if (typeName == null) {
// This will happen e.g. if looking up a STRING type, and metadata isn't set to say which
// type of SQL string we want. In this case, use the default mapping.
- return BEAM_TO_CALCITE_DEFAULT_MAPPING.get(type);
+ typeName = BEAM_TO_CALCITE_DEFAULT_MAPPING.get(type);
+ }
+ if (typeName == null) {
+ throw new IllegalArgumentException(
+ String.format("Cannot find a matching Calcite SqlTypeName for Beam type: %s", type));
+ } else {
+ return typeName;
}
}
}
diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamComplexTypeTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamComplexTypeTest.java
index 8b3d7e502d26e..7cfcb951ca56c 100644
--- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamComplexTypeTest.java
+++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamComplexTypeTest.java
@@ -17,6 +17,7 @@
*/
package org.apache.beam.sdk.extensions.sql;
+import java.time.LocalDate;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
@@ -26,6 +27,7 @@
import org.apache.beam.sdk.extensions.sql.meta.provider.test.TestBoundedTable;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.Schema.FieldType;
+import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.Create;
@@ -373,40 +375,6 @@ public void testNullInnerRow() {
pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
- @Test
- public void testLogicalTypes() {
- DateTime dateTime = DateTime.parse("2020-02-02T00:00:00");
-
- Schema inputRowSchema =
- Schema.builder()
- .addField("timeTypeField", FieldType.logicalType(new DummySqlTimeType()))
- .addField("dateTypeField", FieldType.logicalType(new DummySqlDateType()))
- .build();
-
- Row row =
- Row.withSchema(inputRowSchema)
- .addValues(dateTime.getMillis(), dateTime.getMillis())
- .build();
-
- Schema outputRowSchema =
- Schema.builder()
- .addField("timeTypeField", FieldType.DATETIME)
- .addNullableField("dateTypeField", FieldType.DATETIME)
- .build();
-
- PCollection outputRow =
- pipeline
- .apply(Create.of(row).withRowSchema(inputRowSchema))
- .apply(
- SqlTransform.query(
- "SELECT timeTypeField, dateTypeField FROM PCOLLECTION GROUP BY timeTypeField, dateTypeField"));
-
- PAssert.that(outputRow)
- .containsInAnyOrder(Row.withSchema(outputRowSchema).addValues(dateTime, dateTime).build());
-
- pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
- }
-
private static class DummySqlTimeType implements Schema.LogicalType {
@Override
public String getIdentifier() {
@@ -439,38 +407,6 @@ public Long toInputType(Instant base) {
}
}
- private static class DummySqlDateType implements Schema.LogicalType {
- @Override
- public String getIdentifier() {
- return "SqlDateType";
- }
-
- @Override
- public FieldType getArgumentType() {
- return FieldType.STRING;
- }
-
- @Override
- public String getArgument() {
- return "";
- }
-
- @Override
- public Schema.FieldType getBaseType() {
- return Schema.FieldType.DATETIME;
- }
-
- @Override
- public Instant toBaseType(Long input) {
- return (input == null ? null : new Instant((long) input));
- }
-
- @Override
- public Long toInputType(Instant base) {
- return (base == null ? null : base.getMillis());
- }
- }
-
@Test
public void testNullDatetimeFields() {
Instant current = new Instant(1561671380000L); // Long value corresponds to 27/06/2019
@@ -483,14 +419,13 @@ public void testNullDatetimeFields() {
.addField("timeTypeField", FieldType.logicalType(new DummySqlTimeType()))
.addNullableField(
"nullableTimeTypeField", FieldType.logicalType(new DummySqlTimeType()))
- .addField("dateTypeField", FieldType.logicalType(new DummySqlDateType()))
- .addNullableField(
- "nullableDateTypeField", FieldType.logicalType(new DummySqlDateType()))
+ .addField("dateTypeField", FieldType.logicalType(SqlTypes.DATE))
+ .addNullableField("nullableDateTypeField", FieldType.logicalType(SqlTypes.DATE))
.build();
Row dateTimeRow =
Row.withSchema(dateTimeFieldSchema)
- .addValues(current, null, date.getMillis(), null, current.getMillis(), null)
+ .addValues(current, null, date.getMillis(), null, LocalDate.of(2019, 6, 27), null)
.build();
PCollection outputRow =
diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlCastTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlCastTest.java
index 01872a57dc71e..8f5fe6500c22e 100644
--- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlCastTest.java
+++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlCastTest.java
@@ -17,16 +17,15 @@
*/
package org.apache.beam.sdk.extensions.sql;
-import static org.apache.beam.sdk.schemas.Schema.FieldType.DATETIME;
-import static org.joda.time.DateTimeZone.UTC;
-
+import java.time.LocalDate;
+import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils;
import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.Row;
-import org.joda.time.DateTime;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
@@ -48,7 +47,10 @@ public void testCastToDate() {
.withRowSchema(INPUT_ROW_SCHEMA));
Schema resultType =
- Schema.builder().addInt32Field("f_int").addNullableField("f_date", DATETIME).build();
+ Schema.builder()
+ .addInt32Field("f_int")
+ .addNullableField("f_date", CalciteUtils.DATE)
+ .build();
PCollection result =
input.apply(
@@ -64,7 +66,7 @@ public void testCastToDate() {
PAssert.that(result)
.containsInAnyOrder(
- Row.withSchema(resultType).addValues(1, new DateTime(2018, 10, 18, 0, 0, UTC)).build());
+ Row.withSchema(resultType).addValues(1, LocalDate.of(2018, 10, 18)).build());
pipeline.run();
}
@@ -76,7 +78,11 @@ public void testCastToDateWithCase() {
Create.of(Row.withSchema(INPUT_ROW_SCHEMA).addValues(1).addValue("20181018").build())
.withRowSchema(INPUT_ROW_SCHEMA));
- Schema resultType = Schema.builder().addInt32Field("f_int").addDateTimeField("f_date").build();
+ Schema resultType =
+ Schema.builder()
+ .addInt32Field("f_int")
+ .addLogicalTypeField("f_date", SqlTypes.DATE)
+ .build();
PCollection result =
input.apply(
@@ -96,7 +102,7 @@ public void testCastToDateWithCase() {
PAssert.that(result)
.containsInAnyOrder(
- Row.withSchema(resultType).addValues(1, new DateTime(2018, 10, 18, 0, 0, UTC)).build());
+ Row.withSchema(resultType).addValues(1, LocalDate.of(2018, 10, 18)).build());
pipeline.run();
}
diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlCliTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlCliTest.java
index 214c4fa225e4e..5672849622dcb 100644
--- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlCliTest.java
+++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlCliTest.java
@@ -28,6 +28,7 @@
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
+import java.time.LocalDate;
import java.util.stream.Stream;
import org.apache.beam.sdk.extensions.sql.impl.ParseException;
import org.apache.beam.sdk.extensions.sql.meta.Table;
@@ -268,7 +269,7 @@ public void test_time_types() throws Exception {
assertEquals(3, row.getFieldCount());
// test DATE field
- assertEquals("2018-11-01", row.getDateTime("f_date").toString("yyyy-MM-dd"));
+ assertEquals("2018-11-01", row.getLogicalTypeValue("f_date", LocalDate.class).toString());
// test TIME field
assertEquals("15:23:59.000", row.getDateTime("f_time").toString("HH:mm:ss.SSS"));
// test TIMESTAMP field
diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlDslSqlStdOperatorsTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlDslSqlStdOperatorsTest.java
index 28ac9f41a159f..87cc62570d47c 100644
--- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlDslSqlStdOperatorsTest.java
+++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/BeamSqlDslSqlStdOperatorsTest.java
@@ -17,7 +17,6 @@
*/
package org.apache.beam.sdk.extensions.sql;
-import static org.apache.beam.sdk.extensions.sql.utils.DateTimeUtils.parseDate;
import static org.apache.beam.sdk.extensions.sql.utils.DateTimeUtils.parseTime;
import static org.apache.beam.sdk.extensions.sql.utils.DateTimeUtils.parseTimestampWithUTCTimeZone;
import static org.hamcrest.Matchers.equalTo;
@@ -33,6 +32,7 @@
import java.lang.reflect.Method;
import java.math.BigDecimal;
import java.math.RoundingMode;
+import java.time.LocalDate;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
@@ -40,6 +40,7 @@
import java.util.Random;
import java.util.Set;
import java.util.stream.Collectors;
+import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils;
import org.apache.beam.sdk.extensions.sql.integrationtest.BeamSqlBuiltinFunctionsIntegrationTestBase;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.Schema.FieldType;
@@ -1196,9 +1197,18 @@ public void testDatetimeInfixPlus() {
.addExpr(
"TIMESTAMP '1984-01-19 01:02:03' + INTERVAL '2' YEAR",
parseTimestampWithUTCTimeZone("1986-01-19 01:02:03"))
- .addExpr("DATE '1984-04-19' + INTERVAL '2' DAY", parseDate("1984-04-21"))
- .addExpr("DATE '1984-04-19' + INTERVAL '1' MONTH", parseDate("1984-05-19"))
- .addExpr("DATE '1984-04-19' + INTERVAL '3' YEAR", parseDate("1987-04-19"))
+ .addExpr(
+ "DATE '1984-04-19' + INTERVAL '2' DAY",
+ LocalDate.parse("1984-04-21"),
+ CalciteUtils.DATE)
+ .addExpr(
+ "DATE '1984-04-19' + INTERVAL '1' MONTH",
+ LocalDate.parse("1984-05-19"),
+ CalciteUtils.DATE)
+ .addExpr(
+ "DATE '1984-04-19' + INTERVAL '3' YEAR",
+ LocalDate.parse("1987-04-19"),
+ CalciteUtils.DATE)
.addExpr("TIME '14:28:30' + INTERVAL '15' SECOND", parseTime("14:28:45"))
.addExpr("TIME '14:28:30.239' + INTERVAL '4' MINUTE", parseTime("14:32:30.239"))
.addExpr("TIME '14:28:30.2' + INTERVAL '4' HOUR", parseTime("18:28:30.2"));
@@ -1317,9 +1327,18 @@ public void testTimestampMinusInterval() {
.addExpr(
"TIMESTAMP '1984-01-19 01:01:58' - INTERVAL '1' YEAR",
parseTimestampWithUTCTimeZone("1983-01-19 01:01:58"))
- .addExpr("DATE '1984-04-19' - INTERVAL '2' DAY", parseDate("1984-04-17"))
- .addExpr("DATE '1984-04-19' - INTERVAL '1' MONTH", parseDate("1984-03-19"))
- .addExpr("DATE '1984-04-19' - INTERVAL '3' YEAR", parseDate("1981-04-19"))
+ .addExpr(
+ "DATE '1984-04-19' - INTERVAL '2' DAY",
+ LocalDate.parse("1984-04-17"),
+ CalciteUtils.DATE)
+ .addExpr(
+ "DATE '1984-04-19' - INTERVAL '1' MONTH",
+ LocalDate.parse("1984-03-19"),
+ CalciteUtils.DATE)
+ .addExpr(
+ "DATE '1984-04-19' - INTERVAL '3' YEAR",
+ LocalDate.parse("1981-04-19"),
+ CalciteUtils.DATE)
.addExpr("TIME '14:28:30' - INTERVAL '15' SECOND", parseTime("14:28:15"))
.addExpr("TIME '14:28:30.239' - INTERVAL '4' MINUTE", parseTime("14:24:30.239"))
.addExpr("TIME '14:28:30.2' - INTERVAL '4' HOUR", parseTime("10:28:30.2"));
diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/integrationtest/BeamSqlDateFunctionsIntegrationTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/integrationtest/BeamSqlDateFunctionsIntegrationTest.java
index 0342cf03beb68..c25d70e08823b 100644
--- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/integrationtest/BeamSqlDateFunctionsIntegrationTest.java
+++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/integrationtest/BeamSqlDateFunctionsIntegrationTest.java
@@ -21,6 +21,7 @@
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
+import java.time.LocalDate;
import java.util.Iterator;
import org.apache.beam.sdk.extensions.sql.SqlTransform;
import org.apache.beam.sdk.testing.PAssert;
@@ -71,8 +72,12 @@ public Void apply(Iterable input) {
assertTrue(millis - row.getDateTime(1).getMillis() > -1000);
// CURRENT_DATE
- assertTrue(millis - row.getDateTime(2).getMillis() < MILLIS_PER_DAY);
- assertTrue(millis - row.getDateTime(2).getMillis() > -MILLIS_PER_DAY);
+ assertTrue(
+ millis - row.getLogicalTypeValue(2, LocalDate.class).toEpochDay() * MILLIS_PER_DAY
+ < MILLIS_PER_DAY);
+ assertTrue(
+ millis - row.getLogicalTypeValue(2, LocalDate.class).toEpochDay() * MILLIS_PER_DAY
+ > -MILLIS_PER_DAY);
// CURRENT_TIME
assertTrue(timeMillis - row.getDateTime(3).getMillis() < 1000);
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/SqlStdOperatorMappingTable.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/SqlStdOperatorMappingTable.java
index 22b2de97caaf8..aa807fe80933d 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/SqlStdOperatorMappingTable.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/SqlStdOperatorMappingTable.java
@@ -92,13 +92,28 @@ public class SqlStdOperatorMappingTable {
FunctionSignatureId.FN_IFNULL,
FunctionSignatureId.FN_NULLIF,
+ // Date functions
+ FunctionSignatureId.FN_CURRENT_DATE, // current_date
+ FunctionSignatureId.FN_EXTRACT_FROM_DATE, // $extract
+ FunctionSignatureId.FN_DATE_FROM_YEAR_MONTH_DAY, // date
+ FunctionSignatureId.FN_DATE_FROM_TIMESTAMP, // date
+ // FunctionSignatureId.FN_DATE_FROM_DATETIME, // date
+ FunctionSignatureId.FN_DATE_ADD_DATE, // date_add
+ FunctionSignatureId.FN_DATE_SUB_DATE, // date_sub
+ FunctionSignatureId.FN_DATE_DIFF_DATE, // date_diff
+ FunctionSignatureId.FN_DATE_TRUNC_DATE, // date_trunc
+ FunctionSignatureId.FN_FORMAT_DATE, // format_date
+ FunctionSignatureId.FN_PARSE_DATE, // parse_date
+ FunctionSignatureId.FN_UNIX_DATE, // unix_date
+ FunctionSignatureId.FN_DATE_FROM_UNIX_DATE, // date_from_unix_date
+
// Timestamp functions
FunctionSignatureId.FN_CURRENT_TIMESTAMP, // current_timestamp
FunctionSignatureId.FN_EXTRACT_FROM_TIMESTAMP, // $extract
FunctionSignatureId.FN_STRING_FROM_TIMESTAMP, // string
FunctionSignatureId.FN_TIMESTAMP_FROM_STRING, // timestamp
FunctionSignatureId.FN_TIMESTAMP_FROM_DATE, // timestamp
- // FunctionSignatureId.FN_TIMESTAMP_FROM_DATETIME // timestamp
+ // FunctionSignatureId.FN_TIMESTAMP_FROM_DATETIME, // timestamp
FunctionSignatureId.FN_TIMESTAMP_ADD, // timestamp_add
FunctionSignatureId.FN_TIMESTAMP_SUB, // timestamp_sub
FunctionSignatureId.FN_TIMESTAMP_DIFF, // timestamp_diff
@@ -115,13 +130,9 @@ public class SqlStdOperatorMappingTable {
FunctionSignatureId.FN_TIMESTAMP_FROM_UNIX_MILLIS_INT64, // timestamp_from_unix_millis
// FunctionSignatureId.FN_TIMESTAMP_FROM_UNIX_MICROS_INT64, // timestamp_from_unix_micros
- // Date/Time/Datetime functions
- FunctionSignatureId.FN_EXTRACT_FROM_DATE,
+ // Time/Datetime functions
FunctionSignatureId.FN_EXTRACT_FROM_DATETIME,
- FunctionSignatureId.FN_EXTRACT_FROM_TIME,
- FunctionSignatureId.FN_DATE_FROM_YEAR_MONTH_DAY
- // TODO: FunctionSignatureId.FN_DATE_FROM_TIMESTAMP
- );
+ FunctionSignatureId.FN_EXTRACT_FROM_TIME);
// todo: Some of operators defined here are later overridden in ZetaSQLPlannerImpl.
// We should remove them from this table and add generic way to provide custom
@@ -314,11 +325,6 @@ public class SqlStdOperatorMappingTable {
// .put("sha256")
// .put("sha512")
- // date functions
- // .put("date_add", SqlStdOperatorTable.DATETIME_PLUS)
- // .put("date_sub", SqlStdOperatorTable.MINUS_DATE)
- .put("date", SqlOperators.DATE_OP)
-
// time functions
// .put("time_add", SqlStdOperatorTable.DATETIME_PLUS)
// .put("time_sub", SqlStdOperatorTable.MINUS_DATE)
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/TestInput.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/TestInput.java
index a30825c2281d7..df5ff9b7d096e 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/TestInput.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/TestInput.java
@@ -18,20 +18,18 @@
package org.apache.beam.sdk.extensions.sql.zetasql;
import java.nio.charset.StandardCharsets;
+import java.time.LocalDate;
import java.util.Arrays;
-import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.DateType;
-import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.TimeType;
import org.apache.beam.sdk.extensions.sql.meta.provider.test.TestBoundedTable;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.Schema.FieldType;
+import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;
/** TestInput. */
class TestInput {
- public static final FieldType DATE = FieldType.logicalType(new DateType());
- public static final FieldType TIME = FieldType.logicalType(new TimeType());
public static final TestBoundedTable BASIC_TABLE_ONE =
TestBoundedTable.of(
@@ -155,20 +153,6 @@ class TestInput {
DateTimeUtils.parseTimestampWithUTCTimeZone("2018-07-01 21:26:13"),
7L);
- public static final TestBoundedTable TIME_TABLE =
- TestBoundedTable.of(
- Schema.builder()
- .addNullableField("f_date", DATE)
- .addNullableField("f_time", TIME)
- .addNullableField("f_timestamp", FieldType.DATETIME)
- .addNullableField("f_timestamp_with_time_zone", FieldType.DATETIME)
- .build())
- .addRows(
- DateTimeUtils.parseTimestampWithUTCTimeZone("2018-07-11 00:00:00"),
- DateTimeUtils.parseTimestampWithUTCTimeZone("1970-01-01 12:33:59.348"),
- DateTimeUtils.parseTimestampWithUTCTimeZone("2018-12-20 23:59:59.999"),
- DateTimeUtils.parseTimestampWithTimeZone("2018-12-10 10:38:59-1000"));
-
public static final TestBoundedTable TABLE_ALL_NULL =
TestBoundedTable.of(
Schema.builder()
@@ -249,6 +233,16 @@ class TestInput {
ImmutableMap.of("MAP_KEY_1", "MAP_VALUE_1"),
Row.withSchema(structSchema).addValues(1L, "data1").build());
+ private static final Schema TABLE_WITH_DATE_SCHEMA =
+ Schema.builder()
+ .addLogicalTypeField("date_field", SqlTypes.DATE)
+ .addStringField("str_field")
+ .build();
+ public static final TestBoundedTable TABLE_WITH_DATE =
+ TestBoundedTable.of(TABLE_WITH_DATE_SCHEMA)
+ .addRows(LocalDate.of(2008, 12, 25), "str1")
+ .addRows(LocalDate.of(2020, 04, 07), "str2");
+
public static byte[] stringToBytes(String s) {
return s.getBytes(StandardCharsets.UTF_8);
}
diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlUtils.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlUtils.java
index f74e35ff245c0..63c89cbfa95a8 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlUtils.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlUtils.java
@@ -25,15 +25,16 @@
import com.google.zetasql.TypeFactory;
import com.google.zetasql.Value;
import com.google.zetasql.ZetaSQLType.TypeKind;
+import java.time.LocalDate;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.beam.sdk.annotations.Internal;
-import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.DateType;
import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.TimeType;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.Schema.Field;
import org.apache.beam.sdk.schemas.Schema.FieldType;
+import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.vendor.calcite.v1_20_0.org.apache.calcite.sql.type.SqlTypeName;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.math.LongMath;
@@ -52,7 +53,8 @@ public final class ZetaSqlUtils {
private ZetaSqlUtils() {}
- public static SqlTypeName zetaSqlTypeToCalciteType(TypeKind zetaSqlType) {
+ // Type conversion: ZetaSQL => Calcite
+ public static SqlTypeName zetaSqlTypeToCalciteTypeName(TypeKind zetaSqlType) {
switch (zetaSqlType) {
case TYPE_INT64:
return SqlTypeName.BIGINT;
@@ -62,6 +64,8 @@ public static SqlTypeName zetaSqlTypeToCalciteType(TypeKind zetaSqlType) {
return SqlTypeName.DOUBLE;
case TYPE_STRING:
return SqlTypeName.VARCHAR;
+ case TYPE_DATE:
+ return SqlTypeName.DATE;
case TYPE_TIMESTAMP:
return SqlTypeName.TIMESTAMP;
case TYPE_BOOL:
@@ -70,10 +74,11 @@ public static SqlTypeName zetaSqlTypeToCalciteType(TypeKind zetaSqlType) {
return SqlTypeName.VARBINARY;
// TODO[BEAM-9179] Add conversion code for ARRAY and ROW types
default:
- throw new IllegalArgumentException("Unsupported ZetaSQL type: " + zetaSqlType.name());
+ throw new UnsupportedOperationException("Unknown ZetaSQL type: " + zetaSqlType.name());
}
}
+ // Type conversion: Beam => ZetaSQL
public static Type beamFieldTypeToZetaSqlType(FieldType fieldType) {
switch (fieldType.getTypeName()) {
case INT64:
@@ -93,31 +98,22 @@ public static Type beamFieldTypeToZetaSqlType(FieldType fieldType) {
case BYTES:
return TypeFactory.createSimpleType(TypeKind.TYPE_BYTES);
case ARRAY:
- return createZetaSqlArrayTypeFromBeamElementFieldType(fieldType.getCollectionElementType());
+ return beamElementFieldTypeToZetaSqlArrayType(fieldType.getCollectionElementType());
case ROW:
- return createZetaSqlStructTypeFromBeamSchema(fieldType.getRowSchema());
+ return beamSchemaToZetaSqlStructType(fieldType.getRowSchema());
case LOGICAL_TYPE:
- switch (fieldType.getLogicalType().getIdentifier()) {
- case DateType.IDENTIFIER:
- return TypeFactory.createSimpleType(TypeKind.TYPE_DATE);
- case TimeType.IDENTIFIER:
- return TypeFactory.createSimpleType(TypeKind.TYPE_TIME);
- default:
- throw new IllegalArgumentException(
- "Unsupported Beam logical type: " + fieldType.getLogicalType().getIdentifier());
- }
+ return beamLogicalTypeToZetaSqlType(fieldType.getLogicalType().getIdentifier());
default:
throw new UnsupportedOperationException(
- "Unsupported Beam fieldType: " + fieldType.getTypeName());
+ "Unknown Beam fieldType: " + fieldType.getTypeName());
}
}
- private static ArrayType createZetaSqlArrayTypeFromBeamElementFieldType(
- FieldType elementFieldType) {
+ private static ArrayType beamElementFieldTypeToZetaSqlArrayType(FieldType elementFieldType) {
return TypeFactory.createArrayType(beamFieldTypeToZetaSqlType(elementFieldType));
}
- public static StructType createZetaSqlStructTypeFromBeamSchema(Schema schema) {
+ public static StructType beamSchemaToZetaSqlStructType(Schema schema) {
return TypeFactory.createStructType(
schema.getFields().stream()
.map(ZetaSqlUtils::beamFieldToZetaSqlStructField)
@@ -128,6 +124,17 @@ private static StructField beamFieldToZetaSqlStructField(Field field) {
return new StructField(field.getName(), beamFieldTypeToZetaSqlType(field.getType()));
}
+ private static Type beamLogicalTypeToZetaSqlType(String identifier) {
+ if (SqlTypes.DATE.getIdentifier().equals(identifier)) {
+ return TypeFactory.createSimpleType(TypeKind.TYPE_DATE);
+ } else if (TimeType.IDENTIFIER.equals(identifier)) {
+ return TypeFactory.createSimpleType(TypeKind.TYPE_TIME);
+ } else {
+ throw new UnsupportedOperationException("Unknown Beam logical type: " + identifier);
+ }
+ }
+
+ // Value conversion: Beam => ZetaSQL
public static Value javaObjectToZetaSqlValue(Object object, FieldType fieldType) {
if (object == null) {
return Value.createNullValue(beamFieldTypeToZetaSqlType(fieldType));
@@ -153,9 +160,11 @@ public static Value javaObjectToZetaSqlValue(Object object, FieldType fieldType)
(List