Skip to content

Commit

Permalink
Additional new Python Katas (apache#11078)
Browse files Browse the repository at this point in the history
* Add WithKeys lesson in Python Katas

* Add Windowing Adding Timestamp section using ParDo lesson

* Fix window import statement

* Add Windowing Fixed Time Window lesson

* Add WithKeys lesson in Python Katas

* Add Windowing Adding Timestamp section using ParDo lesson

* Fix window import statement

* Add Windowing Fixed Time Window lesson

* Update course on Stepik

* Add missing license header
  • Loading branch information
henryken committed Mar 10, 2020
1 parent d84b80b commit 0259a20
Show file tree
Hide file tree
Showing 30 changed files with 648 additions and 9 deletions.
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
id: 238438
update_date: Wed, 19 Jun 2019 09:57:03 UTC
update_date: Mon, 09 Mar 2020 14:44:48 UTC
unit: 210898
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
id: 238437
update_date: Wed, 19 Jun 2019 09:56:48 UTC
update_date: Mon, 09 Mar 2020 14:44:44 UTC
unit: 210897
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

type: edu
files:
- name: task.py
visible: true
placeholders:
- offset: 966
length: 37
placeholder_text: TODO()
- name: tests.py
visible: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id: 1124221
update_date: Mon, 09 Mar 2020 14:34:20 UTC
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->

<html>
<h2>WithKeys</h2>
<p>
<b>Kata:</b> Convert each fruit name into a KV of its first letter and itself, e.g.
<code>apple => ('a', 'apple')</code>
</p>
<br>
<div class="hint">
Use <a href="https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.util.html#apache_beam.transforms.util.WithKeys">
WithKeys</a>.
</div>
</html>
27 changes: 27 additions & 0 deletions learning/katas/python/Common Transforms/WithKeys/WithKeys/task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import apache_beam as beam

from log_elements import LogElements

# Pipeline object that the transforms below are attached to.
p = beam.Pipeline()

# Create a PCollection of fruit names, pair each name with a key taken from
# its first character (word[0:1]), and hand the keyed elements to LogElements.
(p | beam.Create(['apple', 'banana', 'cherry', 'durian', 'guava', 'melon'])
| beam.WithKeys(lambda word: word[0:1])
| LogElements())

# Execute the pipeline.
p.run()
49 changes: 49 additions & 0 deletions learning/katas/python/Common Transforms/WithKeys/WithKeys/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from test_helper import failed, passed, \
get_answer_placeholders, get_file_output, test_is_not_empty, \
test_answer_placeholders_text_deleted


def test_filter():
    """Check that the first answer placeholder uses ``beam.WithKeys``.

    Calls ``passed()`` when the placeholder text contains ``beam.WithKeys``
    and ``failed()`` with a hint otherwise.
    """
    answer = get_answer_placeholders()[0]

    # Guard clause: report the hint and stop when the transform is missing.
    if 'beam.WithKeys' not in answer:
        failed('Use beam.WithKeys')
        return

    passed()


def test_output():
    """Check that the task output contains every expected key/value pair.

    Each fruit name must appear paired with its first letter, e.g.
    ``('a', 'apple')``. Calls ``passed()`` when all pairs are found in the
    output and ``failed()`` with a hint as soon as one is missing.
    """
    expected_pairs = (
        "('a', 'apple')", "('b', 'banana')", "('c', 'cherry')",
        "('d', 'durian')", "('g', 'guava')", "('m', 'melon')",
    )
    task_output = get_file_output()

    if any(pair not in task_output for pair in expected_pairs):
        failed('Incorrect output. ' +
               'Convert into a KV by its first letter and itself.')
    else:
        passed()


if __name__ == '__main__':
    # Run the generic test_helper checks first, then the kata-specific ones,
    # in the same order as before.
    for check in (
            test_is_not_empty,
            test_answer_placeholders_text_deleted,
            test_filter,
            test_output,
    ):
        check()
21 changes: 21 additions & 0 deletions learning/katas/python/Common Transforms/WithKeys/lesson-info.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

content:
- WithKeys
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id: 316601
update_date: Mon, 09 Mar 2020 14:44:52 UTC
unit: 299309
1 change: 1 addition & 0 deletions learning/katas/python/Common Transforms/section-info.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@
content:
- Filter
- Aggregation
- WithKeys
2 changes: 1 addition & 1 deletion learning/katas/python/Examples/section-remote-info.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
id: 85647
update_date: Thu, 13 Jun 2019 14:16:54 UTC
update_date: Mon, 09 Mar 2020 14:34:14 UTC
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

type: edu
files:
- name: task.py
visible: true
placeholders:
- offset: 1211
length: 163
placeholder_text: TODO()
- offset: 1740
length: 30
placeholder_text: TODO()
- name: tests.py
visible: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id: 1124219
update_date: Mon, 09 Mar 2020 14:33:58 UTC
47 changes: 47 additions & 0 deletions learning/katas/python/Windowing/Adding Timestamp/ParDo/task.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->

<html>
<h2>Adding Timestamp - ParDo</h2>
<p>
Bounded sources (such as a file from TextIO) do not provide timestamps for elements. If you need
timestamps, you must add them to your PCollection’s elements.
</p>
<p>
You can assign new timestamps to the elements of a PCollection by applying a ParDo transform that
outputs new elements with timestamps that you set.
</p>
<p>
<b>Kata:</b> Please assign each element a timestamp based on the <code>Event.date</code>.
</p>
<br>
<div class="hint">
Use <a href="https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.ParDo">
ParDo</a> with
<a href="https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn">DoFn</a>.
</div>
<div class="hint">
Use <a href="https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.window.html#apache_beam.transforms.window.TimestampedValue">
beam.window.TimestampedValue</a> to assign a timestamp to the element.
</div>
<div class="hint">
Refer to the Beam Programming Guide
<a href="https://beam.apache.org/documentation/programming-guide/#adding-timestamps-to-a-pcollections-elements">
"Adding timestamps to a PCollection’s elements"</a> section for more information.
</div>
</html>
55 changes: 55 additions & 0 deletions learning/katas/python/Windowing/Adding Timestamp/ParDo/task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
import time

import apache_beam as beam
from apache_beam.transforms import window

from log_elements import LogElements


class Event:
    """Plain record for one event: an identifier, an event name and a date."""

    def __init__(self, id, event, date):
        """Store the three fields unchanged on the instance.

        Args:
            id: Identifier of the event.
            event: Name of the event, e.g. ``'book-order'``.
            date: Date of the event; the pipeline in this file passes
                ``datetime.date`` values.
        """
        # The parameter name `id` is part of the constructor's interface, so it
        # is kept even though it shadows the builtin.
        self.id, self.event, self.date = id, event, date

    def __str__(self) -> str:
        """Return the event formatted as ``Event(<id>, <event>, <date>)``."""
        return f'Event({self.id}, {self.event}, {self.date})'


class AddTimestampDoFn(beam.DoFn):
    """DoFn that re-emits each element with a timestamp taken from its date."""

    def process(self, element, **kwargs):
        """Yield ``element`` wrapped in a ``TimestampedValue``.

        Args:
            element: Object with a ``date`` attribute that provides
                ``timetuple()`` (the pipeline in this file passes ``Event``
                instances holding ``datetime.date`` values).
            **kwargs: Accepted but not used here.

        Yields:
            ``window.TimestampedValue`` pairing the unchanged element with the
            Unix timestamp computed from ``element.date``.
        """
        date_fields = element.date.timetuple()
        # time.mktime reads the struct_time as *local* time, so the resulting
        # Unix timestamp depends on the time zone of the machine running this.
        seconds_since_epoch = time.mktime(date_fields)
        yield window.TimestampedValue(element, seconds_since_epoch)


# Pipeline object that the transforms below are attached to.
p = beam.Pipeline()

# Create five sample events dated 2020-03-04 through 2020-03-08, attach a
# timestamp to each one with AddTimestampDoFn, and hand the results to
# LogElements with with_timestamp=True.
(p | beam.Create([
Event('1', 'book-order', datetime.date(2020, 3, 4)),
Event('2', 'pencil-order', datetime.date(2020, 3, 5)),
Event('3', 'paper-order', datetime.date(2020, 3, 6)),
Event('4', 'pencil-order', datetime.date(2020, 3, 7)),
Event('5', 'book-order', datetime.date(2020, 3, 8)),
])
| beam.ParDo(AddTimestampDoFn())
| LogElements(with_timestamp=True))

# Execute the pipeline.
p.run()
64 changes: 64 additions & 0 deletions learning/katas/python/Windowing/Adding Timestamp/ParDo/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from test_helper import failed, passed, \
get_answer_placeholders, get_file_output, test_is_not_empty, \
test_answer_placeholders_text_deleted


def test_dofn_process_method():
    """Check that the first answer placeholder defines a ``process`` method.

    Calls ``passed()`` when the placeholder text contains
    ``def process(self,`` and ``failed()`` with a hint otherwise.
    """
    answer = get_answer_placeholders()[0]

    # Guard clause: report the hint and stop when the override is missing.
    if 'def process(self,' not in answer:
        failed('Override "process" method')
        return

    passed()


def test_pardo():
    """Check that the second answer placeholder applies the DoFn via ParDo.

    Calls ``passed()`` when the placeholder text contains
    ``beam.ParDo(AddTimestampDoFn())`` and ``failed()`` with a hint otherwise.
    """
    answer = get_answer_placeholders()[1]

    # Guard clause: report the hint and stop when the transform is missing.
    if 'beam.ParDo(AddTimestampDoFn())' not in answer:
        failed('Use beam.ParDo')
        return

    passed()


def test_output():
    """Check that each event is logged with the timestamp derived from its date.

    The reference solution converts ``Event.date`` with ``time.mktime``, which
    treats the date as midnight in the machine's local time zone, while the
    logged timestamps are rendered in UTC (``...Z``). The expected lines are
    therefore computed at run time with the same conversion rather than being
    hard-coded: the previous literals (``...T16:00:00Z`` on the preceding day)
    only matched on a machine running at UTC+8.

    Calls ``passed()`` when every expected line is present in the task output
    and ``failed()`` with a hint otherwise.
    """
    # Imported locally: the only top-level import visible in this module
    # pulls names from test_helper.
    import datetime
    import time

    output = get_file_output()

    # (id, event, date) of the events created by the task's pipeline.
    events = [
        ('1', 'book-order', datetime.date(2020, 3, 4)),
        ('2', 'pencil-order', datetime.date(2020, 3, 5)),
        ('3', 'paper-order', datetime.date(2020, 3, 6)),
        ('4', 'pencil-order', datetime.date(2020, 3, 7)),
        ('5', 'book-order', datetime.date(2020, 3, 8)),
    ]

    answers = []
    for event_id, name, date in events:
        # Same conversion as the reference solution: local midnight of the
        # event date expressed as seconds since the Unix epoch.
        unix_timestamp = time.mktime(date.timetuple())
        utc_moment = datetime.datetime.fromtimestamp(
            unix_timestamp, tz=datetime.timezone.utc)
        stamp = utc_moment.strftime('%Y-%m-%dT%H:%M:%SZ')
        answers.append(
            f"Event({event_id}, {name}, {date}), timestamp='{stamp}'")

    if all(line in output for line in answers):
        passed()
    else:
        failed("Incorrect output. Assign timestamp based on the Event.date.")


if __name__ == '__main__':
    # Run the generic test_helper checks first, then the kata-specific ones,
    # in the same order as before.
    for check in (
            test_is_not_empty,
            test_answer_placeholders_text_deleted,
            test_dofn_process_method,
            test_pardo,
            test_output,
    ):
        check()
Loading

0 comments on commit 0259a20

Please sign in to comment.