1.0.0

vardecab · Mar 7, 2021 · e2deaf4 · e2deaf4
1 parent 9869d00
commit e2deaf4
Show file tree

Hide file tree

Showing 4 changed files with 89 additions and 68 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,5 @@
 _old
-output
+output/*
+!output/.gitkeep
 TODO
 .DS_Store
diff --git a/README.md b/README.md
@@ -1,25 +1,24 @@
 # kindle-words
 
-🚧🚧🚧 As of Jan 2021, the script **does not** work [due to a bug in the underlying API](https://github.com/ssut/py-googletrans/issues/264). As soon as the bug in the API is fixed, I'll update the script. 🚧🚧🚧
-
 > Do something useful with your Kindle notes :) This script extracts individual words from the `My Clippings` file hidden on your Kindle e-reader, translates them using Google Translate and exports the pair "original word" → "translation" into a `.txt` file from which you can learn these words or import them into an application such as [Quizlet](https://quizlet.com/).
 
 <!-- ## Screenshots -->
 
 <!-- ## How to use -->
 
 ## Roadmap
-
-- Use DeepL rather than Google Translate. 
-- Dictionary definitions.
-- <del>Improve regex formula to better deal with words that have special characters.</del>
-- <del>Extract single words from source file.</del>
-- <del>Output list line by line.</del> 
-- <del>Use API to translate words.</del>
-- <del>Skip the same words on subsequent imports.</del>
+ 
+- 🎯 Dictionary definitions.
+- ✅ <del>Improve regex formula to better deal with words that have special characters.</del>
+- ✅ <del>Extract single words from source file.</del>
+- ✅ <del>Output list line by line.</del> 
+- ✅ <del>Use API to translate words.</del>
+- ✅ <del>Skip the same words on subsequent imports.</del>
+- ❌ Use DeepL rather than Google Translate.
 
 ## Release History
 
+- 1.0.0: Using new API - [deep-translator](https://github.com/nidhaloff/deep-translator). 
 - 0.12.5: Bug in the API discovered.
 - 0.12.4: Cleared up the code for better readability.
 - 0.12.3: Fixes to `regex` formula so it also takes words with `,` & `—`.
@@ -47,10 +46,15 @@ Using [SemVer](https://semver.org/).
 GNU General Public License v3.0, see [LICENSE.md](https://github.com/vardecab/umbrella/blob/master/LICENSE).
 
 ## Acknowledgements
-
-- https://stackabuse.com/text-translation-with-google-translate-api-in-python/
-- https://pypi.org/project/googletrans
-- https://regex101.com/
-- https://stackoverflow.com/questions/56995919/change-python-3-7-default-encoding-from-cp1252-to-cp65001-aka-utf-8
-- https://www.geeksforgeeks.org/print-lists-in-python-4-different-ways/
-- https://stackabuse.com/writing-to-a-file-with-pythons-print-function/
+### Packages
+- [deep-translator](https://github.com/nidhaloff/deep-translator)
+- [inputimeout](https://pypi.org/project/inputimeout/)
+### Articles
+- [Text Translation with Google Translate API in Python](https://stackabuse.com/text-translation-with-google-translate-api-in-python/)
+- [Change python 3.7 default encoding from cp1252 to cp65001 aka UTF-8](https://stackoverflow.com/questions/56995919/change-python-3-7-default-encoding-from-cp1252-to-cp65001-aka-utf-8)
+- [Print lists in Python](https://www.geeksforgeeks.org/print-lists-in-python-4-different-ways/)
+- [Writing to a File with Python's print() Function](https://stackabuse.com/writing-to-a-file-with-pythons-print-function/)
+- [Python switch case](https://www.journaldev.com/15642/python-switch-case)
+- [Using .write function with multiple arguments for writing to a txt file - Python](https://stackoverflow.com/questions/47425891/using-write-function-with-multiple-arguments-for-writing-to-a-txt-file-python)
+### Tools
+- [regex101](https://regex101.com/)
diff --git a/output/.gitkeep b/output/.gitkeep
diff --git a/script.py b/script.py
@@ -1,28 +1,22 @@
-# kindle-words
-# 0.12.4
-# !FIX: doesn't work due to https://github.com/ssut/py-googletrans/issues/264 
+### current date & time
+from datetime import datetime # have current date & time in exported files' names
+this_run_datetime = datetime.strftime(datetime.now(), '%y%m%d-%H%M%S') # eg 210120-173112; https://www.w3schools.com/python/python_datetime.asp
 
-### import libs 
-import re # regex; extract words
-from googletrans import Translator # Google Translate API; https://pypi.org/project/googletrans
-import os # create new folders
-
-### current date 
-from datetime import datetime # have current date in exported files' names
-today_date = datetime.strftime(datetime.now(), '%y%m%d-%f') # https://www.w3schools.com/python/python_datetime.asp
-
-### script runtime
+### start + run time
 import time
 start_time = time.time() 
+print("Starting...")
 
-### fix for "'charmap' codec can't encode character (...)" problem; https://stackoverflow.com/questions/56995919/change-python-3-7-default-encoding-from-cp1252-to-cp65001-aka-utf-8
+### *NOTE: fix for "'charmap' codec can't encode character (...)" problem; https://stackoverflow.com/questions/56995919/change-python-3-7-default-encoding-from-cp1252-to-cp65001-aka-utf-8
 import sys
 import io
 sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding = 'utf-8')
 sys.stderr = io.TextIOWrapper(sys.stderr.detach(), encoding = 'utf-8')
 
-### path to file; https://www.journaldev.com/15642/python-switch-case
-path = {
+### path to file: https://www.journaldev.com/15642/python-switch-case
+from sys import platform # check platform (Windows/Linux/macOS)
+if platform == 'win32':
+ path = {
  'C' : r'C:\documents\My Clippings.txt',
  'c' : r'C:\documents\My Clippings.txt',
  'D' : r'D:\documents\My Clippings.txt',
@@ -31,10 +25,12 @@
  'e' : r'E:\documents\My Clippings.txt',
  'F' : r'F:\documents\My Clippings.txt',
  'f' : r'F:\documents\My Clippings.txt'
-}
+ }
+# elif platform == "darwin": # macOS
+ # path = {} # TODO
 
 ### input timeout
-from inputimeout import inputimeout, TimeoutOccurred # input timeout; https://pypi.org/project/inputimeout/
+from inputimeout import inputimeout, TimeoutOccurred # input timeout: https://pypi.org/project/inputimeout/
 
 # select source language
 try:
@@ -50,63 +46,83 @@
  print ("Time ran out, selecting default target language (pl)...")
  select_target_language = 'pl'
 
-# select Kindle driver letter
+# select Kindle drive letter
 try:
  kindle_drive_letter = inputimeout(prompt="Enter the drive letter that is assigned to your Kindle (C/D/E/F): ", timeout=3)
  with io.open(path.get(kindle_drive_letter,r'x:\documents\My Clippings.txt'), "r", encoding="utf-8") as source_file: 
  read_source_file = source_file.read()
 except TimeoutOccurred:
- print ("Time ran out, selecting default drive (D)...")
- # with io.open(r'_old/test.', "r", encoding="utf-8") as source_file: # *NOTE: test
- with io.open(r'D:\documents\My Clippings.txt', "r", encoding="utf-8") as source_file: # *NOTE: prod 
+ print ("Time ran out.")
+ # *NOTE: test
+ # with io.open(r'./_old/test.txt', "r", encoding="utf-8") as source_file: 
+ # print('Selecting test file...')
+ # read_source_file = source_file.read()
+ # *NOTE: prod 
+ with io.open(r'D:\documents\My Clippings.txt', "r", encoding="utf-8") as source_file: 
+ print('Selecting default drive (D)...')
  read_source_file = source_file.read()
 
 ### regex formula 
+import re # regex; extract words
 regex_find_single_words = re.compile(r"^[\w'’“\-\.\,\—]+$", re.MULTILINE) # experiment; version to include , & —
 
 ### find single words in the source file 
+print('Looking for words...')
 single_words_with_special_characters = re.findall(regex_find_single_words,read_source_file)
-# print ("Single words: ", single_words) # 🐛 debug (with duplicates)
-print ("With dupes: ", len(single_words_with_special_characters)) # 🐛 debug (how many words in the list)
+# print ("Single words: ", single_words) # debug (with duplicates)
+print ("Found", len(single_words_with_special_characters), 'words.') # debug (how many words in the list)
 
 ### remove duplicates from the list
-single_words_with_special_characters = list(dict.fromkeys(single_words_with_special_characters))
-print ("Without dupes: ", len(single_words_with_special_characters)) # 🐛 debug (how many words in the list)
+print('Removing duplicates...')
+original_words = list(dict.fromkeys(single_words_with_special_characters)) # final list of original words without duplicates etc.
+print ("There are", len(original_words), 'words.') # debug (how many words in the list)
 
 ## print single words line by line & export file 
-# print (*single_words_with_special_characters, sep = "\n") # 🐛 debug
-output_lines = '\n'.join(map(str, single_words_with_special_characters)) # https://www.geeksforgeeks.org/print-lists-in-python-4-different-ways/
-if not os.path.isdir("/output/" + today_date):
- os.mkdir("output/" + today_date)
- print ("Folder created: " + today_date)
-with open(r"output/" + today_date + "/output-imperfect-" + today_date + ".txt", "w", encoding="utf-8") as output: 
+import os # create new folders
+# print (*single_words_with_special_characters, sep = "\n") # debug
+print('Creating a folder & exporting words to a file...')
+output_lines = '\n'.join(map(str, original_words)) # https://www.geeksforgeeks.org/print-lists-in-python-4-different-ways/
+if not os.path.isdir("/output/" + this_run_datetime):
+ os.mkdir("output/" + this_run_datetime)
+ print ("Folder created: " + this_run_datetime)
+with open(r"output/" + this_run_datetime + "/output-" + this_run_datetime + ".txt", "w", encoding="utf-8") as output: 
  output.write(output_lines.lower())
 
 ### take single words with special characters from the file and remove unnecessary chars (eg ".-)
-with io.open(r"output/" + today_date + "/output-imperfect-" + today_date + ".txt", "r", encoding="utf-8") as source_file: 
+print('Removing unnecessary characters...')
+with io.open(r"output/" + this_run_datetime + "/output-" + this_run_datetime + ".txt", "r", encoding="utf-8") as source_file: 
  read_source_file = source_file.read()
 
 single_words = re.findall(r"\b\w*[-'’]\w*\b|\w+",read_source_file)
 output_lines = '\n'.join(map(str, single_words))
-with open(r"output/" + today_date + "/output-perfect-" + today_date + ".txt", "w", encoding="utf-8") as output: 
+with open(r"output/" + this_run_datetime + "/output-original_words-" + this_run_datetime + ".txt", "w", encoding="utf-8") as output: 
  output.write(output_lines.lower())
 
 ### translation
-Translator = Translator()
-
-# print directly to the file: https://stackabuse.com/writing-to-a-file-with-pythons-print-function/
-original_stdout = sys.stdout # save a reference to the original standard output
-# with open(r"C:\Users\x\Desktop\kindle-words_export.txt", "w", encoding="utf-8") as export_translations: # "a" → append, "w" → write
-with open(r"output/" + today_date + "/kindle-words_export-" + today_date + ".txt", "w", encoding="utf-8") as export_translations: # NOTE: "a" → append, "w" → write
- translations = Translator.translate(single_words, src=select_source_language, dest=select_target_language) # NOTE: / FIXME: black box - whole thing is ran inside which means progress bar won't work
- # for translation in tqdm(translations):
- # counter = 1 # progress
- for translation in translations:
- # print ("Word:", str(counter), "/", len(single_words)) # step in the process; progress
- sys.stdout = export_translations # output to the file above
- print (translation.origin, ' -> ', translation.text)
- sys.stdout = original_stdout # reset the standard output to its original value
- # counter += 1 # progress 
+from deep_translator import GoogleTranslator
+
+print('Translating...')
+translated = GoogleTranslator(source=select_source_language, target=select_target_language).translate_file(r"output/" + this_run_datetime + "/output-original_words-" + this_run_datetime + ".txt")
+# print(type(translated)) # debug
+with open(r"output/" + this_run_datetime + "/output-translated_words-" + this_run_datetime + ".txt", "w", encoding="utf-8") as export_translations: 
+ for word in translated:
+ # print(translated)
+ export_translations.write(word)
+print('Translated, nice!')
+
+### export a pair: original → translated 
+with open(r"output/" + this_run_datetime + "/output-translated_words-" + this_run_datetime + ".txt", "r", encoding="utf-8") as import_translations:
+ translated_words = import_translations.read().splitlines()
+# print(len(translated_words)) # debug; check if == 
+
+with open(r"output/" + this_run_datetime + "/kindle-words_export-" + this_run_datetime + ".txt", "w", encoding="utf-8") as export_pairs:
+ for original, translated in zip(original_words, translated_words):
+ # print(str(original + " → " + translated)) # debug 
+ export_pairs.write((str(original + " → " + translated + "\n"))) # write() takes exactly one string argument, so the pair is concatenated into a single string before writing
+print('Final file exported!')
 
 ### runtime 
-print("Script runtime: %.2f seconds" % (time.time() - start_time), "with", len(single_words), "translations.") 
+end_time = time.time() # run time end 
+run_time = round(end_time-start_time,2)
+print("Script run time:", run_time, "seconds.")
+# print("Script run time:", run_time, "seconds.""with", len(single_words), "translations.")