Skip to content

Commit

Permalink
1.6
Browse files Browse the repository at this point in the history
  • Loading branch information
vardecab committed Mar 7, 2023
1 parent fe956a0 commit 39088e8
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 23 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# kindle-words

![Not Maintained](https://img.shields.io/badge/Maintenance%20Level-Not%20Maintained-yellow.svg)

<b>As of early 2023, it's likely the script doesn't work due to API changes.</b>

<br><hr><br>

![](https://img.shields.io/badge/platform-Windows%20%7C%20macOS-blue)

> Do something useful with your Kindle notes :) This script extracts individual words from the `My Clippings` file hidden on your Kindle e-reader, translates them using Google Translate, and exports each "original word" → "translation" pair into a `.txt` file from which you can learn these words or import them into an application such as [Quizlet](https://quizlet.com/).
Expand Down Expand Up @@ -72,6 +78,7 @@ python script.py

## Release History

- 1.6: Added backup functionality to save the `My Clippings` file locally.
- 1.5.2: Added one more rule to clean the data.
- 1.5.1: Fixed Windows 10 notifications bug.
- 1.5: Added language detection to skip translation of words already in desired language.
Expand Down
83 changes: 60 additions & 23 deletions script.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,28 @@
### start + run time
# ==================================== #
# kindle-words #
# ==================================== #

# --------- start + run time --------- #
import time
start_time = time.time()
print("Starting...")

### *NOTE: fix for "'charmap' codec can't encode character (...)" problem; https://stackoverflow.com/questions/56995919/change-python-3-7-default-encoding-from-cp1252-to-cp65001-aka-utf-8
# *NOTE: fix for "'charmap' codec can't encode character (...)" problem; https://stackoverflow.com/questions/56995919/change-python-3-7-default-encoding-from-cp1252-to-cp65001-aka-utf-8
import sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding = 'utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.detach(), encoding = 'utf-8')

### notifications
# imports
# ----------- notifications ---------- #
from sys import platform # check platform (Windows/macOS)
if platform == "darwin":
import pync # macOS notifications
elif platform == 'win32':
from win10toast_click import ToastNotifier # Windows 10 notifications
toaster = ToastNotifier() # initialize win10toast

### path to file: https://www.journaldev.com/15642/python-switch-case
# ----------- path to file ----------- #
# https://www.journaldev.com/15642/python-switch-case
if platform == 'win32':
path = {
'C' : r'C:\documents\My Clippings.txt',
Expand All @@ -31,27 +35,27 @@
'f' : r'F:\documents\My Clippings.txt'
}

### input timeout
# ----------- input timeout ---------- #
from inputimeout import inputimeout, TimeoutOccurred # input timeout: https://pypi.org/project/inputimeout/
# timeout_time = 0 # *NOTE: test
timeout_time = 5 # *NOTE: prod
print(f'Script will wait {timeout_time} seconds for the input and then will continue with a default value.')

# select source language
# ------ select source language ------ #
# Ask for the source language, waiting at most `timeout_time` seconds.
try:
    select_source_language = inputimeout(prompt="Enter the source language (default: en): ", timeout=timeout_time)
except TimeoutOccurred:
    print("Time ran out, selecting default source language (en)...")
    select_source_language = 'en'

# Pressing Enter without typing anything yields an empty string; fall back to
# the advertised default instead of passing '' on as a language code.
select_source_language = select_source_language.strip() or 'en'

# select target language
# ------ select target language ------ #
# Ask for the target language, waiting at most `timeout_time` seconds.
try:
    select_target_language = inputimeout(prompt="Enter the target language (default: pl): ", timeout=timeout_time)
except TimeoutOccurred:
    print("Time ran out, selecting default target language (pl)...")
    select_target_language = 'pl'

# Pressing Enter without typing anything yields an empty string; fall back to
# the advertised default instead of passing '' on as a language code.
select_target_language = select_target_language.strip() or 'pl'

# select Kindle drive letter
# ---- select Kindle drive letter ---- #
if platform == 'win32': # Windows
try:
kindle_drive_letter = inputimeout(prompt="Enter the drive letter that is assigned to your Kindle (C/D/E/F) (default: D): ", timeout=timeout_time)
Expand Down Expand Up @@ -92,14 +96,47 @@
print ("Looks like this name doesn't work. Try a different one next time. Exiting...")
exit()

### create output & data folders
# ---------- backup the file --------- #
# Best-effort copy of the Kindle's `My Clippings.txt` into a cloud-synced
# folder, stored under a date-stamped name. A failed backup must never stop
# the rest of the script, so problems are reported and then ignored.
import os
import shutil
import sys
from datetime import datetime # add current date to file name

this_run_datetime = datetime.now().strftime('%y%m%d') # eg. 210120

# Only the backup folder differs per platform, so pick it up front instead of
# blindly attempting both the Windows and the macOS location on every run.
if sys.platform == 'win32':
    moveToLocation = r"G:\My Drive\Betterment\Books\#Books' Notes"
else:
    moveToLocation = r"/Volumes/GoogleDrive/My Drive/Betterment/Books/#Books' Notes"

try:
    # NOTE(review): `path` and `kindle_drive_letter` appear to be defined only
    # under `platform == 'win32'` earlier in the script -- confirm which source
    # path should be used on macOS; until then the failure is reported below.
    sourceLocation = path.get(kindle_drive_letter, r'x:\documents\My Clippings.txt')
    # Copy straight to the dated file name: no separate rename step that could
    # fail and leave an undated `My Clippings.txt` behind in the backup folder,
    # and no hand-written path separators (the old "\My" was an invalid escape).
    shutil.copy2(sourceLocation, os.path.join(moveToLocation, "My Clippings - " + this_run_datetime + ".txt"))
except Exception as backup_error:
    # Deliberately broad: the backup is optional. Unlike a bare `except: pass`
    # this still lets Ctrl+C / SystemExit through and tells the user why the
    # backup did not happen.
    print(f"Backup of My Clippings skipped: {backup_error}")

# --- create output & data folders --- #
import os

# Make sure the folders the script writes to exist. `exist_ok=True` makes the
# call a no-op when the folder is already there, without a separate
# check-then-create step (which could race with another process). If the name
# is taken by a regular file, this now fails here with FileExistsError rather
# than later when the script tries to write into the folder.
os.makedirs('output', exist_ok=True)
os.makedirs('data', exist_ok=True)

### show the last word that was added on the previous run
# --- show the last word that was added on the previous run --- #
try:
with open('output/output-original_words.txt', 'r') as file:
lines = file.read().splitlines()
Expand All @@ -109,7 +146,7 @@
except FileNotFoundError:
print('First run - no file to load data.')

### list cleanup
# ----------- list cleanup ----------- #
read_source_file = [x for x in read_source_file if not any(x1.isdigit() for x1 in x)] # remove numbers
read_source_file = [word.replace('\n','') for word in read_source_file] # remove character
read_source_file = [word.replace(',','') for word in read_source_file] # remove character
Expand All @@ -126,7 +163,7 @@
read_source_file = [word.replace('‘','') for word in read_source_file] # remove character
read_source_file = [word.replace('==========','') for word in read_source_file] # remove characters

### add single words to the new list aka remove sentences etc.
# --- add single words to the new list aka remove sentences etc. --- #
single_words = [] # new list
for element in range(len(read_source_file)):
if len(read_source_file[element].split()) == 1: # only single words
Expand All @@ -139,7 +176,7 @@
single_words = list(dict.fromkeys(single_words)) # remove duplicates; https://www.w3schools.com/python/python_howto_remove_duplicates.asp
print ("There are", len(single_words), 'unique words in My Clippings file.')

### skip words already in target_language
# -- skip words already in target_language -- #
from langdetect import detect # language detection

words = []
Expand All @@ -149,7 +186,7 @@
words.append(single_words[word])
print(f'Without words already in {select_target_language.upper()}, there are {len(words)} words.')

### open saved list
# ---------- open saved list --------- #
import pickle
try:
with open ('data/saved_location', 'rb') as file_import:
Expand All @@ -158,7 +195,7 @@
except FileNotFoundError:
print('First run - no file to load data.')

### comparison
# ------------ comparison ------------ #
try:
difference = set(words) - set(saved_list) # what's new in words[]
if len(saved_list) == 0:
Expand All @@ -173,15 +210,15 @@
with open(r"output/output-original_words.txt", "w", encoding="utf-8") as output:
output.write(output_lines.lower())

### translation
# ------------ translation ----------- #
# split list to smaller lists to get around 5000-character-limit of deep-translator package
chunks = [to_translate[x:x+250] for x in range(0, len(to_translate), 250)] # split into sublists of 250 words each
print('List of words was split into:', len(chunks), 'chunk(s) for translation.') # debug; how many sublists are in this master list

from deep_translator import GoogleTranslator
print('Translating...')

### export a pair: original → translated
# -- export a pair: original → translated -- #
counter = 0
while counter <= len(chunks)-1: # -1 to make it work when len(chunks) == 1 and chunks[0] is the only one
translated_list = [] # new list
Expand All @@ -193,31 +230,31 @@
counter += 1
print('Words are translated & final file is exported!')

### notifications
# ----------- notifications ---------- #
if platform == "darwin":
pync.notify(f'Translated {len(to_translate)} words.', title='kindle-words', contentImage="https://i.postimg.cc/3R0tLQ3H/translation.png", sound="Funk") # appIcon="" doesn't work, using contentImage instead
elif platform == "win32":
toaster.show_toast(msg=f'Translated {len(to_translate)} words.', title="kindle-words", icon_path="./icons/translation.ico", duration=None, threaded=True) # duration=None - leave notification in Notification Center; threaded=True - rest of the script will be allowed to be executed while the notification is still active

### export list for future comparison
# --- export list for future comparison --- #
with open('data/saved_location', 'wb') as file_export:
pickle.dump(words, file_export)

### runtime
# -------------- runtime ------------- #
end_time = time.time() # run time end
run_time = round(end_time-start_time,2)
print(len(to_translate), 'words were translated in:', run_time, "seconds (" + str(round(run_time/60,2)), "minutes).")

else:
### notifications
# ----------- notifications ---------- #
if platform == "darwin":
pync.notify(f'Nothing new to translate.', title='kindle-words', contentImage="https://i.postimg.cc/3R0tLQ3H/translation.png", sound="Funk") # appIcon="" doesn't work, using contentImage instead
elif platform == "win32":
toaster.show_toast(msg=f'Nothing new to translate.', title="kindle-words", icon_path="./icons/translation.ico", duration=None, threaded=True) # duration=None - leave notification in Notification Center; threaded=True - rest of the script will be allowed to be executed while the notification is still active

print('Nothing new to translate. Exiting...')

### runtime
# -------------- runtime ------------- #
end_time = time.time() # run time end
run_time = round(end_time-start_time,2)
print("Script run time:", run_time, "seconds. That's", round(run_time/60,2), "minutes.")

0 comments on commit 39088e8

Please sign in to comment.