-
-
Notifications
You must be signed in to change notification settings - Fork 5
/
i18n.sh
executable file
·307 lines (257 loc) · 10.4 KB
/
i18n.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
#!/usr/bin/env bash
#
# Converts Markdown sources into gettext .pot/.po files (md2po) and
# translated .po files back into Markdown (po2md).

# Fail on errors, on use of unset variables, and when any stage of a
# pipeline fails.
set -eu -o pipefail

# Directory this script lives in. Quoted so that a checkout path
# containing whitespace does not get word-split.
DIR_TOOLS=$(dirname -- "$0")

# Try to make the path names nice and relative to the root
# directory. The paths do end up in the resulting .po files, so we
# don't want this script to change the .po output based on where it is
# executed from.
PREVIOUS_CWD=$(pwd)
cd -- "${DIR_TOOLS}/.."

# All remaining paths are relative to the directory we just changed into.
DIR_SRC=.
DIR_BUILD=${DIR_SRC}/build
DIR_PO=${DIR_SRC}/po
###################################################
# Generate PO (gettext) files from Markdown files #
###################################################
#
# Usage: md2po
#
# Regenerates the .pot templates from the Markdown sources and then
# merges the new/changed strings into every existing translation.
#
# This will:
#  * Remove any stale build directory.
#  * Generate po/_docs.pot, po/_posts.pot and po/_pages.pot.
#  * Merge _pages.pot into _docs.pot (and remove _pages.pot).
#  * Update the translated _docs.*.po and _posts.*.po files.
#
# Reads the globals DIR_BUILD and DIR_PO.
#
function md2po {
  local pages_pot=${DIR_PO}/_pages.pot
  local docs_pot=${DIR_PO}/_docs.pot
  # The merge result has to be written to a file that is NOT one of
  # msgcat's inputs; it is moved over _docs.pot only once msgcat has
  # succeeded. (Previously the "temporary" path was _docs.pot itself,
  # so the copy step failed and the clean-up removed the real file.)
  local docs_tmp_pot=${DIR_PO}/_docs.tmp.pot

  echo "Converting .md source into .pot files"
  if [ -d "${DIR_BUILD}" ]; then rm -r -- "${DIR_BUILD}"; fi

  generate_pot_file _docs
  generate_pot_file _posts
  generate_pot_file _pages
  rm -r -- "${DIR_BUILD}"

  echo "Merging ${pages_pot} into ${docs_pot}"
  msgcat -o "${docs_tmp_pot}" "${pages_pot}" "${docs_pot}"
  mv -f -- "${docs_tmp_pot}" "${docs_pot}"
  rm -- "${pages_pot}"

  update_po_files _docs
  update_po_files _posts
}
#
# Usage: generate_pot_file SRC_TYPE
#
# Where SRC_TYPE is either _posts, _pages, or _docs
# (i.e. directories with .md files that are translated into a single
# .pot file)
#
# This will:
#  * Copy the original .md files, after stripping their metadata, to a
#    temporary build directory.
#  * For each .md file, invoke po4a to extract the strings into a .pot
#    file.
#  * Once all .md files have had their strings extracted, combine them
#    into a single .pot file (po/SRC_TYPE.pot) using msgcat.
#  * This .pot file is the thing which will end up getting translated.
#
# Reads the globals DIR_SRC, DIR_BUILD and DIR_PO. All working
# variables are local so that callers' variables are not clobbered.
#
function generate_pot_file {
  local src_type=$1
  local src_subdir=${DIR_SRC}/${src_type}
  local build_subdir=${DIR_BUILD}/${src_type}/md
  local dir_build_po=${DIR_BUILD}/${src_type}/po
  local out_po_file=${DIR_PO}/${src_type}.pot
  local md name

  echo "Generating .pot files for $src_type:"
  cp_md_strip_frontmatter_dir "${src_subdir}" "${build_subdir}"

  # The output directory is the same for every file, so create it once.
  mkdir -p -- "${dir_build_po}"

  for md in "${build_subdir}"/*.md; do
    name=$(basename -- "${md}")
    name=${name%.*}
    echo "Extracting strings from $md"
    # For some reason these need to be .pot files instead of .po
    # files for msgcat below to work correctly.
    po4a-gettextize -f text -o markdown -L utf-8 -M utf-8 -m "${md}" -p "${dir_build_po}/${name}.pot"
  done

  echo "Combining .pot files into $out_po_file"
  mkdir -p -- "$(dirname -- "${out_po_file}")"
  msgcat -o "${out_po_file}" "${dir_build_po}"/*.pot
}
#
# Usage: update_po_files SRC_TYPE
#
# Updates each translated po/SRC_TYPE.LANG.po file with new/changed
# strings from the template po/SRC_TYPE.pot. Uses `msgmerge` and
# supports fuzzy string matching. Does nothing when there are no
# translated .po files for SRC_TYPE.
#
# Reads the global DIR_PO. Working variables are local so that the
# caller's SRC_TYPE and PO are left alone.
#
function update_po_files {
  local src_type=$1
  local pot=${DIR_PO}/${src_type}.pot
  local i18n_po

  # Without this guard the glob below would stay unexpanded and
  # msgmerge would be handed a literal "*.po" path.
  if [ "$(check_for_po "${src_type}")" != true ]; then
    return 0
  fi

  for i18n_po in "${DIR_PO}/${src_type}".*.po; do
    echo "Updating ${i18n_po} with any changes from main .po file ${pot}."
    # The VERSION_CONTROL environment variable prevents a
    # backup file from being written to ${src_type}.LANG.po~
    VERSION_CONTROL=none msgmerge -U "${i18n_po}" "${pot}"
  done
}
########################################
# Generate Markdown from i18n PO files #
########################################
#
# Usage: po2md
#
# Rebuilds the translated Markdown from the .po files: clears any
# stale build directory, runs generate_md_files, then removes the
# build directory again.
#
function po2md {
  printf '%s\n' "Converting .po files back into .md source"

  # Start from a clean build tree if a previous run left one behind.
  [[ ! -d ${DIR_BUILD} ]] || rm -r "${DIR_BUILD}"

  # NOTE(review): only _pages (translated via the _docs .po files) is
  # converted; the _docs and _posts conversions below are disabled and
  # the reason is not visible from this file.
  #generate_md_files _docs _docs
  #generate_md_files _posts _posts
  generate_md_files _pages _docs

  rm -r "${DIR_BUILD}"
}
#
# Usage: generate_md_files SRC_TYPE POT_TYPE
#
# Where SRC_TYPE is either _posts, _pages, or _docs
# (i.e. directories with .md files that are translated into a single
# .po file) and POT_TYPE is either _posts or _docs (i.e. files in
# po/POT_TYPE.pot and po/POT_TYPE.LANG.po)
#
# This will:
#  * Copy the original .md files, after stripping their metadata, to a
#    temporary build directory.
#  * Use these stripped .md files as the "master" document for po4a.
#  * Iterate over each .po file, figuring out which language it is for
#    based on its filename.
#  * For each .md/language pair, invoke po4a to assemble a translated
#    .md (still with stripped front matter).
#  * Fetch the front matter from the original source .md file, and
#    combine it with the translated, stripped .md file.
#  * Write the result to SRC_TYPE/LANG/FILE.md.
#
# Reads the globals DIR_SRC, DIR_BUILD and DIR_PO. Does nothing beyond
# the copy step when there are no po/POT_TYPE.*.po files.
#
function generate_md_files {
  local src_type=$1
  local pot_type=$2
  local src_subdir=${DIR_SRC}/${src_type}
  local build_subdir=${DIR_BUILD}/${src_type}
  local po po_file lang_code out_dir_i18n_md build_dir_i18n_md
  local md md_file out_tmp_md_file out_md_file
  local rel_build_subdir src_md_file translated title title_escaped

  echo "Converting .md files (from $build_subdir) based on .po files..."
  cp_md_strip_frontmatter_dir "${src_subdir}" "${build_subdir}/md"

  if [ "$(check_for_po "${pot_type}")" != true ]; then
    return 0
  fi

  # Need to take the `realpath`, because msggrep will fail with
  # "./build/..." but succeed with "build/...". The value is the same
  # for every file, so it is computed once.
  rel_build_subdir=$(realpath --relative-to . "${build_subdir}")

  for po in "${DIR_PO}/${pot_type}".*.po; do
    po_file=$(basename -- "${po}")

    # The language code is the middle part of POT_TYPE.LANG.po. It is
    # deliberately NOT stored in a variable called LANG: that is the
    # locale environment variable, and overwriting it would change
    # the locale of every command run afterwards.
    lang_code=${po_file#"${pot_type}."}
    lang_code=${lang_code%.po}

    out_dir_i18n_md=${DIR_SRC}/${src_type}/${lang_code}
    build_dir_i18n_md=${build_subdir}/${lang_code}

    echo "Generating $lang_code translations from $po_file..."
    # Start from an empty output directory; the files below are
    # written with ">>" and rely on not existing yet.
    rm -rf -- "${out_dir_i18n_md:?}"
    mkdir -p -- "${out_dir_i18n_md}" "${build_dir_i18n_md}"

    for md in "${build_subdir}"/md/*.md; do
      md_file=$(basename -- "${md}")
      out_tmp_md_file=${build_dir_i18n_md}/${md_file}
      out_md_file=${out_dir_i18n_md}/${md_file}

      # Extract the part of the .po file that belongs to this
      # markdown file and count how many translated strings there
      # are for it. If none, then don't bother converting (it will
      # just take up space in our repo and make it harder to see
      # what is actually translated).
      src_md_file=${rel_build_subdir}/md/${md_file}
      translated=$(msggrep --location="${src_md_file}" "${po}" | msgattrib --translated | wc -l)
      if ((translated == 0)); then
        echo "Ignoring untranslated $out_md_file"
        continue
      fi

      echo "Translating $out_md_file"
      po4a-translate -f text -o markdown -L utf-8 -M utf-8 -m "${md}" -p "${po}" -l "${out_tmp_md_file}" -k 0

      # Extract the front matter from the source and add it to the
      # top of the final i18n .md file. The "title:" attribute is
      # replaced with the translated title, taken from the "# [TITLE]"
      # line we injected earlier, and a `lang:` attribute is added
      # right after it.
      title=$(head -n 1 "${out_tmp_md_file}" | sed 's/^# //')
      # The title ends up in the replacement part of a sed "s"
      # command, where "\", "/" and "&" are special; escape them so
      # that titles such as "A/B" or "Q & A" survive unchanged.
      title_escaped=$(printf '%s\n' "${title}" | sed 's|[\\/&]|\\&|g')
      extract_frontmatter "${src_subdir}/${md_file}" | sed "s/^title:.*/title: ${title_escaped}\nlang: ${lang_code}/" >> "${out_md_file}"

      # Finally, copy the translated .md file with no front matter,
      # and without the "# Title" we previously injected into there
      # either, into the final .md file.
      tail -n +2 "${out_tmp_md_file}" >> "${out_md_file}"
    done
  done
}
#################################################
# Helper functions used by both po2md and md2po #
#################################################
#
# Helper to check if there are any SRC_TYPE.LANG.po files. This helps
# to not try and iterate over the files if they don't exist.
#
# Usage: check_for_po SRC_TYPE
#
# Prints "true" when at least one path matches DIR_PO/SRC_TYPE.*.po
# and "false" otherwise. The argument is kept in a local variable so
# that calling this helper directly (outside a command substitution)
# cannot overwrite the caller's SRC_TYPE.
#
function check_for_po {
  local src_type=$1

  # compgen -G succeeds only when the glob matches something.
  if compgen -G "${DIR_PO}/${src_type}.*.po" > /dev/null; then
    echo true
  else
    echo false
  fi
}
#
# A helper function for generate_pot_file and generate_md_files
# because they both need to do the same thing. That is, they both
# need to strip the front matter, then add back in a pseudo "# Title"
# line, where "Title" is read from the front matter's "title: "
# attribute, and then write to a temporary build directory.
#
# Usage: cp_md_strip_frontmatter_dir SRC_MD_DIR BUILD_MD_DIR
#
# Where SRC_MD_DIR contains .md files with front matter (delineated
# by ---) and BUILD_MD_DIR is where the resulting .md files are to
# be copied, after stripping their front matter.
#
# Returns non-zero (with a message on stderr) when a file's front
# matter has no top-level "title:" attribute.
#
function cp_md_strip_frontmatter_dir {
  local src_md_dir=$1
  local build_md_dir=$2
  local src_md_file build_md_file title_line title

  echo "Copying .md files and stripping front matter..."
  mkdir -p -- "${build_md_dir}"

  for src_md_file in "${src_md_dir}"/*.md; do
    build_md_file=${build_md_dir}/$(basename -- "${src_md_file}")

    # We cheat, by stripping the front matter, and replacing it
    # with "# Title" (where Title is taken from the "title: "
    # attribute in the front matter we are stripping). Then we can
    # remove that line when it comes time to reassemble the
    # translated files again. This ensures that po4a is able to
    # make the title available for translation, and also that it
    # is alongside the rest of the document when Weblate shows
    # each of the strings that belong to a document.
    #
    # The match is anchored to the start of the line so that
    # attributes such as "subtitle:" or values that merely contain
    # "title:" are not mistaken for the title.
    if ! title_line=$(extract_frontmatter "${src_md_file}" | grep '^title:'); then
      echo "ERROR: no 'title:' attribute found in the front matter of ${src_md_file}" >&2
      return 1
    fi
    # Should there be several matches, only the first one is the title.
    title_line=${title_line%%$'\n'*}
    title=$(printf '%s\n' "${title_line}" | sed 's/^title:[[:space:]]*//')
    echo "# $title" > "${build_md_file}"

    # Strip front matter from .md files and write to temporary
    # location. https://stackoverflow.com/a/28222257/2391921
    sed '1 { /^---/ { :a N; /\n---/! ba; d} }' "${src_md_file}" >> "${build_md_file}"
  done
}
#
# Usage: extract_frontmatter FILE
#
# Prints the front matter of FILE (the text from a "---" up to and
# including the next "---" followed by a newline) to stdout.
#
function extract_frontmatter {
  local file=$1

  # See https://stackoverflow.com/a/7167115/2391921 for matching
  # multiline strings with grep. The -z flag makes grep use NUL rather
  # than newline as the line separator, which results in "Binary file
  # matches" rather than more useful output (i.e. the actual matching
  # content). The -a switch makes grep treat the input as text again.
  # For some reason though there is still a NUL byte at the end of the
  # output which trips up this script, so sed replaces it with a
  # newline.
  #
  # NOTE(review): the pattern is not anchored to the start of the
  # file, so a later pair of "---" in the document body would be
  # printed as well - confirm that no source file contains one.
  grep -Pzao '(?s)---.*?---\n' -- "${file}" | sed 's/\x00/\n/'
}
################################
# CLI interface to this script #
################################
#
# Usage: print_usage
#
# Prints the command line help to stdout, changes back to the
# directory the script was started from (PREVIOUS_CWD) and exits the
# script with status 0.
#
function print_usage {
  # The description under each sub-command matches what that
  # sub-command does: po2md turns .po files into .md files, md2po
  # turns .md files into .po files.
  cat << EOT
Internationalization script for F-Droid Jekyll website.
Usage:
i18n.sh po2md
Convert all translated .po files into localized .md files.
i18n.sh md2po
Convert all .md source files into .po files ready to be translated.
EOT
  cd -- "${PREVIOUS_CWD}"
  exit 0
}
# Dispatch on the first command line argument. A missing or
# unrecognised argument prints the usage text; print_usage exits the
# script itself. "${1-}" keeps `set -u` from failing when no argument
# was given.
case "${1-}" in
  md2po)
    md2po
    ;;
  po2md)
    po2md
    ;;
  *)
    print_usage
    ;;
esac

# Go back to the directory the script was started from. Quoted so
# that a start directory containing whitespace does not make `cd`
# fail (which, under `set -e`, would turn a successful run into an
# error exit).
cd -- "${PREVIOUS_CWD}"