-
Notifications
You must be signed in to change notification settings - Fork 401
/
i18n.py
354 lines (297 loc) · 12 KB
/
i18n.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
# i18n.py
#
# Copyright (C) 2012-2016 Red Hat, Inc.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions of
# the GNU General Public License v.2, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY expressed or implied, including the implied warranties of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details. You should have received a copy of the
# GNU General Public License along with this program; if not, write to the
# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA. Any Red Hat trademarks that are incorporated in the
# source code or documentation are not subject to the GNU General Public
# License and may only be used or replicated with the express permission of
# Red Hat, Inc.
#
from __future__ import print_function
from __future__ import unicode_literals
from dnf.pycomp import unicode
import dnf
import locale
import os
import signal
import sys
import unicodedata
"""
Centralize i18n stuff here. Must be unittested.
"""
class UnicodeStream(object):
    """Proxy around a text stream that degrades gracefully on encoding errors.

    ``write`` converts non-``str`` input using ``encoding`` and, when the
    wrapped stream raises ``UnicodeEncodeError``, retries with the offending
    characters backslash-escaped. Every other attribute is delegated to the
    wrapped stream.
    """

    def __init__(self, stream, encoding):
        """Remember the wrapped *stream* and the *encoding* used for input
        conversion."""
        self.stream = stream
        self.encoding = encoding

    def write(self, s):
        """Write *s* to the wrapped stream without raising on unencodable
        characters."""
        if not isinstance(s, str):
            s = (s.decode(self.encoding, 'replace') if dnf.pycomp.PY3 else
                 s.encode(self.encoding, 'replace'))
        try:
            self.stream.write(s)
        except UnicodeEncodeError:
            # The wrapped stream may report no encoding at all; fall back to
            # the encoding this wrapper was created with.
            target = getattr(self.stream, 'encoding', None) or self.encoding
            s_bytes = s.encode(target, 'backslashreplace')
            if hasattr(self.stream, 'buffer'):
                # Text written earlier may still be pending in the text
                # layer; flush it so the raw bytes do not overtake it.
                self.stream.flush()
                self.stream.buffer.write(s_bytes)
            else:
                s = s_bytes.decode(target, 'ignore')
                self.stream.write(s)

    def __getattr__(self, name):
        """Delegate unknown attributes to the wrapped stream."""
        if name == 'stream':
            # ``stream`` itself is missing (instance not initialised);
            # looking it up again would recurse forever.
            raise AttributeError(name)
        return getattr(self.stream, name)
def _full_ucd_support(encoding):
"""Return true if encoding can express any Unicode character.
Even if an encoding can express all accented letters in the given language,
we can't generally settle for it in DNF since sometimes we output special
characters like the registered trademark symbol (U+00AE) and surprisingly
many national non-unicode encodings, including e.g. ASCII and ISO-8859-2,
don't contain it.
"""
if encoding is None:
return False
lower = encoding.lower()
if lower.startswith('utf-') or lower.startswith('utf_'):
return True
return False
def _guess_encoding():
""" Take the best shot at the current system's string encoding. """
encoding = locale.getpreferredencoding(False)
return 'utf-8' if encoding.startswith("ANSI") else encoding
def setup_locale():
    """ Activate the locale from the environment, falling back to C.UTF-8 and
        then to C when that fails.

        On fallback the chosen locale is also exported as LC_ALL and a notice
        is printed to stderr.
    """
    try:
        dnf.pycomp.setlocale(locale.LC_ALL, '')
    except locale.Error:
        # default to C.UTF-8 or C locale if we got a failure.
        fallback = 'C.UTF-8'
        try:
            dnf.pycomp.setlocale(locale.LC_ALL, fallback)
        except locale.Error:
            fallback = 'C'
            dnf.pycomp.setlocale(locale.LC_ALL, fallback)
        # Only reached when one of the fallbacks was accepted.
        os.environ['LC_ALL'] = fallback
        notice = 'Failed to set locale, defaulting to {}'.format(
            os.environ['LC_ALL'])
        print(notice, file=sys.stderr)
def setup_stdout():
    """ Make sure sys.stdout can express any Unicode character.

        When stdout is not a terminal, SIGPIPE is first reset to its default
        action. If the stream's encoding is not Unicode-capable, sys.stdout
        is replaced by a UnicodeStream wrapper.

        Returns True if stdout was already suitable and left untouched,
        False if it had to be wrapped.
    """
    original = sys.stdout
    if not original.isatty():
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
    # A stream without an ``encoding`` attribute counts as unknown encoding.
    encoding = getattr(original, 'encoding', None)
    if _full_ucd_support(encoding):
        return True
    sys.stdout = UnicodeStream(original, _guess_encoding())
    return False
def ucd_input(ucstring):
    # :api, deprecated in 2.0.0, will be erased when python2 is abandoned
    """ Show ucstring as a prompt and return one line of user input.

        The prompt is emitted with print (no trailing newline) rather than
        handed to raw_input, because raw_input doesn't encode the passed
        string and the output goes into stderr.
    """
    print(ucstring, end='')
    answer = dnf.pycomp.raw_input()
    return answer
def ucd(obj):
    # :api, deprecated in 2.0.0, will be erased when python2 is abandoned
    """ Convert obj to a Unicode string, like the builtin unicode() but
        decoding with a guessed encoding and ignoring undecodable bytes. """
    if not dnf.pycomp.PY3:
        if isinstance(obj, dnf.pycomp.unicode):
            return obj
        if hasattr(obj, '__unicode__'):
            # see the doc for the unicode() built-in. If obj implements
            # __unicode__, give it the first try and fall through to the
            # generic conversion below when it fails:
            try:
                return dnf.pycomp.unicode(obj)
            except UnicodeError:
                pass
        return dnf.pycomp.unicode(str(obj), _guess_encoding(), errors='ignore')

    if dnf.pycomp.is_py3bytes(obj):
        return str(obj, _guess_encoding(), errors='ignore')
    if isinstance(obj, str):
        return obj
    return str(obj)
# Functions for formatting output according to terminal width.
# They should be used instead of the built-in functions because they account
# for the different display widths of Unicode characters.
def _exact_width_char(uchar):
return 2 if unicodedata.east_asian_width(uchar) in ('W', 'F') else 1
def chop_str(msg, chop=None):
    """ Return a (width, text) pair for a Unicode string, where text is msg
        cut so that its display width does not exceed chop.

        With chop=None nothing is cut and the full width is reported. This is
        what you want to use instead of %.*s, as it does the "right" thing
        with regard to different Unicode character width.
        Eg. "%.*s" % (10, msg)   <= becomes => "%s" % (chop_str(msg, 10)) """
    if chop is None:
        return exact_width(msg), msg

    used = 0
    kept = []
    for char in msg:
        candidate = used + _exact_width_char(char)
        if candidate > chop:
            # This character would overflow the limit; stop before it.
            break
        kept.append(char)
        used = candidate
    return used, "".join(kept)
def exact_width(msg):
    """ Return the display width of msg on a terminal screen
        (wide Asian characters count for two columns) """
    total = 0
    for char in msg:
        total += _exact_width_char(char)
    return total
def fill_exact_width(msg, fill, chop=None, left=True, prefix='', suffix=''):
    """ Pad msg with spaces up to the display width "fill", optionally
        chopping it to "chop" columns first.

        With left=True the text is left-aligned (padding goes after it),
        otherwise the padding goes in front. This is what you want to use
        instead of %*.*s, as it does the "right" thing with regard to
        different Unicode character width.

        prefix and suffix should be used for "invisible" bytes, like
        highlighting; they surround the text and are not counted as width.

        Examples:

        ``"%-*.*s" % (10, 20, msg)`` becomes
            ``"%s" % (fill_exact_width(msg, 10, 20))``.

        ``"%20.10s" % (msg)`` becomes
            ``"%s" % (fill_exact_width(msg, 20, 10, left=False))``.

        ``"%s%.10s%s" % (pre, msg, suf)`` becomes
            ``"%s" % (fill_exact_width(msg, 0, 10, prefix=pre, suffix=suf))``.
    """
    width, chopped = chop_str(msg, chop)
    if width >= fill:
        # Wide enough already: no padding, only the optional decoration.
        if not (prefix or suffix):
            return chopped
        return ''.join([prefix, chopped, suffix])

    padding = " " * (fill - width)
    pieces = [prefix, chopped, suffix]
    if left:
        pieces.append(padding)
    else:
        pieces.insert(0, padding)
    return ''.join(pieces)
def textwrap_fill(text, width=70, initial_indent='', subsequent_indent=''):
    """ Works like we want textwrap.wrap() to work, uses Unicode strings
        and doesn't screw up lists/blocks/etc.

        :param text: the text to wrap; tabs are expanded to 8 spaces
        :param width: maximum display width of an output line
        :param initial_indent: string put in front of the first output line
        :param subsequent_indent: string put in front of every later line
        :return: the wrapped text as one string joined by newlines
    """

    # Characters accepted as the marker of a list item.
    list_markers = ("-", "*", ".", "o", "\u2022", "\u2023", "\u2218")

    def _indent_at_beg(line):
        """ Return (leading spaces, list text indent) for line; the second
            value is 0 when the line does not start a list item. """
        count = 0
        byte = 'X'
        for byte in line:
            if byte != ' ':
                break
            count += 1
        # The text is Unicode here, so the first visible character is compared
        # against the markers themselves (not against a leading UTF-8 byte).
        if byte not in list_markers:
            return count, 0
        list_chr = chop_str(line[count:], 1)[1]
        if list_chr in list_markers:
            nxt = _indent_at_beg(line[count+len(list_chr):])
            nxt = nxt[1] or nxt[0]
            if nxt:
                return count, count + 1 + nxt
        return count, 0

    text = text.rstrip('\n')
    lines = text.replace('\t', ' ' * 8).split('\n')

    ret = []
    indent = initial_indent
    # True while the previous input line overflowed and its tail is still
    # held in ``indent`` waiting to be merged with the next line.
    wrap_last = False
    csab = 0         # leading spaces of the current line
    cspc_indent = 0  # list text indent of the current line (0 = not a list)
    for line in lines:
        line = line.rstrip(' ')
        # Keep the previous line's values before computing the current ones.
        (lsab, lspc_indent) = (csab, cspc_indent)
        (csab, cspc_indent) = _indent_at_beg(line)
        force_nl = False  # We want to stop wrapping under "certain" conditions:
        if wrap_last and cspc_indent:        # if line starts a list or
            force_nl = True
        if wrap_last and csab == len(line):  # is empty line
            force_nl = True
        # if line doesn't continue a list and is "block indented"
        if wrap_last and not lspc_indent:
            if csab >= 4 and csab != lsab:
                force_nl = True
        if force_nl:
            # Emit the pending tail on its own line instead of merging it.
            ret.append(indent.rstrip(' '))
            indent = subsequent_indent
            wrap_last = False
        if csab == len(line):  # empty line, remove spaces to make it easier.
            line = ''
        if wrap_last:
            # Continuation of the wrapped paragraph: drop its own indentation
            # and keep the list indent of the line it continues.
            line = line.lstrip(' ')
            cspc_indent = lspc_indent

        if exact_width(indent + line) <= width:
            # Fits as is, no wrapping needed.
            wrap_last = False
            ret.append(indent + line)
            indent = subsequent_indent
            continue

        wrap_last = True
        words = line.split(' ')
        line = indent
        spcs = cspc_indent
        if not spcs and csab >= 4:
            spcs = csab
        for word in words:
            # Break before a word that would overflow, unless the line so far
            # holds nothing beyond the indentation.
            if (width < exact_width(line + word)) and \
               (exact_width(line) > exact_width(subsequent_indent)):
                ret.append(line.rstrip(' '))
                line = subsequent_indent + ' ' * spcs
            line += word
            line += ' '
        # The unfinished last line becomes the prefix of the next input line.
        indent = line.rstrip(' ') + ' '
    if wrap_last:
        ret.append(indent.rstrip(' '))

    return '\n'.join(ret)
def select_short_long(width, msg_short, msg_long):
    """ Pick the abbreviated or the full message, whichever is the widest one
        that still fits into "width" columns.

        If a caller by mistake passes the longer string as msg_short and the
        shorter one as msg_long, the mistake is recognized and the arguments
        are treated as if swapped. This function is especially useful in the
        i18n context when you cannot predict how long are the translated
        messages.

        Limitations:

        1. If msg_short is longer than width you will still get an overflow.
           This function does not abbreviate the string.
        2. You are not obliged to provide an actually abbreviated string, it is
           perfectly correct to pass the same string twice if you don't want
           any abbreviation. However, if you provide two different strings but
           having the same width this function is unable to recognize which one
           is correct and you should assume that it is unpredictable which one
           is returned.

       Example:

       ``select_short_long (10, _("Repo"), _("Repository"))``

       will return "Repository" in English but the results in other languages
       may be different. """
    short_width = exact_width(msg_short)
    long_width = exact_width(msg_long)

    # Two strings of the same width: nothing to choose by.
    if short_width == long_width:
        return msg_long

    # Sort the pair by actual width, whatever the argument order was.
    if short_width > long_width:
        wider, wider_width, narrower = msg_short, short_width, msg_long
    else:
        wider, wider_width, narrower = msg_long, long_width, msg_short

    if wider_width <= width:
        return wider
    return narrower
def translation(name):
    # :api, deprecated in 2.0.0, will be erased when python2 is abandoned
    """ Set up gettext translations for the domain "name".

        Initializes the locale, loads the catalog (with fallback when none is
        found) and returns the functions produced by
        dnf.pycomp.gettext_setup, each wrapped so that its result is passed
        through ucd(). """
    setup_locale()

    def ucd_wrapper(fnc):
        def wrapped(*w):
            return ucd(fnc(*w))
        return wrapped

    catalog = dnf.pycomp.gettext.translation(name, fallback=True)
    gettext_functions = dnf.pycomp.gettext_setup(catalog)
    return map(ucd_wrapper, gettext_functions)
def pgettext(context, message):
    """ Translate message within the given context.

        The lookup key is context and message joined by the \\004 separator.
        When the result still contains that separator (the key came back
        untranslated), the plain message is returned instead. """
    translated = _(context + chr(4) + message)
    return message if "\004" in translated else translated
# setup translations
# NOTE: this runs at import time, so importing this module calls
# translation("dnf") and, through it, setup_locale().
# The result is unpacked into exactly two callables - presumably the singular
# and the plural lookup; confirm against dnf.pycomp.gettext_setup.
_, P_ = translation("dnf")
# Short alias for the context-aware lookup.
C_ = pgettext