Skip to content

Commit

Permalink
added test for _encode_utf16_str
Browse files Browse the repository at this point in the history
  • Loading branch information
decalage2 committed Jan 2, 2024
1 parent bd62737 commit a0a70a5
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
8 changes: 7 additions & 1 deletion olefile/olefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -833,6 +833,7 @@ def __init__(self, entry, sid, ole_file):
log.debug(' - sect: %Xh' % self.isectStart)
log.debug(' - SID left: %d, right: %d, child: %d' % (self.sid_left,
self.sid_right, self.sid_child))
log.debug(' - name_utf16: %r' % self.name_utf16)

# sizeHigh is only used for 4K sectors, it should be zero for 512 bytes
# sectors, BUT apparently some implementations set it as 0xFFFFFFFF, 1
Expand Down Expand Up @@ -1255,7 +1256,12 @@ def _encode_utf16_str(self, s, errors='replace'):
# an encoding has been specified for path names:
s = s.decode(self.path_encoding, errors)
# Else we assume that s is a unicode string
return s.encode('UTF-16LE', errors)
s_utf16 = s.encode('UTF-16LE', errors)
# remove the BOM if present (2 bytes FFFE in little-endian):
# => not needed with UTF-16LE codec, only UTF-16 adds the BOM
# if s_utf16.startswith(b'\xff\xfe'):
# s_utf16 = s_utf16[2:]
return s_utf16


def open(self, filename, write_mode=False):
Expand Down
15 changes: 15 additions & 0 deletions tests/test_olefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,21 @@ def test_write_sect_too_large(self):
with self.assertRaises(ValueError):
ole.write_sect(ole.nb_sect - 1, b"x" * size)

def test_encode_utf16_str(self):
    """Check that _encode_utf16_str returns UTF-16LE bytes with no BOM."""
    # Each pair is (unicode input, expected little-endian UTF-16 bytes).
    # None of the expected values starts with the FF FE byte order mark.
    cases = (
        # plain ASCII character: one 16-bit code unit
        (u'a', b'a\x00'),
        # U+265E BLACK CHESS KNIGHT: encoded with 2 bytes in UTF-16
        (u'\u265E', b'\x5E\x26'),
        # U+1F609 WINKING FACE: encoded with 4 bytes in UTF-16
        (u'\U0001F609', b'\x3D\xD8\x09\xDE'),
    )
    # path_encoding=None: no path encoding, names are unicode strings
    with olefile.OleFileIO(self.ole_file, path_encoding=None) as ole:
        for text, expected in cases:
            encoded = ole._encode_utf16_str(text)
            self.assertEqual(encoded, expected)


class FileHandleCloseTest(unittest.TestCase):
"""Test file handles are closed correctly."""
Expand Down

0 comments on commit a0a70a5

Please sign in to comment.