Skip to content

Commit

Permalink
Fix bug. Read binary content that contains \x00 will break in read_cs…
Browse files Browse the repository at this point in the history
…tring(). (#12)

* Fix Bug: If \x00 is the content of file, jfs.read() and FileIO.read() will not truncate it

* add test case for the scenario that content has \x00

* add test content in test_read_content_with_zero()

* shorten the CONTENT_WITH_ZERO constant

* if the jfs_ function return size, use buf.raw[:size] instead of read_cstring()

* add test_pread(), test case for pread.

* revert read_cstring()
  • Loading branch information
HugoAhoy committed Sep 2, 2021
1 parent 3545e3e commit 4a99014
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 7 deletions.
13 changes: 6 additions & 7 deletions juicefs/juicefs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
create_statvfs_result,
create_summary,
parse_xattrs,
read_cstring,
)
from juicefs.utils import check_juicefs_error, create_os_error

Expand Down Expand Up @@ -234,8 +233,8 @@ def readlink(self, path: str) -> str:
"""
bufsize = 4096
buf = create_string_buffer(bufsize)
self._lib.readlink[1](path, buf, bufsize)
return read_cstring(BytesIO(buf.raw)).decode()
size = self._lib.readlink[1](path, buf, bufsize)
return buf.raw[:size].decode()

def open(self, path: str, flags: int, mode: int = DEFAULT_FILE_MODE) -> int: # fd
"""Open the file *path* and set various flags according to *flags* and
Expand Down Expand Up @@ -306,17 +305,17 @@ def read(self, fd: int, size: int) -> bytes:
If the end of the file referred to by fd has been reached, an empty bytes object is returned.
"""
buf = create_string_buffer(size)
self._lib[fd].read[0](buf, size)
return bytes(read_cstring(BytesIO(buf.raw)))
size = self._lib[fd].read[0](buf, size)
return buf.raw[:size]

def pread(self, fd: int, size: int, offset: int) -> bytes:
"""Read from a file descriptor, *fd*, at a position of *offset*.
It will read up to *size* number of bytes. The file offset remains unchanged.
"""
buf = create_string_buffer(size)
self._lib[fd].pread[0](buf, size, offset)
return bytes(read_cstring(BytesIO(buf.raw)))
size = self._lib[fd].pread[0](buf, size, offset)
return buf.raw[:size]

def write(self, fd: int, content: bytes) -> int:
"""Write the *content* to file descriptor *fd*.
Expand Down
36 changes: 36 additions & 0 deletions tests/test_juicefs.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ def os_dirname(jfs):

CONTENT = b"text"
CONTENT2 = b"text/text"
CONTENT_WITH_ZERO = (
b"\x05\x00\x00\x00\x00\x00\x00\x00\x98\x00\x00\x00\x00\x00\x00\x00-\x01"
)


def test_rename(jfs, filename, filename2, dirname):
Expand Down Expand Up @@ -738,6 +741,39 @@ def test_summary(jfs, dirname):
assert summary.dirs == 2


def test_pread(jfs, filename):
jfs.create(filename)
fdw = jfs.open(filename, os.O_WRONLY)
jfs.write(fdw, CONTENT_WITH_ZERO)
jfs.close(fdw)

fdr = jfs.open(filename, os.O_RDONLY)
for offset in range(5):
for size in range(len(CONTENT_WITH_ZERO) + 5):
start_pos = min(offset, len(CONTENT_WITH_ZERO))
stop_pos = min(offset + size, len(CONTENT_WITH_ZERO))
assert jfs.pread(fdr, size, offset) == CONTENT_WITH_ZERO[start_pos:stop_pos]
jfs.close(fdr)


def test_read_content_with_zero(jfs, filename):
jfs.create(filename)
fdw = jfs.open(filename, os.O_WRONLY)
content_len = jfs.write(fdw, CONTENT_WITH_ZERO)
assert content_len == len(CONTENT_WITH_ZERO)
jfs.close(fdw)

fdr = jfs.open(filename, os.O_RDONLY)
assert jfs.read(fdr, 7) == CONTENT_WITH_ZERO[:7]
assert jfs.read(fdr, len(CONTENT_WITH_ZERO) + 10) == CONTENT_WITH_ZERO[7:]
jfs.close(fdr)

from juicefs.io import open as _jfs_open

with _jfs_open(jfs, filename, "rb") as f:
f.read() == CONTENT_WITH_ZERO


# TODO: read-write mode not supported in this version
# def test_read_write_wbp(jfs, filename):
# TEXT = b"hello world"
Expand Down

0 comments on commit 4a99014

Please sign in to comment.