From 0426d6b8f98b282e027fd95fa52342bf1672a5fe Mon Sep 17 00:00:00 2001
From: David Cottrell
Date: Tue, 24 Aug 2021 16:23:38 +0100
Subject: [PATCH] ENH Add check_call_in_cache method to check cache without calling function (#820)

Co-authored-by: cottrell
Co-authored-by: Olivier Grisel
Co-authored-by: Thomas Moreau
---
 CHANGES.rst                |  3 +++
 doc/memory.rst             |  2 +-
 joblib/memory.py           | 30 +++++++++++++++++++++++-------
 joblib/test/test_memory.py | 13 +++++++++++++
 4 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index f9ad25f7c..c1e8c5290 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -17,6 +17,9 @@ Development version
 1.0.1
 -----
 
+- Add check_call_in_cache method to check cache without calling function.
+  https://github.com/joblib/joblib/pull/820
+
 - dask: avoid redundant scattering of large arguments to make a more
   efficient use of the network resources and avoid crashing dask with
   "OSError: [Errno 55] No buffer space available"
diff --git a/doc/memory.rst b/doc/memory.rst
index 0b9fbaef7..0cc701a1f 100644
--- a/doc/memory.rst
+++ b/doc/memory.rst
@@ -427,7 +427,7 @@ objects that, in addition of behaving like normal functions, expose
 methods useful for cache exploration and management.
 
 .. autoclass:: MemorizedFunc
-    :members: __init__, call, clear
+    :members: __init__, call, clear, check_call_in_cache
 
 
 ..
diff --git a/joblib/memory.py b/joblib/memory.py
index 93124ee9e..b660f1479 100644
--- a/joblib/memory.py
+++ b/joblib/memory.py
@@ -19,7 +19,6 @@
 import traceback
 import warnings
 import inspect
-import sys
 import weakref
 
 from tokenize import open as open_py_source
@@ -33,7 +32,6 @@
 from ._store_backends import StoreBackendBase, FileSystemStoreBackend
 
 
-
 FIRST_LINE_TEXT = "# first line:"
 
 # TODO: The following object should have a data store object as a sub
@@ -136,7 +134,6 @@ def _store_backend_factory(backend, location, verbose=0, backend_options=None):
             "supported by joblib. Returning None instead.".format(
                 location.__class__.__name__), UserWarning)
 
-
     return None
 
 
@@ -361,6 +358,12 @@ def clear(self, warn=True):
         # Argument "warn" is for compatibility with MemorizedFunc.clear
         pass
 
+    def call(self, *args, **kwargs):
+        return self.func(*args, **kwargs)
+
+    def check_call_in_cache(self, *args, **kwargs):
+        return False
+
 
 ###############################################################################
 # class `MemorizedFunc`
@@ -606,6 +609,21 @@ def __getstate__(self):
 
         return state
 
+    def check_call_in_cache(self, *args, **kwargs):
+        """Check if function call is in the memory cache.
+
+        Does not call the function or do any work besides func inspection
+        and arg hashing.
+
+        Returns
+        -------
+        is_call_in_cache: bool
+            Whether or not the result of the function has been cached
+            for the input arguments that have been passed.
+        """
+        func_id, args_id = self._get_output_identifiers(*args, **kwargs)
+        return self.store_backend.contains_item((func_id, args_id))
+
     # ------------------------------------------------------------------------
     # Private interface
     # ------------------------------------------------------------------------
@@ -683,8 +701,8 @@ def _check_previous_func_code(self, stacklevel=2):
                 extract_first_line(
                     self.store_backend.get_cached_func_code([func_id]))
         except (IOError, OSError):  # some backend can also raise OSError
-                self._write_func_code(func_code, first_line)
-                return False
+            self._write_func_code(func_code, first_line)
+            return False
         if old_func_code == func_code:
             return True
 
@@ -821,8 +839,6 @@ def _persist_input(self, duration, args, kwargs, this_duration_limit=0.5):
                           % this_duration, stacklevel=5)
         return metadata
 
-    # XXX: Need a method to check if results are available.
-
     # ------------------------------------------------------------------------
     # Private `object` interface
     # ------------------------------------------------------------------------
diff --git a/joblib/test/test_memory.py b/joblib/test/test_memory.py
index 89d61d4b4..6f749667d 100644
--- a/joblib/test/test_memory.py
+++ b/joblib/test/test_memory.py
@@ -609,6 +609,19 @@ def test_persistence(tmpdir):
     gp(1)
 
 
+def test_check_call_in_cache(tmpdir):
+    for func in (MemorizedFunc(f, tmpdir.strpath),
+                 Memory(location=tmpdir.strpath, verbose=0).cache(f)):
+        result = func.check_call_in_cache(2)
+        assert not result
+        assert isinstance(result, bool)
+        assert func(2) == 5
+        result = func.check_call_in_cache(2)
+        assert result
+        assert isinstance(result, bool)
+        func.clear()
+
+
 def test_call_and_shelve(tmpdir):
     # Test MemorizedFunc outputting a reference to cache.
 