updates
This commit is contained in:
270
Backend/venv/lib/python3.12/site-packages/joblib/hashing.py
Normal file
270
Backend/venv/lib/python3.12/site-packages/joblib/hashing.py
Normal file
@@ -0,0 +1,270 @@
|
||||
"""
|
||||
Fast cryptographic hash of Python objects, with a special case for fast
|
||||
hashing of numpy arrays.
|
||||
"""
|
||||
|
||||
# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
|
||||
# Copyright (c) 2009 Gael Varoquaux
|
||||
# License: BSD Style, 3 clauses.
|
||||
|
||||
import decimal
|
||||
import hashlib
|
||||
import io
|
||||
import pickle
|
||||
import struct
|
||||
import sys
|
||||
import types
|
||||
|
||||
# Use the pure-Python pickler: unlike the C-accelerated ``pickle.Pickler``,
# its ``save``/``memoize``/``dispatch`` internals can be overridden, which
# the ``Hasher`` subclass below relies on.
Pickler = pickle._Pickler
|
||||
|
||||
|
||||
class _ConsistentSet(object):
|
||||
"""Class used to ensure the hash of Sets is preserved
|
||||
whatever the order of its items.
|
||||
"""
|
||||
|
||||
def __init__(self, set_sequence):
|
||||
# Forces order of elements in set to ensure consistent hash.
|
||||
try:
|
||||
# Trying first to order the set assuming the type of elements is
|
||||
# consistent and orderable.
|
||||
# This fails on python 3 when elements are unorderable
|
||||
# but we keep it in a try as it's faster.
|
||||
self._sequence = sorted(set_sequence)
|
||||
except (TypeError, decimal.InvalidOperation):
|
||||
# If elements are unorderable, sorting them using their hash.
|
||||
# This is slower but works in any case.
|
||||
self._sequence = sorted((hash(e) for e in set_sequence))
|
||||
|
||||
|
||||
class _MyHash(object):
|
||||
"""Class used to hash objects that won't normally pickle"""
|
||||
|
||||
def __init__(self, *args):
|
||||
self.args = args
|
||||
|
||||
|
||||
class Hasher(Pickler):
    """A subclass of pickler, to do cryptographic hashing, rather than
    pickling. This is used to produce a unique hash of the given
    Python object that is not necessarily cryptographically secure.
    """

    def __init__(self, hash_name="md5"):
        # All pickled bytes accumulate here before being fed to the hash.
        self.stream = io.BytesIO()
        # By default we want a pickle protocol that only changes with
        # the major python version and not the minor one
        protocol = 3
        Pickler.__init__(self, self.stream, protocol=protocol)
        # Initialise the hash obj. usedforsecurity=False flags this as a
        # non-security digest use (relevant on FIPS-restricted builds).
        self._hash = hashlib.new(hash_name, usedforsecurity=False)

    def hash(self, obj, return_digest=True):
        """Pickle ``obj`` into the internal stream, fold the bytes into
        the hash object, and return the hex digest (unless
        ``return_digest`` is False).
        """
        try:
            self.dump(obj)
        except pickle.PicklingError as e:
            # Enrich the exception with the object being hashed before
            # re-raising, to make failures easier to diagnose.
            e.args += ("PicklingError while hashing %r: %r" % (obj, e),)
            raise
        dumps = self.stream.getvalue()
        self._hash.update(dumps)
        if return_digest:
            return self._hash.hexdigest()

    def save(self, obj):
        # type({}.pop) is builtin_function_or_method, so this also
        # catches bound built-in methods.
        if isinstance(obj, (types.MethodType, type({}.pop))):
            # the Pickler cannot pickle instance methods; here we decompose
            # them into components that make them uniquely identifiable
            if hasattr(obj, "__func__"):
                func_name = obj.__func__.__name__
            else:
                func_name = obj.__name__
            inst = obj.__self__
            if type(inst) is type(pickle):
                # Bound to a module: identify it by the module's name.
                obj = _MyHash(func_name, inst.__name__)
            elif inst is None:
                # type(None) or type(module) do not pickle
                obj = _MyHash(func_name, inst)
            else:
                cls = obj.__self__.__class__
                obj = _MyHash(func_name, inst, cls)
        Pickler.save(self, obj)

    def memoize(self, obj):
        # We want hashing to be sensitive to value instead of reference.
        # For example we want ['aa', 'aa'] and ['aa', 'aaZ'[:2]]
        # to hash to the same value and that's why we disable memoization
        # for strings
        if isinstance(obj, (bytes, str)):
            return
        Pickler.memoize(self, obj)

    # The dispatch table of the pickler is not accessible in Python
    # 3, as these lines are only bugware for IPython, we skip them.
    def save_global(self, obj, name=None, pack=struct.pack):
        # We have to override this method in order to deal with objects
        # defined interactively in IPython that are not injected in
        # __main__
        # 'pack' is accepted only for signature compatibility with the
        # base Pickler; it is dropped before delegating.
        kwargs = dict(name=name, pack=pack)
        del kwargs["pack"]
        try:
            Pickler.save_global(self, obj, **kwargs)
        except pickle.PicklingError:
            Pickler.save_global(self, obj, **kwargs)
            module = getattr(obj, "__module__", None)
            if module == "__main__":
                my_name = name
                if my_name is None:
                    my_name = obj.__name__
                mod = sys.modules[module]
                if not hasattr(mod, my_name):
                    # IPython doesn't inject the variables define
                    # interactively in __main__
                    setattr(mod, my_name, obj)

    # Route types the default pickler handles by reference through
    # save_global so interactively-defined objects hash consistently.
    dispatch = Pickler.dispatch.copy()
    # builtin
    dispatch[type(len)] = save_global
    # type
    dispatch[type(object)] = save_global
    # classobj
    dispatch[type(Pickler)] = save_global
    # function
    dispatch[type(pickle.dump)] = save_global

    # We use *args in _batch_setitems signature because _batch_setitems has an
    # additional 'obj' argument in Python 3.14
    def _batch_setitems(self, items, *args):
        # forces order of keys in dict to ensure consistent hash.
        try:
            # Trying first to compare dict assuming the type of keys is
            # consistent and orderable.
            # This fails on python 3 when keys are unorderable
            # but we keep it in a try as it's faster.
            Pickler._batch_setitems(self, iter(sorted(items)), *args)
        except TypeError:
            # If keys are unorderable, sorting them using their hash. This is
            # slower but works in any case.
            Pickler._batch_setitems(
                self, iter(sorted((hash(k), v) for k, v in items)), *args
            )

    def save_set(self, set_items):
        # forces order of items in Set to ensure consistent hash
        Pickler.save(self, _ConsistentSet(set_items))

    dispatch[type(set())] = save_set
|
||||
|
||||
|
||||
class NumpyHasher(Hasher):
    """Special case the hasher for when numpy is loaded."""

    def __init__(self, hash_name="md5", coerce_mmap=False):
        """
        Parameters
        ----------
        hash_name: string
            The hash algorithm to be used
        coerce_mmap: boolean
            Make no difference between np.memmap and np.ndarray
            objects.
        """
        self.coerce_mmap = coerce_mmap
        Hasher.__init__(self, hash_name=hash_name)
        # delayed import of numpy, to avoid tight coupling
        import numpy as np

        self.np = np
        # np.getbuffer only exists on very old numpy versions; fall back
        # to the built-in memoryview otherwise.
        if hasattr(np, "getbuffer"):
            self._getbuffer = np.getbuffer
        else:
            self._getbuffer = memoryview

    def save(self, obj):
        """Subclass the save method, to hash ndarray subclass, rather
        than pickling them. Off course, this is a total abuse of
        the Pickler class.
        """
        # Object dtypes fall through to regular pickling below, since
        # their buffer contents are pointers, not values.
        if isinstance(obj, self.np.ndarray) and not obj.dtype.hasobject:
            # Compute a hash of the object
            # The update function of the hash requires a c_contiguous buffer.
            if obj.shape == ():
                # 0d arrays need to be flattened because viewing them as bytes
                # raises a ValueError exception.
                obj_c_contiguous = obj.flatten()
            elif obj.flags.c_contiguous:
                obj_c_contiguous = obj
            elif obj.flags.f_contiguous:
                # The transpose of an F-contiguous array is C-contiguous.
                obj_c_contiguous = obj.T
            else:
                # Cater for non-single-segment arrays: this creates a
                # copy, and thus alleviates this issue.
                # XXX: There might be a more efficient way of doing this
                obj_c_contiguous = obj.flatten()

            # memoryview is not supported for some dtypes, e.g. datetime64, see
            # https://github.com/numpy/numpy/issues/4983. The
            # workaround is to view the array as bytes before
            # taking the memoryview.
            self._hash.update(self._getbuffer(obj_c_contiguous.view(self.np.uint8)))

            # We store the class, to be able to distinguish between
            # Objects with the same binary content, but different
            # classes.
            if self.coerce_mmap and isinstance(obj, self.np.memmap):
                # We don't make the difference between memmap and
                # normal ndarrays, to be able to reload previously
                # computed results with memmap.
                klass = self.np.ndarray
            else:
                klass = obj.__class__
            # We also return the dtype and the shape, to distinguish
            # different views on the same data with different dtypes.

            # The object will be pickled by the pickler hashed at the end.
            obj = (klass, ("HASHED", obj.dtype, obj.shape, obj.strides))
        elif isinstance(obj, self.np.dtype):
            # numpy.dtype consistent hashing is tricky to get right. This comes
            # from the fact that atomic np.dtype objects are interned:
            # ``np.dtype('f4') is np.dtype('f4')``. The situation is
            # complicated by the fact that this interning does not resist a
            # simple pickle.load/dump roundtrip:
            # ``pickle.loads(pickle.dumps(np.dtype('f4'))) is not
            # np.dtype('f4') Because pickle relies on memoization during
            # pickling, it is easy to
            # produce different hashes for seemingly identical objects, such as
            # ``[np.dtype('f4'), np.dtype('f4')]``
            # and ``[np.dtype('f4'), pickle.loads(pickle.dumps('f4'))]``.
            # To prevent memoization from interfering with hashing, we isolate
            # the serialization (and thus the pickle memoization) of each dtype
            # using each time a different ``pickle.dumps`` call unrelated to
            # the current Hasher instance.
            self._hash.update("_HASHED_DTYPE".encode("utf-8"))
            self._hash.update(pickle.dumps(obj))
            return
        Hasher.save(self, obj)
|
||||
|
||||
|
||||
def hash(obj, hash_name="md5", coerce_mmap=False):
    """Compute a hash that uniquely identifies a Python object,
    with dedicated handling for objects containing numpy arrays.

    Parameters
    ----------
    hash_name: 'md5' or 'sha1'
        Hashing algorithm used. sha1 is supposedly safer, but md5 is
        faster.
    coerce_mmap: boolean
        Make no difference between np.memmap and np.ndarray
    """
    valid_hash_names = ("md5", "sha1")
    if hash_name not in valid_hash_names:
        raise ValueError(
            "Valid options for 'hash_name' are {}. Got hash_name={!r} instead.".format(
                valid_hash_names, hash_name
            )
        )
    # Pick the numpy-aware hasher only when the caller has already
    # imported numpy; this never triggers a numpy import itself.
    hasher = (
        NumpyHasher(hash_name=hash_name, coerce_mmap=coerce_mmap)
        if "numpy" in sys.modules
        else Hasher(hash_name=hash_name)
    )
    return hasher.hash(obj)
|
||||
Reference in New Issue
Block a user