updates
Backend/venv/lib/python3.12/site-packages/joblib/numpy_pickle.py (new file, 756 lines)
@@ -0,0 +1,756 @@
"""Utilities for fast persistence of big data, with optional compression."""
|
||||
|
||||
# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
|
||||
# Copyright (c) 2009 Gael Varoquaux
|
||||
# License: BSD Style, 3 clauses.
|
||||
|
||||
import io
|
||||
import os
|
||||
import pickle
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
|
||||
from .backports import make_memmap
|
||||
from .compressor import (
|
||||
_COMPRESSORS,
|
||||
LZ4_NOT_INSTALLED_ERROR,
|
||||
BinaryZlibFile,
|
||||
BZ2CompressorWrapper,
|
||||
GzipCompressorWrapper,
|
||||
LZ4CompressorWrapper,
|
||||
LZMACompressorWrapper,
|
||||
XZCompressorWrapper,
|
||||
ZlibCompressorWrapper,
|
||||
lz4,
|
||||
register_compressor,
|
||||
)
|
||||
|
||||
# For compatibility with old versions of joblib, we need ZNDArrayWrapper
|
||||
# to be visible in the current namespace.
|
||||
from .numpy_pickle_compat import (
|
||||
NDArrayWrapper,
|
||||
ZNDArrayWrapper, # noqa: F401
|
||||
load_compatibility,
|
||||
)
|
||||
from .numpy_pickle_utils import (
|
||||
BUFFER_SIZE,
|
||||
Pickler,
|
||||
Unpickler,
|
||||
_ensure_native_byte_order,
|
||||
_read_bytes,
|
||||
_reconstruct,
|
||||
_validate_fileobject_and_memmap,
|
||||
_write_fileobject,
|
||||
)
|
||||
|
||||
# Register supported compressors
|
||||
register_compressor("zlib", ZlibCompressorWrapper())
|
||||
register_compressor("gzip", GzipCompressorWrapper())
|
||||
register_compressor("bz2", BZ2CompressorWrapper())
|
||||
register_compressor("lzma", LZMACompressorWrapper())
|
||||
register_compressor("xz", XZCompressorWrapper())
|
||||
register_compressor("lz4", LZ4CompressorWrapper())
|
||||
|
||||
|
||||
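
# Usage sketch (illustrative; not part of the upstream module): the names
# registered above are the valid compressor names for the ``compress``
# argument of ``dump`` below, and the compressor can also be selected from
# the filename extension. A minimal sketch, assuming a writable /tmp:
#
#     >>> import numpy as np, joblib
#     >>> data = {"x": np.arange(10)}
#     >>> joblib.dump(data, "/tmp/data.pkl.gz")  # gzip chosen by extension
#     ['/tmp/data.pkl.gz']
#     >>> joblib.dump(data, "/tmp/data.pkl", compress=("zlib", 3))
#     ['/tmp/data.pkl']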


###############################################################################
# Utility objects for persistence.

# For convenience, 16 bytes are used to be sure to cover all the possible
# dtypes' alignments. For reference, see:
# https://numpy.org/devdocs/dev/alignment.html
NUMPY_ARRAY_ALIGNMENT_BYTES = 16
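
# Worked example of the padding arithmetic used below (illustrative): if the
# pickle stream is at byte offset 21 when an array is written, the one-byte
# padding header lands at offset 21, so the data would otherwise start at 22.
# write_array therefore pads with 16 - (22 % 16) = 10 bytes of 0xff so that
# the first array byte falls at offset 32, a multiple of 16.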


class NumpyArrayWrapper(object):
    """An object to be persisted instead of numpy arrays.

    This object is used to hack into the pickle machinery and read numpy
    array data from our custom persistence format.
    More precisely, this object is used for:
    * carrying the information of the persisted array: subclass, shape, order,
    dtype. Those ndarray metadata are used to correctly reconstruct the array
    with low level numpy functions.
    * determining if memmap is allowed on the array.
    * reading the array bytes from a file.
    * reading the array using a memory map from a file.
    * writing the array bytes to a file.

    Attributes
    ----------
    subclass: numpy.ndarray subclass
        Determine the subclass of the wrapped array.
    shape: numpy.ndarray shape
        Determine the shape of the wrapped array.
    order: {'C', 'F'}
        Determine the order of wrapped array data. 'C' is for C order, 'F' is
        for Fortran order.
    dtype: numpy.ndarray dtype
        Determine the data type of the wrapped array.
    allow_mmap: bool
        Determine if memory mapping is allowed on the wrapped array.
        Default: False.
    """

    def __init__(
        self,
        subclass,
        shape,
        order,
        dtype,
        allow_mmap=False,
        numpy_array_alignment_bytes=NUMPY_ARRAY_ALIGNMENT_BYTES,
    ):
        """Constructor. Store the useful information for later."""
        self.subclass = subclass
        self.shape = shape
        self.order = order
        self.dtype = dtype
        self.allow_mmap = allow_mmap
        # We make numpy_array_alignment_bytes an instance attribute to allow us
        # to change our mind about the default alignment and still load the old
        # pickles (with the previous alignment) correctly
        self.numpy_array_alignment_bytes = numpy_array_alignment_bytes

    def safe_get_numpy_array_alignment_bytes(self):
        # NumpyArrayWrapper instances loaded from joblib <= 1.1 pickles don't
        # have a numpy_array_alignment_bytes attribute
        return getattr(self, "numpy_array_alignment_bytes", None)

    def write_array(self, array, pickler):
        """Write array bytes to pickler file handle.

        This function is an adaptation of the numpy write_array function
        available in version 1.10.1 in numpy/lib/format.py.
        """
        # Set buffer size to 16 MiB to hide the Python loop overhead.
        buffersize = max(16 * 1024**2 // array.itemsize, 1)
        if array.dtype.hasobject:
            # We contain Python objects so we cannot write out the data
            # directly. Instead, we will pickle it out with version 5 of the
            # pickle protocol.
            pickle.dump(array, pickler.file_handle, protocol=5)
        else:
            numpy_array_alignment_bytes = self.safe_get_numpy_array_alignment_bytes()
            if numpy_array_alignment_bytes is not None:
                current_pos = pickler.file_handle.tell()
                pos_after_padding_byte = current_pos + 1
                padding_length = numpy_array_alignment_bytes - (
                    pos_after_padding_byte % numpy_array_alignment_bytes
                )
                # A single byte is written that contains the padding length in
                # bytes
                padding_length_byte = int.to_bytes(
                    padding_length, length=1, byteorder="little"
                )
                pickler.file_handle.write(padding_length_byte)

                if padding_length != 0:
                    padding = b"\xff" * padding_length
                    pickler.file_handle.write(padding)

            for chunk in pickler.np.nditer(
                array,
                flags=["external_loop", "buffered", "zerosize_ok"],
                buffersize=buffersize,
                order=self.order,
            ):
                pickler.file_handle.write(chunk.tobytes("C"))
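
    # Resulting record layout for a non-object array on a seekable stream
    # (illustrative summary of write_array above):
    #
    #     [pickled NumpyArrayWrapper] [1-byte padding length] [1-16 bytes of
    #     0xff padding] [raw array bytes, streamed chunk-wise via np.nditer]
    #
    # Object-dtype arrays are instead embedded as an ordinary pickle, and on
    # non-seekable streams the padding header is omitted entirely, which is
    # why read_array below mirrors the same branches.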

    def read_array(self, unpickler, ensure_native_byte_order):
        """Read array from unpickler file handle.

        This function is an adaptation of the numpy read_array function
        available in version 1.10.1 in numpy/lib/format.py.
        """
        if len(self.shape) == 0:
            count = 1
        else:
            # joblib issue #859: we cast the elements of self.shape to int64 to
            # prevent a potential overflow when computing their product.
            shape_int64 = [unpickler.np.int64(x) for x in self.shape]
            count = unpickler.np.multiply.reduce(shape_int64)
        # Now read the actual data.
        if self.dtype.hasobject:
            # The array contained Python objects. We need to unpickle the data.
            array = pickle.load(unpickler.file_handle)
        else:
            numpy_array_alignment_bytes = self.safe_get_numpy_array_alignment_bytes()
            if numpy_array_alignment_bytes is not None:
                padding_byte = unpickler.file_handle.read(1)
                padding_length = int.from_bytes(padding_byte, byteorder="little")
                if padding_length != 0:
                    unpickler.file_handle.read(padding_length)

            # This is not a real file. We have to read it the
            # memory-intensive way.
            # crc32 module fails on reads greater than 2 ** 32 bytes,
            # breaking large reads from gzip streams. Chunk reads to
            # BUFFER_SIZE bytes to avoid issue and reduce memory overhead
            # of the read. In non-chunked case count < max_read_count, so
            # only one read is performed.
            max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, self.dtype.itemsize)

            array = unpickler.np.empty(count, dtype=self.dtype)
            for i in range(0, count, max_read_count):
                read_count = min(max_read_count, count - i)
                read_size = int(read_count * self.dtype.itemsize)
                data = _read_bytes(unpickler.file_handle, read_size, "array data")
                array[i : i + read_count] = unpickler.np.frombuffer(
                    data, dtype=self.dtype, count=read_count
                )
                del data

            if self.order == "F":
                array.shape = self.shape[::-1]
                array = array.transpose()
            else:
                array.shape = self.shape

        if ensure_native_byte_order:
            # Detect byte order mismatch and swap as needed.
            array = _ensure_native_byte_order(array)

        return array

    def read_mmap(self, unpickler):
        """Read an array using numpy memmap."""
        current_pos = unpickler.file_handle.tell()
        offset = current_pos
        numpy_array_alignment_bytes = self.safe_get_numpy_array_alignment_bytes()

        if numpy_array_alignment_bytes is not None:
            padding_byte = unpickler.file_handle.read(1)
            padding_length = int.from_bytes(padding_byte, byteorder="little")
            # + 1 is for the padding byte
            offset += padding_length + 1

        if unpickler.mmap_mode == "w+":
            unpickler.mmap_mode = "r+"

        marray = make_memmap(
            unpickler.filename,
            dtype=self.dtype,
            shape=self.shape,
            order=self.order,
            mode=unpickler.mmap_mode,
            offset=offset,
        )
        # update the offset so that it corresponds to the end of the read array
        unpickler.file_handle.seek(offset + marray.nbytes)

        if (
            numpy_array_alignment_bytes is None
            and current_pos % NUMPY_ARRAY_ALIGNMENT_BYTES != 0
        ):
            message = (
                f"The memmapped array {marray} loaded from the file "
                f"{unpickler.file_handle.name} is not byte aligned. "
                "This may cause segmentation faults if this memmapped array "
                "is used in some libraries like BLAS or PyTorch. "
                "To get rid of this warning, regenerate your pickle file "
                "with joblib >= 1.2.0. "
                "See https://github.com/joblib/joblib/issues/563 "
                "for more details"
            )
            warnings.warn(message)

        return marray

    def read(self, unpickler, ensure_native_byte_order):
        """Read the array corresponding to this wrapper.

        Use the unpickler to get all information to correctly read the array.

        Parameters
        ----------
        unpickler: NumpyUnpickler
        ensure_native_byte_order: bool
            If True, coerce the array to use the native endianness of the
            host system.

        Returns
        -------
        array: numpy.ndarray

        """
        # When requested, only use memmap mode if allowed.
        if unpickler.mmap_mode is not None and self.allow_mmap:
            assert not ensure_native_byte_order, (
                "Memmaps cannot be coerced to a given byte order, "
                "this code path is impossible."
            )
            array = self.read_mmap(unpickler)
        else:
            array = self.read_array(unpickler, ensure_native_byte_order)

        # Manage array subclass case
        if hasattr(array, "__array_prepare__") and self.subclass not in (
            unpickler.np.ndarray,
            unpickler.np.memmap,
        ):
            # We need to reconstruct another subclass
            new_array = _reconstruct(self.subclass, (0,), "b")
            return new_array.__array_prepare__(array)
        else:
            return array


###############################################################################
# Pickler classes


class NumpyPickler(Pickler):
    """A pickler to persist big data efficiently.

    The main features of this object are:
    * persistence of numpy arrays in a single file.
    * optional compression with special care to avoid memory copies.

    Attributes
    ----------
    fp: file
        File object handle used for serializing the input object.
    protocol: int, optional
        Pickle protocol used. Default is pickle.DEFAULT_PROTOCOL.
    """

    dispatch = Pickler.dispatch.copy()

    def __init__(self, fp, protocol=None):
        self.file_handle = fp
        self.buffered = isinstance(self.file_handle, BinaryZlibFile)

        # By default we want a pickle protocol that only changes with
        # the major python version and not the minor one
        if protocol is None:
            protocol = pickle.DEFAULT_PROTOCOL

        Pickler.__init__(self, self.file_handle, protocol=protocol)
        # delayed import of numpy, to avoid tight coupling
        try:
            import numpy as np
        except ImportError:
            np = None
        self.np = np

    def _create_array_wrapper(self, array):
        """Create and return a numpy array wrapper from a numpy array."""
        order = (
            "F" if (array.flags.f_contiguous and not array.flags.c_contiguous) else "C"
        )
        allow_mmap = not self.buffered and not array.dtype.hasobject

        kwargs = {}
        try:
            self.file_handle.tell()
        except io.UnsupportedOperation:
            kwargs = {"numpy_array_alignment_bytes": None}

        wrapper = NumpyArrayWrapper(
            type(array),
            array.shape,
            order,
            array.dtype,
            allow_mmap=allow_mmap,
            **kwargs,
        )

        return wrapper

    def save(self, obj):
        """Subclass the Pickler `save` method.

        This is a total abuse of the Pickler class in order to use the numpy
        persistence function `save` instead of the default pickle
        implementation. The numpy array is replaced by a custom wrapper in the
        pickle persistence stack and the serialized array is written right
        after in the file. Warning: the file produced does not follow the
        pickle format. As such it cannot be read with `pickle.load`.
        """
        if self.np is not None and type(obj) in (
            self.np.ndarray,
            self.np.matrix,
            self.np.memmap,
        ):
            if type(obj) is self.np.memmap:
                # Pickling doesn't work with memmapped arrays
                obj = self.np.asanyarray(obj)

            # The array wrapper is pickled instead of the real array.
            wrapper = self._create_array_wrapper(obj)
            Pickler.save(self, wrapper)

            # A framer was introduced with pickle protocol 4 and we want to
            # ensure the wrapper object is written before the numpy array
            # buffer in the pickle file.
            # See https://www.python.org/dev/peps/pep-3154/#framing to get
            # more information on the framer behavior.
            if self.proto >= 4:
                self.framer.commit_frame(force=True)

            # And then array bytes are written right after the wrapper.
            wrapper.write_array(obj, self)
            return

        return Pickler.save(self, obj)
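
# Usage sketch (illustrative): NumpyPickler writes to any binary file handle,
# and the resulting stream interleaves pickle opcodes with raw array payloads,
# so it must be read back with NumpyUnpickler / joblib.load rather than
# pickle.load:
#
#     >>> import io
#     >>> import numpy as np
#     >>> buf = io.BytesIO()
#     >>> NumpyPickler(buf).dump(np.arange(4))
#     >>> # pickle.load(buf) would yield at best the bare NumpyArrayWrapper,
#     >>> # without the array payload that follows it in the stream.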


class NumpyUnpickler(Unpickler):
    """A subclass of the Unpickler to unpickle our numpy pickles.

    Attributes
    ----------
    mmap_mode: str
        The memory-map mode to use for reading numpy arrays.
    file_handle: file_like
        File object to unpickle from.
    ensure_native_byte_order: bool
        If True, coerce the array to use the native endianness of the
        host system.
    filename: str
        Name of the file to unpickle from. It should correspond to file_handle.
        This parameter is required when using mmap_mode.
    np: module
        Reference to the numpy module if numpy is installed, else None.

    """

    dispatch = Unpickler.dispatch.copy()

    def __init__(self, filename, file_handle, ensure_native_byte_order, mmap_mode=None):
        # The next line is for backward compatibility with pickles generated
        # with joblib versions less than 0.10.
        self._dirname = os.path.dirname(filename)

        self.mmap_mode = mmap_mode
        self.file_handle = file_handle
        # filename is required for numpy mmap mode.
        self.filename = filename
        self.compat_mode = False
        self.ensure_native_byte_order = ensure_native_byte_order
        Unpickler.__init__(self, self.file_handle)
        try:
            import numpy as np
        except ImportError:
            np = None
        self.np = np

    def load_build(self):
        """Called to set the state of a newly created object.

        We capture it to replace our place-holder objects, NDArrayWrapper or
        NumpyArrayWrapper, by the array we are interested in. We
        replace them directly in the pickler's stack.
        NDArrayWrapper is used for backward compatibility with joblib <= 0.9.
        """
        Unpickler.load_build(self)

        # For backward compatibility, we support NDArrayWrapper objects.
        if isinstance(self.stack[-1], (NDArrayWrapper, NumpyArrayWrapper)):
            if self.np is None:
                raise ImportError(
                    "Trying to unpickle an ndarray, but numpy didn't import correctly"
                )
            array_wrapper = self.stack.pop()
            # If any NDArrayWrapper is found, we switch to compatibility mode,
            # this will be used to raise a DeprecationWarning to the user at
            # the end of the unpickling.
            if isinstance(array_wrapper, NDArrayWrapper):
                self.compat_mode = True
                _array_payload = array_wrapper.read(self)
            else:
                _array_payload = array_wrapper.read(self, self.ensure_native_byte_order)

            self.stack.append(_array_payload)

    # Be careful to register our new method.
    dispatch[pickle.BUILD[0]] = load_build


###############################################################################
# Utility functions


def dump(value, filename, compress=0, protocol=None):
    """Persist an arbitrary Python object into one file.

    Read more in the :ref:`User Guide <persistence>`.

    Parameters
    ----------
    value: any Python object
        The object to store to disk.
    filename: str, pathlib.Path, or file object.
        The file object or path of the file in which it is to be stored.
        The compression method corresponding to one of the supported filename
        extensions ('.z', '.gz', '.bz2', '.xz' or '.lzma') will be used
        automatically.
    compress: int from 0 to 9 or bool or 2-tuple, optional
        Optional compression level for the data. 0 or False is no compression.
        Higher value means more compression, but also slower read and
        write times. Using a value of 3 is often a good compromise.
        See the notes for more details.
        If compress is True, the compression level used is 3.
        If compress is a 2-tuple, the first element must be a string naming
        one of the supported compressors (e.g. 'zlib', 'gzip', 'bz2', 'lzma',
        'xz'), and the second element must be an integer from 0 to 9,
        corresponding to the compression level.
    protocol: int, optional
        Pickle protocol, see pickle.dump documentation for more details.

    Returns
    -------
    filenames: list of strings
        The list of file names in which the data is stored. If
        compress is false, each array is stored in a different file.

    See Also
    --------
    joblib.load : corresponding loader

    Notes
    -----
    Memmapping on load cannot be used for compressed files. Thus
    using compression can significantly slow down loading. In
    addition, compressed files take up extra memory during
    dump and load.

    """

    if Path is not None and isinstance(filename, Path):
        filename = str(filename)

    is_filename = isinstance(filename, str)
    is_fileobj = hasattr(filename, "write")

    compress_method = "zlib"  # zlib is the default compression method.
    if compress is True:
        # By default, if compress is enabled, we want the default compress
        # level of the compressor.
        compress_level = None
    elif isinstance(compress, tuple):
        # a 2-tuple was set in compress
        if len(compress) != 2:
            raise ValueError(
                "Compress argument tuple should contain exactly 2 elements: "
                "(compress method, compress level), you passed {}".format(compress)
            )
        compress_method, compress_level = compress
    elif isinstance(compress, str):
        compress_method = compress
        compress_level = None  # Use default compress level
        compress = (compress_method, compress_level)
    else:
        compress_level = compress

    if compress_method == "lz4" and lz4 is None:
        raise ValueError(LZ4_NOT_INSTALLED_ERROR)

    if (
        compress_level is not None
        and compress_level is not False
        and compress_level not in range(10)
    ):
        # Raise an error if an invalid compress level is given.
        raise ValueError(
            'Non valid compress level given: "{}". Possible values are {}.'.format(
                compress_level, list(range(10))
            )
        )

    if compress_method not in _COMPRESSORS:
        # Raise an error if an unsupported compression method is given.
        raise ValueError(
            'Non valid compression method given: "{}". Possible values are {}.'.format(
                compress_method, _COMPRESSORS
            )
        )

    if not is_filename and not is_fileobj:
        # People keep inverting arguments, and the resulting error is
        # incomprehensible
        raise ValueError(
            "Second argument should be a filename or a file-like object, "
            "%s (type %s) was given." % (filename, type(filename))
        )

    if is_filename and not isinstance(compress, tuple):
        # In case no explicit compression was requested (using both a
        # compression method and level in a tuple) and the filename has an
        # explicit extension, we select the corresponding compressor.

        # unset the variable to be sure no compression level is set afterwards.
        compress_method = None
        for name, compressor in _COMPRESSORS.items():
            if filename.endswith(compressor.extension):
                compress_method = name

        if compress_method in _COMPRESSORS and compress_level == 0:
            # we choose the default compress_level in case it was not given
            # as an argument (using compress).
            compress_level = None

    if compress_level != 0:
        with _write_fileobject(
            filename, compress=(compress_method, compress_level)
        ) as f:
            NumpyPickler(f, protocol=protocol).dump(value)
    elif is_filename:
        with open(filename, "wb") as f:
            NumpyPickler(f, protocol=protocol).dump(value)
    else:
        NumpyPickler(filename, protocol=protocol).dump(value)

    # If the target container is a file object, nothing is returned.
    if is_fileobj:
        return

    # For compatibility, the list of created filenames (e.g. with one element
    # after 0.10.0) is returned by default.
    return [filename]
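
# Usage sketch (illustrative): when dumping to an already opened file object,
# no filename is available, so extension-based compressor selection does not
# apply and nothing is returned:
#
#     >>> import numpy as np
#     >>> with open("/tmp/arr.pkl", "wb") as fh:
#     ...     dump(np.arange(100), fh, compress=("zlib", 3))  # returns None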


def _unpickle(fobj, ensure_native_byte_order, filename="", mmap_mode=None):
    """Internal unpickling function."""
    # We are careful to open the file handle early and keep it open to
    # avoid race-conditions on renames.
    # That said, if data is stored in companion files, which can be
    # the case with the old persistence format, moving the directory
    # will create a race when joblib tries to access the companion
    # files.
    unpickler = NumpyUnpickler(
        filename, fobj, ensure_native_byte_order, mmap_mode=mmap_mode
    )
    obj = None
    try:
        obj = unpickler.load()
        if unpickler.compat_mode:
            warnings.warn(
                "The file '%s' has been generated with a "
                "joblib version less than 0.10. "
                "Please regenerate this pickle file." % filename,
                DeprecationWarning,
                stacklevel=3,
            )
    except UnicodeDecodeError as exc:
        # More user-friendly error message
        new_exc = ValueError(
            "You may be trying to read with "
            "python 3 a joblib pickle generated with python 2. "
            "This feature is not supported by joblib."
        )
        new_exc.__cause__ = exc
        raise new_exc
    return obj


def load_temporary_memmap(filename, mmap_mode, unlink_on_gc_collect):
    from ._memmapping_reducer import JOBLIB_MMAPS, add_maybe_unlink_finalizer

    with open(filename, "rb") as f:
        with _validate_fileobject_and_memmap(f, filename, mmap_mode) as (
            fobj,
            validated_mmap_mode,
        ):
            # Memmaps are used for interprocess communication, which should
            # keep the objects untouched. We pass `ensure_native_byte_order=False`
            # to remain consistent with the loading behavior of non-memmapped
            # arrays in workers, where the byte order is preserved.
            # Note that we do not implement endianness change for memmaps, as this
            # would result in inconsistent behavior.
            obj = _unpickle(
                fobj,
                ensure_native_byte_order=False,
                filename=filename,
                mmap_mode=validated_mmap_mode,
            )

    JOBLIB_MMAPS.add(obj.filename)
    if unlink_on_gc_collect:
        add_maybe_unlink_finalizer(obj)
    return obj


def load(filename, mmap_mode=None, ensure_native_byte_order="auto"):
    """Reconstruct a Python object from a file persisted with joblib.dump.

    Read more in the :ref:`User Guide <persistence>`.

    WARNING: joblib.load relies on the pickle module and can therefore
    execute arbitrary Python code. It should therefore never be used
    to load files from untrusted sources.

    Parameters
    ----------
    filename: str, pathlib.Path, or file object.
        The file object or path of the file from which to load the object.
    mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, the arrays are memory-mapped from the disk. This
        mode has no effect for compressed files. Note that in this
        case the reconstructed object might no longer match exactly
        the originally pickled object.
    ensure_native_byte_order: bool, or 'auto', default='auto'
        If True, ensures that the byte order of the loaded arrays matches the
        native byte ordering (or _endianness_) of the host system. This is not
        compatible with memory-mapped arrays, and using a non-None `mmap_mode`
        parameter at the same time will raise an error. The default 'auto'
        parameter is equivalent to True if `mmap_mode` is None, else False.

    Returns
    -------
    result: any Python object
        The object stored in the file.

    See Also
    --------
    joblib.dump : function to save an object

    Notes
    -----
    This function can load numpy array files saved separately during the
    dump. If the mmap_mode argument is given, it is passed to np.load and
    arrays are loaded as memmaps. As a consequence, the reconstructed
    object might not match the original pickled object. Note that if the
    file was saved with compression, the arrays cannot be memmapped.
    """
    if ensure_native_byte_order == "auto":
        ensure_native_byte_order = mmap_mode is None

    if ensure_native_byte_order and mmap_mode is not None:
        raise ValueError(
            "Native byte ordering can only be enforced if 'mmap_mode' parameter "
            f"is set to None, but got 'mmap_mode={mmap_mode}' instead."
        )

    if Path is not None and isinstance(filename, Path):
        filename = str(filename)

    if hasattr(filename, "read"):
        fobj = filename
        filename = getattr(fobj, "name", "")
        with _validate_fileobject_and_memmap(fobj, filename, mmap_mode) as (fobj, _):
            obj = _unpickle(fobj, ensure_native_byte_order=ensure_native_byte_order)
    else:
        with open(filename, "rb") as f:
            with _validate_fileobject_and_memmap(f, filename, mmap_mode) as (
                fobj,
                validated_mmap_mode,
            ):
                if isinstance(fobj, str):
                    # if the returned file object is a string, this means we
                    # are trying to load a pickle file generated with an old
                    # version of joblib, so we load it with the joblib
                    # compatibility function.
                    return load_compatibility(fobj)

                # A memory-mapped array has to be mapped with the endianness
                # it has been written with. Other arrays are coerced to the
                # native endianness of the host system.
                obj = _unpickle(
                    fobj,
                    ensure_native_byte_order=ensure_native_byte_order,
                    filename=filename,
                    mmap_mode=validated_mmap_mode,
                )

    return obj
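
# Usage sketch (illustrative): memory-mapped loading of an uncompressed file
# written by dump; the array comes back as a read-only numpy.memmap view
# backed by the file instead of an in-memory copy:
#
#     >>> import numpy as np
#     >>> dump(np.arange(1000), "/tmp/big.pkl")
#     ['/tmp/big.pkl']
#     >>> arr = load("/tmp/big.pkl", mmap_mode="r")
#     >>> type(arr)
#     <class 'numpy.memmap'>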