Source code for syncopy.io.load_spy_container

# -*- coding: utf-8 -*-
#
# Load data from Syncopy containers
#

# Builtin/3rd party package imports
import os
import json
import h5py
import sys
import numpy as np
from glob import glob

# Local imports
from syncopy.shared.filetypes import FILE_EXT
from syncopy.shared.parsers import io_parser, data_parser, filename_parser, array_parser
from syncopy.shared.errors import (
    SPYTypeError,
    SPYValueError,
    SPYIOError,
    SPYError,
    SPYWarning,
)
from syncopy.io.utils import hash_file, startInfoDict
import syncopy.datatype as spd
import syncopy as spy

# Metadata fields that may be absent from the info file, to allow loading
# older spy containers
legacy_not_required = ["info"]

__all__ = ["load"]


def load(filename, tag=None, dataclass=None, checksum=False, mode="r+", out=None):
    """
    Load Syncopy data object(s) from disk

    Either loads single files within or outside of '.spy'-containers or loads
    multiple objects from a single '.spy'-container. Loading from containers
    can be further controlled by imposing restrictions on object class(es)
    (via `dataclass`) and file-name tag(s) (via `tag`).

    Parameters
    ----------
    filename : str
        Either path to Syncopy container folder (`*.spy`; if the extension
        '.spy' is omitted, it will be appended) or name of data or metadata
        file. If `filename` points to a container and no further
        specifications are provided, the entire contents of the container are
        loaded. Otherwise, specific objects may be selected using the
        `dataclass` or `tag` keywords (see below).
    tag : None or str or list
        If `filename` points to a container, `tag` may be used to filter
        objects by filename-`tag`. Multiple tags can be provided using a
        list, e.g., ``tag = ['experiment1', 'experiment2']``. Can be combined
        with `dataclass` (see below). Invalid if `filename` points to a
        single file.
    dataclass : None or str or list
        If provided, only objects of the provided dataclass are loaded from
        disk. Available options are '.analog', '.spectral', '.spike' and
        '.event' (as listed in ``spy.FILE_EXT["data"]``). Multiple class
        specifications can be provided using a list, e.g.,
        ``dataclass = ['.analog', '.spike']``. Can be combined with `tag`
        (see above) and is also valid if `filename` points to a single file
        (e.g., to ensure the loaded object is of a specific type).
    checksum : bool
        If `True`, checksum-matching is performed on loaded object(s) to
        ensure data-integrity (impairs performance particularly when loading
        large files).
    mode : str
        Data access mode of loaded objects (can be 'r' for read-only, 'r+' or
        'w' for read/write access).
    out : Syncopy data object
        Empty object to be filled with data loaded from disk. Has to match
        the type of the on-disk file (e.g., ``filename = 'mydata.analog'``
        requires `out` to be a :class:`syncopy.AnalogData` object). Can only
        be used when loading single objects from disk (`out` is ignored when
        multiple files are loaded from a container).

    Returns
    -------
    Nothing : None
        If a single file is loaded and `out` was provided, `out` is filled
        with data loaded from disk, i.e., :func:`syncopy.load` does **not**
        create a new object
    obj : Syncopy data object
        If a single file is loaded and `out` was `None`, :func:`syncopy.load`
        returns a new object.
    objdict : dict
        If multiple files are loaded, :func:`syncopy.load` creates a new
        object for each file and places them in a dictionary whose keys are
        the base-names (sans path) of the corresponding files.

    Notes
    -----
    All of Syncopy's classes offer (limited) support for data loading upon
    object creation. Just as the class method ``.save`` can be used as a
    shortcut for :func:`syncopy.save`, Syncopy objects can be filled with
    data from Syncopy data-files upon creation, e.g.,

    >>> adata = spy.AnalogData('/path/to/session1.analog')

    creates a new :class:`syncopy.AnalogData` object and immediately fills it
    with data loaded from the file "/path/to/session1.analog".

    Since only one object can be created at a time, this loading shortcut
    only supports single file specifications (i.e.,
    ``spy.AnalogData("container.spy")`` is invalid).

    Examples
    --------
    Load all objects found in the spy-container "sessionName" (the extension
    ".spy" may or may not be provided)

    >>> objectDict = spy.load("sessionName")
    >>> # --> returns a dict with base-filenames as keys

    Load all :class:`syncopy.AnalogData` and :class:`syncopy.SpectralData`
    objects from the spy-container "sessionName"

    >>> objectDict = spy.load("sessionName.spy", dataclass=['analog', 'spectral'])

    Load a specific :class:`syncopy.AnalogData` object from the above
    spy-container

    >>> obj = spy.load("sessionName.spy/sessionName_someTag.analog")

    This is equivalent to

    >>> obj = spy.AnalogData("sessionName.spy/sessionName_someTag.analog")

    If the "sessionName" spy-container only contains one object with the tag
    "someTag", the above call is equivalent to

    >>> obj = spy.load("sessionName.spy", tag="someTag")

    If there are multiple objects of different types using the same tag
    "someTag", the above call can be further narrowed down to only load the
    requested :class:`syncopy.AnalogData` object

    >>> obj = spy.load("sessionName.spy", tag="someTag", dataclass="analog")

    See also
    --------
    syncopy.save : save syncopy object on disk
    """

    # Ensure `filename` is either a valid .spy container or data file: if `filename`
    # is a directory w/o '.spy' extension, append it
    if not isinstance(filename, str):
        raise SPYTypeError(filename, varname="filename", expected="str")
    if len(os.path.splitext(os.path.abspath(os.path.expanduser(filename)))[1]) == 0:
        filename += FILE_EXT["dir"]
    try:
        fileInfo = filename_parser(filename)
    except Exception as exc:
        raise exc

    if tag is not None:
        if isinstance(tag, str):
            tags = [tag]
        else:
            tags = tag
        try:
            array_parser(tags, varname="tag", ntype=str)
        except Exception as exc:
            raise exc
        if fileInfo["filename"] is not None:
            raise SPYError("Only containers can be loaded with `tag` keyword!")
        for tk in range(len(tags)):
            tags[tk] = "*" + tags[tk] + "*"
    else:
        tags = "*"

    # If `dataclass` was provided, format it for our needs (e.g. 'spike' -> ['.spike'])
    if dataclass is not None:
        if isinstance(dataclass, str):
            dataclass = [dataclass]
        try:
            array_parser(dataclass, varname="dataclass", ntype=str)
        except Exception as exc:
            raise exc
        dataclass = ["." + dclass if not dclass.startswith(".") else dclass
                     for dclass in dataclass]
        extensions = set(dataclass).intersection(FILE_EXT["data"])
        if len(extensions) == 0:
            lgl = "extension(s) '" + "or '".join(ext + "' " for ext in FILE_EXT["data"])
            raise SPYValueError(legal=lgl, varname="dataclass", actual=str(dataclass))

    # Avoid any misunderstandings here...
    if not isinstance(checksum, bool):
        raise SPYTypeError(checksum, varname="checksum", expected="bool")

    # Abuse `AnalogData.mode`-setter to check `mode`
    try:
        spd.AnalogData().mode = mode
    except Exception as exc:
        raise exc

    # If `filename` points to a spy container, `glob` what's inside, otherwise just load
    if fileInfo["filename"] is None:

        if dataclass is None:
            extensions = FILE_EXT["data"]
        container = os.path.join(fileInfo["folder"], fileInfo["container"])
        fileList = []
        for ext in extensions:
            for tag in tags:
                fileList.extend(glob(os.path.join(container, tag + ext)))
        if len(fileList) == 0:
            fsloc = os.path.join(
                container,
                "" + "or ".join(tag + " " for tag in tags)
                + "with extensions " + "or ".join(ext + " " for ext in extensions),
            )
            raise SPYIOError(fsloc, exists=False)
        if len(fileList) == 1:
            return _load(fileList[0], checksum, mode, out)
        if out is not None:
            msg = "When loading multiple objects, the `out` keyword is ignored"
            SPYWarning(msg)
        objectDict = {}
        for fname in fileList:
            obj = _load(fname, checksum, mode, None)
            objectDict[os.path.basename(obj.filename)] = obj
        return objectDict

    else:

        if dataclass is not None:
            if os.path.splitext(fileInfo["filename"])[1] not in dataclass:
                lgl = "extension '" + "or '".join(dclass + "' " for dclass in dataclass)
                raise SPYValueError(legal=lgl, varname="filename",
                                    actual=fileInfo["filename"])
        return _load(filename, checksum, mode, out)
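
# Illustrative sketch (not part of the module): the on-disk layout `load`
# operates on. A '.spy' container is a directory holding HDF5 data files plus
# JSON metadata sidecars; the extensions come from `FILE_EXT` (here we assume
# ``FILE_EXT["info"] == ".info"``), and the file names below are hypothetical
# examples.
#
#   sessionName.spy/
#       sessionName_someTag.analog          # HDF5 data file
#       sessionName_someTag.analog.info     # JSON metadata sidecar
#       sessionName_someTag.spectral
#       sessionName_someTag.spectral.info
#
# Given this layout, ``spy.load("sessionName", tag="someTag", dataclass="analog")``
# globs ``sessionName.spy/*someTag*.analog`` and hands the single match to `_load`.
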
def _load(filename, checksum, mode, out):
    """Local helper"""

    fileInfo = filename_parser(filename)
    hdfFile = os.path.join(fileInfo["folder"], fileInfo["filename"])
    jsonFile = hdfFile + FILE_EXT["info"]

    try:
        _ = io_parser(hdfFile, varname="hdfFile", isfile=True, exists=True)
        _ = io_parser(jsonFile, varname="jsonFile", isfile=True, exists=True)
    except Exception as exc:
        raise exc

    with open(jsonFile, "r") as file:
        jsonDict = json.load(file)

    if "dataclass" not in jsonDict.keys():
        raise SPYError("Info file {} does not contain a dataclass field".format(jsonFile))

    if hasattr(spd, jsonDict["dataclass"]):
        dataclass = getattr(spd, jsonDict["dataclass"])
    else:
        raise SPYError("Unknown data class {dclass}".format(dclass=jsonDict["dataclass"]))

    requiredFields = tuple(startInfoDict.keys()) + dataclass._infoFileProperties
    for key in requiredFields:
        if key not in jsonDict.keys() and key not in legacy_not_required:
            raise SPYError(
                "Required field {field} for {cls} not in {file}".format(
                    field=key, cls=dataclass.__name__, file=jsonFile
                )
            )

    # FIXME: add version comparison (syncopy.__version__ vs jsonDict["_version"])

    # If wanted, perform checksum matching
    if checksum:
        hsh_msg = "hash = {hsh:s}"
        hsh = hash_file(hdfFile)
        if hsh != jsonDict["file_checksum"]:
            raise SPYValueError(
                legal=hsh_msg.format(hsh=jsonDict["file_checksum"]),
                varname=os.path.basename(hdfFile),
                actual=hsh_msg.format(hsh=hsh),
            )

    # Parsing is done, create new or check provided object
    dimord = jsonDict.pop("dimord")
    if out is not None:
        try:
            data_parser(out, varname="out", writable=True, dataclass=jsonDict["dataclass"])
        except Exception as exc:
            raise exc
        new_out = False
        out.dimord = dimord
    else:
        out = dataclass(dimord=dimord)
        new_out = True

    # Access data on disk (error checking is done by setters)
    out.mode = mode

    # If the JSON contains `_hdfFileDatasetProperties`, load all datasets listed
    # there. Otherwise, fall back to the datasets already defined by the
    # respective data class via `out._hdfFileDatasetProperties`. This is needed
    # to load both new files with, and legacy files without, the
    # `_hdfFileDatasetProperties` entry in the JSON.
    json_hdfFileDatasetProperties = jsonDict.pop("_hdfFileDatasetProperties", None)
    if json_hdfFileDatasetProperties is not None:
        # It's a list in the JSON, so convert to tuple.
        out._hdfFileDatasetProperties = tuple(json_hdfFileDatasetProperties)

    for datasetProperty in out._hdfFileDatasetProperties:
        targetProperty = datasetProperty if datasetProperty == "data" else "_" + datasetProperty
        try:
            setattr(out, targetProperty, h5py.File(hdfFile, mode="r")[datasetProperty])
        except KeyError:
            if datasetProperty == "data":
                raise SPYError(
                    "Data file {file} does not contain a dataset named 'data'.".format(file=hdfFile)
                )
            else:
                # It is fine if an extra dataset is not present in the file, e.g., the
                # SpikeData waveform dataset is not present when set to None.
                spy.log(
                    f"Dataset '{datasetProperty}' not present in HDF5 file, cannot load it. Setting to None.",
                    level="DEBUG",
                )
                setattr(out, targetProperty, None)

    # Abuse ``definetrial`` to set trial-related props
    trialdef = h5py.File(hdfFile, mode="r")["trialdefinition"][()]
    out.definetrial(trialdef)

    # Assign metadata
    for key in [
        prop for prop in dataclass._infoFileProperties
        if prop != "dimord" and prop in jsonDict.keys()
    ]:
        setattr(out, key, jsonDict[key])

    thisMethod = sys._getframe().f_code.co_name.replace("_", "")

    # Write log-entry
    msg = "Read files v. {ver:s} ".format(ver=jsonDict["_version"])
    msg += "{hdf:s}\n\t" + (len(msg) + len(thisMethod) + 2) * " " + "{json:s}"
    out.log = msg.format(hdf=hdfFile, json=jsonFile)

    # Happy breakdown
    return out if new_out else None