Source code for yggdrasil.serialize.SerializeBase

import uuid
import copy
import numpy as np
import warnings
from yggdrasil import tools, units, serialize, constants, rapidjson, datatypes


[docs]class SerializeBase(tools.YggClass): r"""Base class for serializing/deserializing a Python object into/from a bytes message. Args: newline (str, optional): One or more characters indicating a newline. Defaults to '\n'. comment (str, optional): One or more characters indicating a comment. Defaults to '# '. datatype (schema, optional): JSON schema defining the type of object that the serializer will be used to serialize/deserialize. Defaults to default_datatype. **kwargs: Additional keyword args are processed as part of the type definition. Attributes: initialized (bool): True if the serializer has been initialized either by input arguments specifying the type or by infering the type from a processed message. Class Attributes: has_header (bool): True if the serialization has a header when written to a file. default_read_meth (str): Default method that data should be read from a file for deserialization. is_framed (bool): True if the serialization has a frame allowing multiple serialized objects to be recovered from a single message. concats_as_str (bool): True if serialized objects can be concatenated directly as strings. encoded_datatype (schema): JSON schema defining the type of object produced by the class's func_serialize method. For most classes this will be {'type': 'scalar', 'subtype': 'string'}, indicating that the method will produce bytes suitable for serialization. """ _seritype = None _schema_type = 'serializer' _schema_subtype_key = 'seritype' _schema_required = ['seritype'] _schema_properties = { 'seritype': {'type': 'string', 'default': 'default', 'description': ('Serializer type.')}, 'newline': {'type': 'string', 'default': constants.DEFAULT_NEWLINE_STR}, 'comment': {'type': 'string', 'default': constants.DEFAULT_COMMENT_STR}, 'datatype': {'type': 'schema'}, } _oldstyle_kws = ['format_str', 'field_names', 'field_units', 'as_array'] _attr_conv = ['newline', 'comment'] default_datatype = constants.DEFAULT_DATATYPE file_extensions = ['.txt'] has_header = False default_read_meth = 'read' is_framed = False concats_as_str = True def __init__(self, partial_datatype=None, **kwargs): self.partial_datatype = partial_datatype if ('format_str' in kwargs): drv = tools.get_subprocess_language_driver() if drv.decode_format is not None: kwargs['format_str'] = drv.decode_format(kwargs['format_str']) super(SerializeBase, self).__init__(**kwargs) kwargs = self.extra_kwargs self.extra_kwargs = {} self._initialized = False # Update datatype from other keyword arguments if self.datatype is not None: kwargs['datatype'] = self.datatype self.datatype = self.default_datatype self.update_serializer(**kwargs) @property def initialized(self): r"""bool: True if the serializer has been initialized.""" return self._initialized or self.datatype != self.default_datatype
[docs] @staticmethod def before_registration(cls): r"""Operations that should be performed to modify class attributes prior to registration.""" tools.YggClass.before_registration(cls) # If the serialization cannot be concatenated, then it is not framed # by definition and would be meaningless if read-in incrementally if not cls.concats_as_str: assert not cls.is_framed assert cls.default_read_meth == 'read'
[docs] @classmethod def dict2object(cls, obj, **kwargs): r"""Conver a dictionary to a message object. Args: obj (dict): Dictionary to convert to serializable object. **kwargs: Additional keyword arguments are ignored. Returns: object: Serializable object. """ assert len(obj) == 1 return list(obj.values())[0]
[docs] @classmethod def object2dict(cls, obj, **kwargs): r"""Convert a message object into a dictionary. Args: obj (object): Object that would be serialized by this class and should be returned in a dictionary form. **kwargs: Additional keyword arguments are ignored. Returns: dict: Dictionary version of the provided object. """ return {'f0': obj}
[docs] @classmethod def object2array(cls, obj, **kwargs): r"""Convert a message object into an array. Args: obj (object): Object that would be serialized by this class and should be returned in an array form. **kwargs: Additional keyword arguments are ignored. Returns: np.array: Array version of the provided object or None if one cannot be created. """ return None
[docs] @classmethod def concatenate(cls, objects, **kwargs): r"""Concatenate objects to get object that would be recieved if the concatenated serialization were deserialized. Args: objects (list): Objects to be concatenated. **kwargs: Additional keyword arguments are ignored. Returns: list: Set of objects that results from concatenating those provided. """ return objects
[docs] def get_status_message(self, nindent=0, extra_lines_before=None, extra_lines_after=None): r"""Return lines composing a status message. Args: nindent (int, optional): Number of tabs that should be used to indent each line. Defaults to 0. extra_lines_before (list, optional): Additional lines that should be added to the beginning of the default print message. Defaults to empty list if not provided. extra_lines_after (list, optional): Additional lines that should be added to the end of the default print message. Defaults to empty list if not provided. Returns: tuple(list, prefix): Lines composing the status message and the prefix string used for the last message. """ if extra_lines_before is None: extra_lines_before = [] if extra_lines_after is None: extra_lines_after = [] prefix = nindent * '\t' lines = ['%s%s' % (prefix, x) for x in extra_lines_before] lines += ['%s%-15s:' % (prefix, 'sinfo')] lines += [prefix + '\t' + x for x in self.pprint(self.serializer_info).splitlines()] lines += ['%s%-15s:' % (prefix, 'datatype')] lines += [prefix + '\t' + x for x in self.pprint(self.datatype).splitlines()] lines += ['%s%s' % (prefix, x) for x in extra_lines_after] return lines, prefix
[docs] @classmethod def get_testing_options(cls, table_example=False, array_columns=False, include_oldkws=False, table_string_type='bytes', no_names=False, no_units=False, **kwargs): r"""Method to return a dictionary of testing options for this class. Arguments: table_example (bool, optional): If True, the returned options will be for an array of elements representing a table-like structure. Defaults to False. array_columns (bool, optional): If True, table_example is set to True and the returned options will be for an array data type where each element is an array representing a column Defaults to False. include_oldkws (bool, optional): If True, old-style keywords will be added to the returned options. This will only have an effect if table_example is True. Defaults to False. table_string_type (str, optional): Type that should be used for the string column in the table. Defaults to 'bytes'. no_names (bool, optional): If True, an example is returned where the names are not provided to the deserializer. Defaults to False. no_units (bool, optional): If True, units will not be added to the returned array if table_example is True. Returns: dict: Dictionary of variables to use for testing. Key/value pairs: * kwargs (dict): Keyword arguments for comms tested with the provided content. * empty (object): Object produced from deserializing an empty message. * objects (list): List of objects to be serialized/deserialized. extra_kwargs (dict): Extra keyword arguments not used to construct type definition. * typedef (dict): Type definition resulting from the supplied kwargs. * dtype (np.dtype): Numpy data types that is consistent with the determined type definition. * contents (bytes): Concatenated serialization that will result from deserializing the serialized objects. * contents_recv (list): List of objects that would be deserialized from contents. """ if array_columns: table_example = True if table_example: field_names = ['name', 'count', 'size'] assert table_string_type in ['bytes', 'unicode', 'string'] if table_string_type == 'string': table_string_type = 'unicode' int_type = np.int32 if np.dtype(int) == int_type: # pragma: windows int_type = int if table_string_type == 'bytes': table_string_fmt = '%5s' np_dtype_str = 'S' rows = [(b'one', int_type(1), 1.0), (b'two', int_type(2), 2.0), (b'three', int_type(3), 3.0)] dtype1 = {'type': 'scalar', 'subtype': 'string'} else: table_string_fmt = '%5s' np_dtype_str = 'U' rows = [('one', int_type(1), 1.0), ('two', int_type(2), 2.0), ('three', int_type(3), 3.0)] dtype1 = {'type': 'scalar', 'subtype': 'string', 'encoding': 'UCS4'} umol = b'\xce\xbcmol'.decode('utf-8') if no_units: units_line = b'' else: units_line = b'# \t\xce\xbcmol\tcm\n' out = {'kwargs': {}, 'empty': [], 'dtype': None, 'extra_kwargs': {}, 'datatype': { 'type': 'array', 'items': [dtype1, {'type': 'scalar', 'subtype': 'int', 'precision': 4}, {'type': 'scalar', 'subtype': 'float', 'precision': 8}]}, 'contents': (b'# name\tcount\tsize\n' + units_line + b'# ' + table_string_fmt.encode('utf8') + b'\t%d\t%f\n' + b' one\t1\t1.000000\n' + b' two\t2\t2.000000\n' + b'three\t3\t3.000000\n' + b' one\t1\t1.000000\n' + b' two\t2\t2.000000\n' + b'three\t3\t3.000000\n'), 'objects': 2 * rows, 'field_units': ['', umol, 'cm']} if not no_units: out['field_units'] = ['', umol, 'cm'] out['datatype']['items'][1]['units'] = umol out['datatype']['items'][2]['units'] = 'cm' if not no_names: out['field_names'] = field_names for x, n in zip(out['datatype']['items'], field_names): x['title'] = n if include_oldkws: out['kwargs'].update( format_str=(table_string_fmt + '\t%d\t%f\n')) if not no_units: out['kwargs']['field_units'] = ['', umol, 'cm'] out['objects'] = [ [units.add_units(x, u) for x, u in zip(row, out['kwargs']['field_units'])] for row in out['objects']] if not no_names: out['kwargs']['field_names'] = field_names out['extra_kwargs']['format_str'] = out['kwargs']['format_str'] if 'format_str' in cls._attr_conv: out['extra_kwargs']['format_str'] = tools.str2bytes( out['extra_kwargs']['format_str']) if array_columns: out['kwargs']['as_array'] = True if not no_names: dtype = np.dtype( {'names': out['field_names'], 'formats': [f'{np_dtype_str}5', 'i4', 'f8']}) else: dtype = np.dtype(f'{np_dtype_str}5,i4,f8') out['dtype'] = dtype for x in out['datatype']['items']: x['type'] = '1darray' if x['subtype'] == 'string': x['precision'] = 5 if x.get('encoding', None) in ['UTF8', 'UCS4']: x['precision'] *= 4 arr = np.array(rows, dtype=dtype) if no_names: arr = [arr[n] for n in arr.dtype.names] lst = rapidjson.normalize(arr, out['datatype']) out['objects'] = [lst, lst] else: out = {'kwargs': {}, 'empty': b'', 'dtype': None, 'extra_kwargs': {}, 'objects': [b'Test message\n', b'Test message 2\n']} out['contents'] = b''.join(out['objects']) if cls.default_datatype: out['datatype'] = cls.default_datatype return out
@property def read_meth(self): r"""str: Method that should be used to read data for deserialization.""" return self.default_read_meth
[docs] @classmethod def seri_kws(cls): r"""Get a list of valid keyword arguments.""" return list(set(list(cls._schema_properties.keys()) + cls._oldstyle_kws))
@property def input_kwargs(self): r"""dict: Get the input keyword arguments used to create this class.""" out = {} for k in self._schema_properties.keys(): v = getattr(self, k, None) if v is not None: out[k] = copy.deepcopy(v) for k in self._attr_conv: if k in out: out[k] = tools.bytes2str(out[k]) return out @property def serializer_info(self): r"""dict: Serializer info.""" out = copy.deepcopy(self.extra_kwargs) for k in self._schema_properties.keys(): if (k != 'seritype') and (k in self._defaults_set): continue v = getattr(self, k, None) if v is not None: out[k] = copy.deepcopy(v) for k in out.keys(): v = out[k] try: out[k] = tools.bytes2str(v, recurse=True) except TypeError: out[k] = v return out @property def empty_msg(self): r"""obj: Object indicating empty message.""" return datatypes.get_empty_msg(self.datatype) # def is_empty(self, obj): # r"""Determine if an object represents an empty message for this serializer. # Args: # obj (object): Object to test. # Returns: # bool: True if the object is empty, False otherwise. # """ # emsg = self.empty_msg # return (isinstance(obj, type(emsg)) and (obj == emsg))
[docs] def get_field_names(self, as_bytes=False): r"""Get the field names for an array of fields. Args: as_bytes (bool, optional): If True, the field names will be returned as bytes. If False the field names will be returned as unicode. Defaults to False. Returns: list: Names for each field in the data type. """ if getattr(self, 'field_names', None) is not None: out = copy.deepcopy(self.field_names) elif ((self.datatype['type'] != 'array') or ('items' not in self.datatype)): out = None elif isinstance(self.datatype['items'], dict): # pragma: debug raise Exception("Variable number of items not yet supported.") elif isinstance(self.datatype['items'], list): out = [] any_names = False for i, x in enumerate(self.datatype['items']): out.append(x.get('title', 'f%d' % i)) if len(x.get('title', '')) > 0: any_names = True # Don't use field names if they are all defaults if not any_names: out = None if (out is not None): if as_bytes: out = tools.str2bytes(out, recurse=True) else: out = tools.bytes2str(out, recurse=True) return out
[docs] def get_field_units(self, as_bytes=False): r"""Get the field units for an array of fields. Args: as_bytes (bool, optional): If True, the field units will be returned as bytes. If False the field units will be returned as unicode. Defaults to False. Returns: list: Units for each field in the data type. """ if self.datatype['type'] != 'array': return None out = None if getattr(self, 'field_units', None) is not None: out = [str(units.Units(x)) for x in self.field_units] elif 'items' in self.datatype: if isinstance(self.datatype['items'], dict): # pragma: debug raise Exception("Variable number of items not yet " "supported.") elif isinstance(self.datatype['items'], list): out = [] any_units = False for i, x in enumerate(self.datatype['items']): out.append(x.get('units', '')) if len(x.get('units', '')) > 0: any_units = True # Don't use field units if they are all defaults if not any_units: out = None if (out is not None): if as_bytes: out = tools.str2bytes(out, recurse=True) else: out = tools.bytes2str(out, recurse=True) return out
@property def numpy_dtype(self): r"""np.dtype: Corresponding structured data type. Will be None unless the type is an array of 1darrays.""" return datatypes.type2numpy(self.datatype) @property def typedef(self): # pragma: deprecated r"""dict: Alias for datatype.""" warnings.warn(message=("`typedef` attribute is deprecated`; use " " `datatype` instead."), category=DeprecationWarning) return self.datatype
[docs] def initialize_from_message(self, msg, serializer=None, **metadata): r"""Initialize the serializer based on recieved message. Args: msg (object): Message that serializer should be initialized from. **kwargs: Additional keyword arguments are treated as metadata that may contain additional information for initializing the serializer. """ if ((self.initialized or metadata.get('raw', False) or metadata.get('incomplete', False))): return if serializer is None: serializer = {} if 'datatype' not in serializer: try: serializer['datatype'] = rapidjson.encode_schema(msg, minimal=True) except TypeError as e: raise serialize.SerializationError(e) self.update_serializer(from_message=True, **serializer)
[docs] def initialize_from_metadata(self, metadata): r"""Initialize a serializer based on received metadata. This method will exit early if the serializer has already been intialized. Args: metadata (dict): Header information including type info that should be used to initialize the serializer class. """ if ((self.initialized or metadata.get('raw', False) or metadata.get('incomplete', False))): return self.update_serializer(**metadata.get('serializer', {}))
[docs] def update_serializer(self, skip_type=False, seritype=None, datatype=None, from_message=False, **kwargs): r"""Update serializer with provided information. Args: skip_type (bool, optional): If True, everything is updated except the data type. Defaults to False. **kwargs: Additional keyword arguments are processed as part of they type definition and are parsed for old-style keywords. Raises: RuntimeError: If there are keywords that are not valid datatype keywords (currect or old-style). """ if seritype not in [None, self._seritype, 'default']: # pragma: debug raise Exception(f"Cannot change types form {self._seritype} " f"to {seritype}.") # Set attributes and remove unused metadata keys for k in self._schema_properties.keys(): if k in kwargs: setattr(self, k, kwargs.pop(k)) # Update extra keywords if (len(kwargs) > 0): self.extra_kwargs.update(kwargs) self.debug("Extra kwargs: %.100s..." % str(self.extra_kwargs)) # Update type if not skip_type: old_datatype = None if self.initialized: old_datatype = copy.deepcopy(self.datatype) if datatype is None: datatype = {} # Update datatype from oldstyle keywords in extra_kwargs if from_message or (datatype and datatype != self.default_datatype): datatype = self.update_typedef_from_oldstyle(datatype) if 'type' in datatype: # TODO: Fix push/pull of schema properties if ((self.partial_datatype and (from_message or datatype != self.default_datatype))): datatype.update(self.partial_datatype) self.partial_datatype = None self.datatype = rapidjson.normalize(datatype, {'type': 'schema'}) if from_message: self._initialized = True if ((self.datatype['type'] == 'array' and isinstance(self.datatype.get('items', None), list) and len(self.datatype['items']) == 1)): self.datatype['allowSingular'] = True self.datatype['items'][0].pop('allowWrapped', False) # elif self.datatype['type'] not in ['array', 'object']: # self.datatype['allowWrapped'] = True # Check to see if new datatype is compatible with new one if old_datatype and datatype: rapidjson.compare_schemas(self.datatype, old_datatype) # Enfore that strings used with messages are in bytes for k in self._attr_conv: v = getattr(self, k, None) if isinstance(v, (str, bytes)): setattr(self, k, tools.str2bytes(v))
[docs] def cformat2nptype(self, *args, **kwargs): r"""Method to convert c format string to numpy data type. Args: *args: Arguments are passed to serialize.cformat2nptype. **kwargs: Keyword arguments are passed to serialize.cformat2nptype. Returns: np.dtype: Corresponding numpy data type. """ return serialize.cformat2nptype(*args, **kwargs)
[docs] def update_typedef_from_oldstyle(self, typedef): r"""Update a given typedef using an old, table-style serialization spec. Existing typedef values are not overwritten and warnings are raised if the provided serialization spec is not compatible with the type definition. Args: typedef (dict): Type definition to update. Returns: dict: Updated typedef. """ for k in self._oldstyle_kws: used = [] updated = [] v = self.extra_kwargs.get(k, getattr(self, k, None)) if v is None: continue # Check status if ((k != 'format_str') and (typedef.get('type', None) != 'array')): continue # Key specific changes to type if k == 'format_str': v = tools.bytes2str(v) fmts = serialize.extract_formats(v) if typedef == self.default_datatype: typedef.clear() if 'type' in typedef: if (typedef.get('type', None) == 'array'): assert len(typedef.get('items', [])) == len(fmts) elif len(fmts) == 1: cpy = copy.deepcopy(typedef) typedef.clear() typedef.update(type='array', items=[cpy]) else: # pragma: debug continue as_array = self.extra_kwargs.get('as_array', getattr(self, 'as_array', False)) typedef.setdefault('type', 'array') typedef.setdefault('items', []) for i, fmt in enumerate(fmts): nptype = self.cformat2nptype(fmt) itype_fmt = rapidjson.encode_schema( np.ones(1, nptype), minimal=True) if as_array: itype_fmt['type'] = '1darray' else: itype_fmt['type'] = 'scalar' if itype_fmt['subtype'] in constants.FLEXIBLE_TYPES: itype_fmt.pop('precision', None) if len(typedef['items']) < (i + 1): typedef['items'].append(itype_fmt) continue itype = typedef['items'][i] itype_fmt['type'] = itype['type'] if ((itype_fmt['subtype'] in constants.FLEXIBLE_TYPES and (itype_fmt['type'] == 'scalar' or ('encoding' in itype and 'encoding' not in itype_fmt and 'precision' in itype)))): itype_fmt.pop('precision', None) typedef['items'][i].update(itype_fmt) used.append('as_array') updated.append('format_str') elif k == 'as_array': # Can only be used in conjunction with format_str pass elif k in ['field_names', 'field_units']: v = tools.bytes2str(v, recurse=True) if k == 'field_names': tk = 'title' else: tk = 'units' if isinstance(typedef.get('items', []), dict): typedef['items'] = [copy.deepcopy(typedef['items']) for _ in range(len(v))] if ((len(v) != len(typedef.get('items', [])) and (len(v) == 1) and (len(v[0].split(',')) == len(typedef.get( 'items', []))))): valt = v[0].split(',') v[0] = valt[0] for vv in valt[1:]: v.append(vv) assert len(v) == len(typedef.get('items', [])) # if len(v) != len(typedef.get('items', [])): # warnings.warn('%d %ss provided, but only %d items in typedef.' # % (len(v), k, len(typedef.get('items', [])))) # continue all_updated = True for iv, itype in zip(v, typedef.get('items', [])): if tk in itype: all_updated = False if tk == 'units': if units.is_null_unit(iv): continue iv = str(units.Units(iv)) type_map = {'number': 'float', 'integer': 'int'} if itype['type'] in type_map: itype.update(type='scalar', subtype=type_map[itype['type']], precision=8) itype.setdefault(tk, iv) if all_updated: used.append(k) updated.append(k) # Won't change anything unless its an attribute else: # pragma: debug raise ValueError( "Unrecognized table-style specification keyword: '%s'." % k) for rk in used: if rk in self.extra_kwargs: del self.extra_kwargs[rk] for rk in updated: if rk in self.extra_kwargs: self.extra_kwargs[rk] = v elif hasattr(self, rk): setattr(self, rk, v) return typedef
[docs] def func_serialize(self, args): # pragma: debug r"""Serialize a message. Args: args: List of arguments to be formatted or numpy array to be serialized. Returns: bytes, str: Serialized message. """ raise NotImplementedError("func_serialize not implemented.")
[docs] def func_deserialize(self, msg): # pragma: debug r"""Deserialize a message. Args: msg: Message to be deserialized. Returns: obj: Deserialized message. """ raise NotImplementedError("func_deserialize not implemented.")
[docs] def normalize(self, args): r"""Normalize a message to conform to the expected datatype. Args: args (object): Message arguments. Returns: object: Normalized message. """ if self.initialized: # try: args = rapidjson.normalize(args, self.datatype) # except rapidjson.NormalizationError: # self.info(f"args = {args}, datatype = {self.datatype}") # if ((isinstance(args, (list, tuple)) and len(args) == 1 # and 'allowWrapped' not in self.datatype)): # self.datatype['allowWrapped'] = True # return rapidjson.normalize(args, self.datatype) # raise return args
[docs] def serialize(self, args, metadata=None, add_serializer_info=False, no_metadata=False, max_header_size=0): r"""Serialize a message. Args: args (obj): List of arguments to be formatted or a ready made message. metadata (dict, optional): Keyword arguments that should be added to the header. Defaults to None and no header is added. add_serializer_info (bool, optional): If True, serializer information will be added to the metadata. Defaults to False. no_metadata (bool, optional): If True, no metadata will be added to the serialized message. Defaults to False. max_header_size (int, optional): Maximum size that header should occupy in order to be sent in a single message. A value of 0 indicates that any size header is valid. Defaults to 0. Returns: bytes, str: Serialized message. Raises: TypeError: If returned msg is not bytes type (str on Python 2). """ if metadata is None: metadata = {} if isinstance(args, bytes) and (args == constants.YGG_MSG_EOF): metadata['raw'] = True if not metadata.get('raw', False): was_init = self.initialized if was_init: args = self.normalize(args) self.initialize_from_message(args, **metadata) if (not was_init) and self.initialized: args = self.normalize(args) if add_serializer_info: self.verbose_debug("serializer_info = %.100s...", str(self.serializer_info)) metadata['serializer'] = self.serializer_info if metadata.get('raw', False): data = args else: data = self.func_serialize(args) if not isinstance(data, bytes): raise TypeError(f"Serialization function returned " f"object of type '{type(data)}', not " f"required '{bytes}' type.") return self.encode(data, metadata, no_metadata=no_metadata, max_header_size=max_header_size)
[docs] def encode(self, data, metadata, no_metadata=False, max_header_size=0): r"""Encode the message with metadata in a header. Args: data (bytes): Message data serialized into bytes. metadata (dict): Metadata that should be included in the message header. no_metadata (bool, optional): If True, no metadata will be added to the serialized message. Defaults to False. max_header_size (int, optional): Maximum size that header should occupy in order to be sent in a single message. A value of 0 indicates that any size header is valid. Defaults to 0. Returns: bytes: Encoded message with header. """ if no_metadata: return data metadata.setdefault('__meta__', {}) metadata['__meta__']['size'] = len(data) metadata['__meta__'].setdefault('id', str(uuid.uuid4())) header = (constants.YGG_MSG_HEAD + tools.str2bytes(rapidjson.dumps(metadata)) + constants.YGG_MSG_HEAD) if (max_header_size > 0) and (len(header) > max_header_size): metadata_required = {'__meta__': metadata['__meta__']} metadata = {k: v for k, v in metadata.items() if k != '__meta__'} data = (tools.str2bytes(rapidjson.dumps(metadata)) + constants.YGG_MSG_HEAD + data) metadata_required['__meta__']['size'] = len(data) metadata_required['__meta__']['in_data'] = True header = (constants.YGG_MSG_HEAD + tools.str2bytes(rapidjson.dumps(metadata_required)) + constants.YGG_MSG_HEAD) if len(header) > max_header_size: # pragma: debug raise AssertionError(f"The header is larger ({len(header)})" f" than the maximum ({max_header_size}):" f" {header[:min(len(header), 100)]}...") return header + data
[docs] def deserialize(self, msg, **kwargs): r"""Deserialize a message. Args: msg (str, bytes): Message to be deserialized. **kwargs: Additional keyword arguments are passed to the decode method. Returns: tuple(obj, dict): Deserialized message and header information. Raises: TypeError: If msg is not bytes type (str on Python 2). """ msg, metadata = self.decode(msg, **kwargs) self.initialize_from_metadata(metadata) if metadata['__meta__']['size'] == 0: out = self.empty_msg elif metadata.get('incomplete', False) or metadata.get('raw', False): out = msg else: out = self.func_deserialize(msg) was_init = self.initialized if was_init: out = self.normalize(out) self.initialize_from_message(out, **metadata) if (not was_init) and self.initialized: out = self.normalize(out) return out, metadata
[docs] def decode(self, msg, no_data=False, metadata=None): r"""Decode message parts into header and body. Args: msg (str, bytes): Message to be decoded. no_data (bool, optional): If True, only the metadata is returned. Defaults to False. metadata (dict, optional): Metadata that should be used to deserialize the message instead of the current header content. Defaults to None and is not used. Returns: tuple(obj, dict): Deserialized message and header information. Raises: ValueError: If msg contains a header, but metadata is also provided as an argument. TypeError: If msg is not bytes. """ if not isinstance(msg, bytes): raise TypeError("Messages are expected to be bytes.") if msg.startswith(constants.YGG_MSG_HEAD): if metadata is not None: # pragma: debug raise ValueError("Metadata in header and provided by keyword.") _, metadata, data = msg.split(constants.YGG_MSG_HEAD, 2) metadata = rapidjson.loads(metadata) elif isinstance(metadata, dict) and metadata['__meta__'].get('in_data', False): assert msg.count(constants.YGG_MSG_HEAD) == 1 metadata_remainder, data = msg.split(constants.YGG_MSG_HEAD, 1) if len(metadata_remainder) > 0: metadata.update(rapidjson.loads(metadata_remainder)) metadata['__meta__'].pop('in_data') # Data no longer contains the additional metadata metadata['__meta__']['size'] = len(data) else: data = msg if metadata is None: metadata = {'__meta__': {'size': len(msg)}} # Set flags based on data metadata['incomplete'] = (len(data) < metadata['__meta__']['size']) if data == constants.YGG_MSG_EOF: metadata['raw'] = True # Return based on flags if no_data: return metadata return data, metadata
[docs] def load(self, fd, **kwargs): r"""Deserialize from a file. Args: fd (str, file): Filename or file-like object to load from. **kwargs: Additional keyword arguments are passed to the created FileComm used for reading. Returns: object: The deserialized data object or a list of deserialized data objects if there is more than one. """ from yggdrasil.communication.FileComm import FileComm kwargs.setdefault("serializer", self) comm = FileComm(fd, direction="recv", **kwargs) try: out = comm.load() finally: comm.close() return out
[docs] def dump(self, fd, obj, **kwargs): r"""Serialize to a file. Args: fd (str, file): Filename or file-like object to load from. **kwargs: Additional keyword arguments are passed to the created FileComm used for reading. """ from yggdrasil.communication.FileComm import FileComm kwargs.setdefault("serializer", self) comm = FileComm(fd, direction="send", **kwargs) try: comm.dump(obj) finally: comm.close()
[docs] def enable_file_header(self): # pragma: no cover r"""Set serializer attributes to enable file headers to be included in the serializations.""" pass
[docs] def disable_file_header(self): r"""Set serializer attributes to disable file headers from being included in the serializations.""" pass
[docs] def serialize_file_header(self): # pragma: no cover r"""Return the serialized header information that should be prepended to files serialized using this class. Returns: bytes: Header string that should be written to the file. """ return b''
[docs] def deserialize_file_header(self, fd): # pragma: no cover r"""Deserialize the header information from the file and update the serializer. Args: fd (file): File containing header. """ pass
[docs] def consolidate_array(self, out): r"""Consolidate message into a structure numpy array if possible. Args: out (list, tuple, np.ndarray): Object to consolidate into a structured numpy array. Returns: np.ndarray: Structured numpy array containing consolidated message. Raises: ValueError: If the array cannot be consolidated. """ np_dtype = self.numpy_dtype if np_dtype and isinstance(out, (list, tuple, np.ndarray)): out = serialize.consolidate_array(out, dtype=np_dtype) else: warnings.warn(("Cannot consolidate message into a structured " + "numpy array: %s") % str(out)) return out
# def format_header(self, header_info): # r"""Format header info to form a string that should prepend a message. # Args: # header_info (dict): Properties that should be included in the header. # Returns: # str: Message with header in front. # """
[docs] def parse_header(self, msg): r"""Extract header info from a message. Args: msg (str): Message to extract header from. Returns: dict: Message properties. """ return self.decode(msg, no_data=True)