Source code for yggdrasil.drivers.TimeSyncModelDriver

import os
import pandas as pd
from yggdrasil import units, tools, multitasking
from yggdrasil.drivers.DSLModelDriver import DSLModelDriver


_default_agg = 'mean'
_default_interp = 'index'


class TimeSyncModelDriver(DSLModelDriver):
    r"""Class for synchronizing states for timesteps between two models.

    Args:
        synonyms (dict, optional): Mapping from model names to mappings
            from base variable names to information about one or more
            alternate variable names used by the named model that should
            be converted to the base variable. Values providing
            information about alternate variables can either be strings
            (implying equivalence with the base variable in everything
            but name and units) or mappings with the keys:

            alt (str, list): Name of one or more variables used by the
                model that should be used to calculate the named base
                variable.
            alt2base (function): Callable object that takes the alternate
                variables named by the 'alt' property as input and
                returns the base variable.
            base2alt (function): Callable object that takes the base
                variable as input and returns the alternate variables
                named by the 'alt' property.

            Defaults to an empty dictionary.
        aggregation (str, dict, optional): Method(s) that should be used
            to aggregate synonymous variables across models. This can be
            a single method that should be used for all synonymous
            variables or a dictionary mapping between synonymous
            variables and the method that should be used for each
            variable. If a variable is not present in the dictionary or
            a value is not provided, 'mean' will be used. See the
            documentation for pandas.DataFrame.aggregate for available
            options.
        interpolation (str, dict, optional): Method(s) or keyword
            arguments that should be used to interpolate missing
            timesteps. This can be a single method or a dictionary
            mapping between model name and the interpolation methods
            (or keyword arguments) that should be used for variables
            from that model. Defaults to 'index'. See the documentation
            for pandas.DataFrame.interpolate for available options.
        additional_variables (dict, optional): Mapping from model name
            to a list of variables from other models that are not
            provided by the model, but should still be returned to the
            model. Defaults to an empty dictionary.
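
    Example:
        The layout below is purely illustrative (the model and variable
        names are hypothetical); it shows one way the synonyms mapping
        described above could be structured, assuming a model named
        'modelA' reports the base variable 'carbon' as two alternate
        variables that sum to it and uses 'N' as a synonym for
        'nitrogen'::

            synonyms = {
                'modelA': {
                    'carbon': {
                        'alt': ['root_carbon', 'shoot_carbon'],
                        'alt2base': lambda root, shoot: root + shoot,
                        'base2alt': lambda c: (0.5 * c, 0.5 * c)},
                    'nitrogen': 'N'}}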
""" _schema_subtype_description = ('Model is dedicated to synchronizing ' 'timesteps between other models.') _schema_properties = { 'synonyms': {'type': 'object', 'additionalProperties': { 'type': 'object', 'additionalProperties': {'anyOf': [ {'type': 'string'}, {'type': 'object', 'required': ['alt', 'alt2base', 'base2alt'], 'properties': { 'alt': {'anyOf': [ {'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}, 'alt2base': {'type': 'function'}, 'base2alt': {'type': 'function'}}}]}}, 'default': {}}, 'interpolation': { 'anyOf': [{'type': 'string'}, {'type': 'object', 'additionalProperties': {'oneOf': [ {'type': 'string'}, {'type': 'object', 'required': ['method'], 'properties': { 'method': {'type': 'string'}}}]}}, {'type': 'object', 'required': ['method'], 'properties': { 'method': {'type': 'string'}}}], 'default': _default_interp}, 'aggregation': { 'anyOf': [{'type': 'function'}, {'type': 'string'}, {'type': 'object', 'additionalProperties': { 'anyOf': [{'type': 'function'}, {'type': 'string'}]}}], 'default': _default_agg}, 'additional_variables': { 'type': 'object', 'additionalProperties': {'type': 'array', 'items': {'type': 'string'}}, 'default': {}}, 'args': {'type': 'array', 'default': [], 'items': {'type': ['string', 'number']}, 'allowSingular': True}} _schema_no_default_subtype = True language = 'timesync' executable_type = 'other' def __init__(self, name, *args, **kwargs): super(TimeSyncModelDriver, self).__init__(name, *args, **kwargs) # Ensure that options are uniform in their format and check # that they are valid for k, v in self.synonyms.items(): for s0, x in list(v.items()): if isinstance(x, str): x = {'alt': x, 'alt2base': None, 'base2alt': None} if not isinstance(x['alt'], list): x['alt'] = [x['alt']] if ((((x['alt2base'] is None) or (x['base2alt'] is None)) and (len(x['alt']) > 1))): # pragma: debug raise RuntimeError( ('Cannot convert from multiple alternate ' 'variables (%s) to single base variable (%s) ' 'without transformation functions.') % (x['alt'], s0)) v[s0] = x if isinstance(self.interpolation, str): self.interpolation = {'method': self.interpolation} if 'method' not in self.interpolation: for k, v in list(self.interpolation.items()): if isinstance(v, str): v = {'method': v} self.interpolation[k] = v

    def parse_arguments(self, args, **kwargs):
        r"""Sort model arguments to determine which one is the executable
        and which ones are arguments.

        Args:
            args (list): List of arguments provided.
            **kwargs: Additional keyword arguments are ignored.

        """
        assert isinstance(args, list) and (len(args) == 0)
        self.model_file = 'dummy'

    @property
    def model_wrapper_args(self):
        r"""tuple: Positional arguments for the model wrapper."""
        return (self.name, self.synonyms, self.interpolation,
                self.aggregation, self.additional_variables)

    @classmethod
    def model_wrapper(cls, name, synonyms, interpolation, aggregation,
                      additional_variables, env=None):
        r"""Model wrapper."""
        from yggdrasil.languages.Python.YggInterface import YggTimesyncServer
        if env is not None:
            os.environ.update(env)
        rpc = YggTimesyncServer(name)
        threads = {}
        times = []
        tables = {}
        table_units = {'base': {}}
        table_lock = multitasking.RLock()
        default_agg = _default_agg
        if not isinstance(aggregation, dict):
            default_agg = aggregation
            aggregation = {}
        while True:
            # Check for errors on response threads
            for v in threads.values():
                if v.check_flag_attr('error_flag'):  # pragma: debug
                    for v in threads.values():
                        if v.is_alive():
                            v.terminate()
                    raise Exception("Error on response thread.")
            # Receive values from client models
            flag, values, request_id = rpc.recv_from(timeout=1.0,
                                                     quiet_timeout=True)
            if not flag:
                print("timesync server: End of input.")
                break
            if len(values) == 0:
                rpc.sleep()
                continue
            t, state = values[:]
            t_pd = units.convert_to_pandas_timedelta(t)
            client_model = rpc.ocomm[
                rpc.requests[request_id].response_address].client_model
            # Remove variables marked as external so they are not merged
            external_variables = additional_variables.get(client_model, [])
            for k in external_variables:
                state.pop(k, None)
            internal_variables = list(state.keys())
            # Update record
            with table_lock:
                if client_model not in tables:
                    tables[client_model] = pd.DataFrame({'time': times})
                # Update units & aggregation methods
                if client_model not in table_units:
                    # NOTE: this assumes that units will not change
                    # between timesteps for a single model. Is there a
                    # case where this might not be true?
                    table_units[client_model] = {
                        k: units.get_units(v) for k, v in state.items()}
                    table_units[client_model]['time'] = units.get_units(t)
                    alt_vars = []
                    for k, v in synonyms.get(client_model, {}).items():
                        alt_vars += v['alt']
                        if v['alt2base'] is not None:
                            table_units[client_model][k] = units.get_units(
                                v['alt2base'](*[state[a] for a in v['alt']]))
                        else:
                            table_units[client_model][k] = table_units[
                                client_model][v['alt'][0]]
                    for k, v in table_units[client_model].items():
                        table_units['base'].setdefault(k, v)
                    for k in list(set(state.keys()) - set(alt_vars)):
                        aggregation.setdefault(k, default_agg)
                # Update the state
                if t_pd not in times:
                    times.append(t_pd)
                for model, table in tables.items():
                    new_data = {'time': [t_pd]}
                    if model == client_model:
                        new_data.update({k: [units.get_data(v)]
                                         for k, v in state.items()})
                    new_data = pd.DataFrame(new_data)
                    idx = table['time'].isin([t_pd])
                    if not idx.any():
                        table = pd.concat([table, new_data], sort=False)
                    elif model == client_model:
                        table = table.drop(table.index[idx])
                        table = pd.concat([table, new_data], sort=False)
                    tables[model] = table.sort_values('time')
            # Assign thread to handle checking when data is filled in
            threads[request_id] = multitasking.YggTaskLoop(
                target=cls.response_loop,
                args=(client_model, request_id, rpc, t_pd,
                      internal_variables, external_variables,
                      tables, table_units, table_lock,
                      synonyms, interpolation, aggregation))
            threads[request_id].start()
        # Cleanup threads (only called if there is an error since the
        # loop will only be broken when all of the clients have signed
        # off, implying that all requests have been responded to).
        for v in threads.values():
            if v.is_alive():  # pragma: debug
                v.wait(0.5)
        for v in threads.values():
            if v.is_alive():  # pragma: debug
                v.terminate()
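
    # Illustration (hypothetical values, not part of the driver): each request
    # handled by model_wrapper is a pair ``(t, state)`` where ``t`` is a
    # unitful scalar time, e.g. ``units.add_units(10.0, 'days')``, and
    # ``state`` maps variable names to unitful values, e.g.
    # ``{'carbon': units.add_units(2.0, 'g'), 'N': units.add_units(0.3, 'g')}``.
    # The time is converted to a pandas Timedelta, the state is appended to
    # the requesting model's table, and a response_loop task replies once the
    # other models have provided data for that timestep.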

    @classmethod
    def check_for_data(cls, time, tables, table_units, table_lock,
                       open_clients):
        r"""Check for a time in the tables to determine if there is
        sufficient data available to calculate the state.

        Args:
            time (pandas.Timedelta): Time that state is requested at.
            tables (dict): Mapping from model name to pandas DataFrames
                containing variables supplied by the model.
            table_units (dict): Mapping from model name to dictionaries
                mapping from variable names to units.
            table_lock (RLock): Thread-safe lock for accessing table.
            open_clients (list): Clients that are still open.

        Returns:
            bool: True if there is sufficient data, False otherwise.

        """
        with table_lock:
            for k, v in tables.items():
                if (k in open_clients) and (time > max(v.dropna()['time'])):
                    return False
            for k in open_clients:
                if k not in table_units:  # pragma: debug
                    return False
        return True

    @classmethod
    def response_loop(cls, client_model, request_id, rpc, time,
                      internal_variables, external_variables,
                      tables, table_units, table_lock,
                      synonyms, interpolation, aggregation):
        r"""Check for available data and send response if it is available.

        Args:
            client_model (str): Name of model that made the request.
            request_id (str): ID associated with request that should be
                responded to.
            rpc (ServerComm): Server RPC comm that should be used to
                reply to the request when the data is available.
            time (pandas.Timedelta): Time to get variables at.
            internal_variables (list): Variables that model is requesting
                that it also calculates.
            external_variables (list): Variables that model is requesting
                that will be provided by other models.
            tables (dict): Mapping from model name to pandas DataFrames
                containing variables supplied by the model.
            table_units (dict): Mapping from model name to dictionaries
                mapping from variable names to units.
            table_lock (RLock): Thread-safe lock for accessing table.
            synonyms (dict): Dictionary mapping from base variables to
                alternate variables and mapping functions used to convert
                between the variables. Defaults to empty dict and no
                conversions are performed.
            interpolation (dict): Mapping from model name to the
                interpolation kwargs that should be used. Defaults to
                empty dictionary.
            aggregation (dict): Mapping from variable name to the
                aggregation method that should be used. Defaults to
                empty dictionary.

        """
        if not (rpc.all_clients_connected
                and cls.check_for_data(time, tables, table_units,
                                       table_lock, rpc.open_clients)):
            # Don't start sampling until all clients have connected
            # and there is data available for the requested timestep
            tools.sleep(1.0)
            return
        tot = cls.merge(tables, table_units, table_lock, rpc.open_clients,
                        synonyms, interpolation, aggregation)
        # Update external units
        for k in external_variables:
            if k not in table_units[client_model]:
                table_units[client_model][k] = table_units['base'][k]
        # Check if data is available at the desired timestep?
        # Convert units
        for k in tot.columns:
            funits = units.get_conversion_function(
                table_units['base'][k], table_units[client_model][k])
            tot[k] = tot[k].apply(funits)
        # Transform back to variables expected by the model
        for kbase, alt in synonyms.get(client_model, {}).items():
            if alt['base2alt'] is not None:
                alt_vars = alt['base2alt'](tot[kbase])
                if isinstance(alt_vars, (tuple, list)):
                    assert len(alt_vars) == len(alt['alt'])
                    for k, v in zip(alt['alt'], alt_vars):
                        tot[k] = v
                else:
                    assert len(alt['alt']) == 1
                    tot[alt['alt'][0]] = alt_vars
            else:
                tot[alt['alt'][0]] = tot[kbase]
        # Get state
        state = {}
        for v in internal_variables + external_variables:
            v_res = tot.loc[time, v]
            state[v] = units.add_units(v_res, table_units[client_model][v])
        time_u = units.convert_to(
            units.convert_from_pandas_timedelta(time),
            table_units[client_model]['time'])
        flag = rpc.send_to(request_id, state)
        if not flag:  # pragma: debug
            raise RuntimeError(("Failed to send response to "
                                "request %s for time %s from "
                                "model %s.")
                               % (request_id, time_u, client_model))
        raise multitasking.BreakLoopException

    @classmethod
    def merge(cls, tables, table_units, table_lock, open_clients,
              synonyms, interpolation, aggregation):
        r"""Merge tables from models to get data.

        Args:
            tables (dict): Mapping from model name to pandas DataFrames
                containing variables supplied by the model.
            table_units (dict): Mapping from model name to dictionaries
                mapping from variable names to units.
            table_lock (RLock): Thread-safe lock for accessing table.
            open_clients (list): Clients that are still open.
            synonyms (dict): Dictionary mapping from base variables to
                alternate variables and mapping functions used to convert
                between the variables. Defaults to empty dict and no
                conversions are performed.
            interpolation (dict): Mapping from model name to the
                interpolation kwargs that should be used. Defaults to
                empty dictionary.
            aggregation (dict): Mapping from variable name to the
                aggregation method that should be used. Defaults to
                empty dictionary.

        """
        # Adjust input arguments
        interp_default = {'method': _default_interp}
        if 'method' in interpolation:
            interp_default = interpolation
            interpolation = {}
        # Interpolate
        table_temp = {}
        with table_lock:
            for k, v in tables.items():
                kws = interpolation.get(k, interp_default).copy()
                if k not in open_clients:
                    # Ensure that clients that have signed off are
                    # extrapolated, otherwise they would never produce
                    # valid data
                    kws['limit_area'] = None
                if 'order' in kws:
                    kws['order'] = min(v.dropna().shape[0] - 1,
                                       kws['order'])
                    if kws['order'] == 0:
                        kws.pop('order')
                        kws.update(interp_default)
                v = v.set_index('time')
                # Cannot interpolate on pandas timedelta as of pandas 1.0.1
                ind = v.index
                v.index = v.index.total_seconds()
                table_temp[k] = v.interpolate(**kws)
                table_temp[k].index = ind
        # Rename + transformation
        for model, v in table_temp.items():
            drop = []
            for kbase, alt in synonyms.get(model, {}).items():
                if alt['alt2base'] is not None:
                    args = [v[k] for k in alt['alt']]
                    v[kbase] = alt['alt2base'](*args)
                else:
                    v[kbase] = v[alt['alt'][0]]
                drop += alt['alt']
            for k in drop:
                v = v.drop(k, axis=1)
            # Units
            for k in v.columns:
                funits = units.get_conversion_function(
                    table_units[model][k], table_units['base'][k])
                v[k] = v[k].apply(funits)
            table_temp[model] = v
        # Append
        out = pd.DataFrame()
        for k, v in table_temp.items():
            out = pd.concat([out, v], sort=False)
        # Groupby + aggregate
        out = out.groupby('time').agg(aggregation)
        return out
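

# The sketch below is not part of the driver; it is a minimal, standalone
# illustration (assuming only pandas) of the interpolate-then-aggregate
# pattern that ``merge`` applies: each model's table holds NaNs at timesteps
# it did not report, the gaps are interpolated, the tables are stacked, and
# synonymous columns are aggregated per timestep (the driver's default
# aggregation is 'mean').
if __name__ == '__main__':  # pragma: no cover
    times = [0.0, 1.0, 2.0]
    # Two hypothetical models reporting the same base variable 'x'.
    model_a = pd.DataFrame(
        {'time': times, 'x': [1.0, float('nan'), 3.0]}).set_index('time')
    model_b = pd.DataFrame(
        {'time': times, 'x': [float('nan'), 2.5, 3.5]}).set_index('time')
    # Interpolate each model's missing timesteps, then stack the tables.
    stacked = pd.concat([model_a.interpolate(method='index'),
                         model_b.interpolate(method='index')], sort=False)
    # Aggregate duplicate timesteps across models.
    print(stacked.groupby('time').agg({'x': 'mean'}))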