Source code for tsl.datasets.synthetic

import math
from typing import Mapping, Type

import numpy as np
import torch
from torch import nn
from tqdm import tqdm

from tsl.datasets import TabularDataset
from tsl.ops.connectivity import parse_connectivity
from tsl.typing import SparseTensArray
from tsl.utils.casting import torch_to_numpy
from tsl.utils.python_utils import foo_signature


[docs]class GaussianNoiseSyntheticDataset(TabularDataset):
    r"""A generator of synthetic datasets from an input model and input graph.

    The input model must be implemented as a :class:`torch.nn.Module` and must
    return the observation at the next step and (optionally) the hidden state
    for the next step. Gaussian noise will be added to the output of the model
    at each step.

    Args:
        num_features (int): Number of features in the generated dataset.
        num_nodes (int): Number of nodes in the graph.
        num_steps (int): Number of steps to generate.
        connectivity (SparseTensArray): Connectivity of the underlying graph.
        model (torch.nn.Module): Model used to generate data. If :obj:`None`,
            it will attempt to create model from ``model_class`` and
            ``model_kwargs``.
        model_class (type, optional): Class of the model used to generate the
            data.
            (default: :obj:`None`)
        model_kwargs (dict, optional): Keyword arguments needed to initialize
            the model.
            (default: :obj:`None`)
        sigma_noise (float): Standard deviation of the noise.
            (default: :obj:`0.2`)
        name (str, optional): Name for the generated dataset.
            (default: :obj:`None`)
        seed (int, optional): Seed for the random number generator.
            (default: :obj:`None`)
    """

    seed: int = None

    def __init__(self,
                 num_features: int,
                 num_nodes: int,
                 num_steps: int,
                 connectivity: SparseTensArray,
                 min_window: int = 1,
                 model: nn.Module = None,
                 model_class: Type = None,
                 model_kwargs: Mapping = None,
                 sigma_noise: float = .2,
                 name: str = None,
                 seed: int = None,
                 **kwargs):
        self.name = name
        self._num_nodes = num_nodes
        self._num_features = num_features
        self._num_steps = num_steps
        self._min_window = min_window
        if seed is not None:
            self.seed = seed

        if model is not None:
            self.model = model
        else:
            self.model = model_class(**model_kwargs)

        self._model_forward_signature = foo_signature(model.forward)

        self.sigma_noise = sigma_noise
        if connectivity is not None:
            self.connectivity = parse_connectivity(connectivity,
                                                   target_layout='edge_index',
                                                   num_nodes=num_nodes)
        else:
            self.connectivity = None

        target, optimal_pred, mask = self.load()
        super().__init__(target=target, mask=mask, name=name, **kwargs)

        self.add_covariate('optimal_pred', optimal_pred, 't n f')

[docs]    def load_raw(self, *args, **kwargs):
        return self.generate_data(self.seed)

    @property
    def mae_optimal_model(self):
        r""":math:`\mathbb{E}[|\mathbf{X}|]` of a Gaussian
        :math:`\mathbf{X} \sim \mathcal{N}(0, \sigma^2)`, computed as
        :math:`\varepsilon = \sqrt{\frac{2}{\pi}}\sigma`.
        """
        return math.sqrt(2.0 / math.pi) * self.sigma_noise

    def _filter_forward_kwargs(self, kwargs):
        if not self._model_forward_signature['has_kwargs']:
            kwargs = {
                k: v
                for k, v in kwargs.items()
                if k in self._model_forward_signature['signature']
            }
        return kwargs

    def _model_forward(self, *args, **kwargs):
        kwargs = self._filter_forward_kwargs(kwargs)
        out = self.model(*args, **kwargs)
        if len(out) != 2:
            return out, None
        # Assumes that if the output has length 2,
        # then it will contain [output, hidden_state].
        return out

    def generate_data(self, seed=None):
        """"""
        rng = torch.Generator()
        if seed is not None:
            rng.manual_seed(seed)

        # initialize with noise
        x = torch.empty(
            (self._num_steps + self._min_window, self._num_nodes,
             self._num_features)).normal_(generator=rng) * self.sigma_noise

        y_opt = torch.empty(
            (self._num_steps, self._num_nodes, self._num_features))

        if self.connectivity is None:
            edge_index = edge_weight = None
        else:
            edge_index, edge_weight = self.connectivity

        with torch.no_grad():
            h_t = None
            for t in tqdm(range(self._min_window,
                                self._min_window + self._num_steps),
                          desc=f"Generating {self.__class__.__name__} data"):
                x_t, h_t = self._model_forward(x[None, t - self._min_window:t],
                                               h=h_t,
                                               t=t,
                                               edge_index=edge_index,
                                               edge_weight=edge_weight)
                y_opt[t - self._min_window:t + 1 - self._min_window] = x_t[0]
                # add noise
                x_t = x_t + torch.zeros_like(x_t).normal_(
                    generator=rng) * self.sigma_noise
                x[t:t + 1] = x_t[0]

        x = torch_to_numpy(x[self._min_window:])
        y_opt = torch_to_numpy(y_opt)
        return x, y_opt, np.ones_like(x)

    def get_connectivity(self, layout: str = 'edge_index', **kwargs):
        """"""
        if self.connectivity is not None:
            return parse_connectivity(connectivity=self.connectivity,
                                      target_layout=layout,
                                      num_nodes=self.n_nodes)
        return None