# Source code for pfrl.policies.gaussian_policy

import numpy as np
import torch
from torch import nn


class GaussianHeadWithStateIndependentCovariance(nn.Module):
    """Gaussian head with state-independent learned covariance.

    This module is intended to be attached to a neural network that outputs
    the mean of a Gaussian policy. The only learnable parameter this module
    has determines the variance in a state-independent way.

    State-independent parameterization of the variance of a Gaussian policy
    is often used with PPO and TRPO, e.g., in https://arxiv.org/abs/1709.06560.

    Args:
        action_size (int): Number of dimensions of the action space.
        var_type (str): Type of parameterization of variance. It must be
            'spherical' (one variance parameter shared by all action
            dimensions) or 'diagonal' (one variance parameter per action
            dimension).
        var_func (callable): Callable that computes the variance from the var
            parameter. It should always return positive values.
        var_param_init (float): Initial value of the var parameter.

    Raises:
        KeyError: If ``var_type`` is neither 'spherical' nor 'diagonal'.
    """

    def __init__(
        self,
        action_size,
        var_type="spherical",
        var_func=nn.functional.softplus,
        var_param_init=0,
    ):
        super().__init__()

        self.var_func = var_func
        # Number of learnable variance entries; an unknown var_type fails
        # here with KeyError.
        var_size = {"spherical": 1, "diagonal": action_size}[var_type]

        # The raw (pre-var_func) parameter, filled with var_param_init.
        # torch.tensor copies the broadcast view into fresh storage.
        self.var_param = nn.Parameter(
            torch.tensor(
                np.broadcast_to(var_param_init, var_size),
                dtype=torch.float,
            )
        )

    def forward(self, mean):
        """Return a Gaussian with given mean.

        Args:
            mean (torch.Tensor): Mean of Gaussian. Its last dimension is
                treated as the action (event) dimension.

        Returns:
            torch.distributions.Distribution: Gaussian whose mean is the
                mean argument and whose variance is computed from the parameter
                of this module.
        """
        var = self.var_func(self.var_param)
        # Normal is parameterized by the standard deviation, hence the sqrt.
        # The scale is broadcast against ``mean`` by Normal itself.
        base = torch.distributions.Normal(loc=mean, scale=torch.sqrt(var))
        # Reinterpret the last batch dimension as the event dimension so that
        # log_prob is summed over action dimensions.
        return torch.distributions.Independent(base, 1)


class GaussianHeadWithDiagonalCovariance(nn.Module):
    """Gaussian head with diagonal covariance.

    This module is intended to be attached to a neural network that outputs
    a vector that is twice the size of an action vector. The vector is split
    and interpreted as the mean and diagonal covariance of a Gaussian policy.

    Args:
        var_func (callable): Callable that computes the variance
            from the second input. It should always return positive values.
    """

    def __init__(self, var_func=nn.functional.softplus):
        super().__init__()
        self.var_func = var_func

    def forward(self, mean_and_var):
        """Return a Gaussian with given mean and diagonal covariance.

        Args:
            mean_and_var (torch.Tensor): Two-dimensional tensor whose second
                dimension is twice the size of an action vector. The first
                half of that dimension is the mean; the second half is passed
                through ``var_func`` to obtain the variance.

        Returns:
            torch.distributions.Distribution: Gaussian distribution with given
                mean and diagonal covariance.

        Raises:
            ValueError: If ``mean_and_var`` is not two-dimensional or its
                second dimension has an odd size.
        """
        # Explicit checks instead of ``assert`` so they survive ``python -O``.
        if mean_and_var.ndim != 2:
            raise ValueError(
                "mean_and_var must be a 2-dimensional tensor, "
                f"but got {mean_and_var.ndim} dimension(s)"
            )
        width = mean_and_var.shape[1]
        # With an odd width, chunk() returns unequal halves; for some sizes
        # the narrower variance would silently broadcast against the mean.
        if width % 2 != 0:
            raise ValueError(
                "the second dimension of mean_and_var must have an even size, "
                f"but got {width}"
            )
        mean, pre_var = mean_and_var.chunk(2, dim=1)
        # Normal is parameterized by the standard deviation, hence the sqrt.
        scale = self.var_func(pre_var).sqrt()
        # Treat the last dimension as the event (action) dimension.
        return torch.distributions.Independent(
            torch.distributions.Normal(loc=mean, scale=scale), 1
        )


class GaussianHeadWithFixedCovariance(nn.Module):
    """Gaussian head with fixed covariance.

    This module is intended to be attached to a neural network that outputs
    the mean of a Gaussian policy. Its covariance is fixed to a diagonal matrix
    with a given scale. The module has no learnable parameters.

    Args:
        scale (float): Scale parameter. It is passed unchanged as the
            ``scale`` argument of ``torch.distributions.Normal``.
    """

    def __init__(self, scale=1):
        super().__init__()
        # Stored as a plain attribute, not a parameter or buffer.
        self.scale = scale

    def forward(self, mean):
        """Return a Gaussian with given mean.

        Args:
            mean (torch.Tensor): Batch of mean vectors. The last dimension is
                treated as the action (event) dimension.

        Returns:
            torch.distributions.Distribution: Multivariate Gaussian whose mean
                is the mean argument and whose scale is fixed.
        """
        base = torch.distributions.Normal(loc=mean, scale=self.scale)
        # Reinterpret the last batch dimension as the event dimension so that
        # log_prob is summed over action dimensions.
        return torch.distributions.Independent(base, 1)