BindsNET Learning Series: Reward


    Related source code: bindsnet/bindsnet/learning/reward.py

    1. AbstractReward

    from abc import ABC, abstractmethod


    class AbstractReward(ABC):
        # language=rst
        """
        Abstract base class for reward computation.
        """
    
        @abstractmethod
        def compute(self, **kwargs) -> None:
            # language=rst
            """
            Computes/modifies reward.
            """
            pass
    
        @abstractmethod
        def update(self, **kwargs) -> None:
            # language=rst
            """
            Updates internal variables needed to modify reward. Usually called once per
            episode.
            """
            pass
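    Any concrete reward class just implements these two hooks: compute is called whenever a reward value is needed during simulation, and update is called once per episode to refresh internal state. As a minimal illustrative sketch (not BindsNET code; the class name PassthroughReward and its episode_total attribute are made up for this example), a subclass could simply return the reward unchanged:

    import torch


    class PassthroughReward(AbstractReward):
        """Toy reward class: returns the reward as-is and tracks an episode total."""

        def __init__(self, **kwargs) -> None:
            self.episode_total = torch.tensor(0.0)

        def compute(self, **kwargs) -> torch.Tensor:
            # Return the externally supplied reward without modification.
            reward = torch.as_tensor(kwargs["reward"], dtype=torch.float)
            self.episode_total = self.episode_total + reward
            return reward

        def update(self, **kwargs) -> None:
            # Reset the per-episode accumulator; called once per episode.
            self.episode_total = torch.tensor(0.0)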

    2. MovingAvgRPE

    import torch


    class MovingAvgRPE(AbstractReward):
        # language=rst
        """
        Computes reward prediction error (RPE) based on an exponential moving average (EMA)
        of past rewards.
        """
    
        def __init__(self, **kwargs) -> None:
            # language=rst
            """
            Constructor for EMA reward prediction error.
            """
            self.reward_predict = torch.tensor(0.0)  # Predicted reward (per step).
            self.reward_predict_episode = torch.tensor(0.0)  # Predicted reward per episode.
            self.rewards_predict_episode = []  # List of predicted rewards per episode (used for plotting).
    
        def compute(self, **kwargs) -> torch.Tensor:
            # language=rst
            """
            Computes the reward prediction error using EMA.
    
            Keyword arguments:
    
            :param Union[float, torch.Tensor] reward: Current reward.
            :return: Reward prediction error.
            """
            # Get keyword arguments.
            reward = kwargs["reward"]
    
            return reward - self.reward_predict
    
        def update(self, **kwargs) -> None:
            # language=rst
            """
            Updates the EMAs. Called once per episode.
    
            Keyword arguments:
    
            :param Union[float, torch.Tensor] accumulated_reward: Reward accumulated over
                one episode.
            :param int steps: Steps in that episode.
            :param float ema_window: Width of the averaging window.
            """
            # Get keyword arguments.
            accumulated_reward = kwargs["accumulated_reward"]
            steps = torch.tensor(kwargs["steps"]).float()
            ema_window = torch.tensor(kwargs.get("ema_window", 10.0))
    
            # Compute average reward per step.
            reward = accumulated_reward / steps
    
            # Update EMAs.
            self.reward_predict = (
                1 - 1 / ema_window
            ) * self.reward_predict + 1 / ema_window * reward
            self.reward_predict_episode = (
                1 - 1 / ema_window
            ) * self.reward_predict_episode + 1 / ema_window * accumulated_reward
            self.rewards_predict_episode.append(self.reward_predict_episode.item())
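    A quick usage sketch (the reward numbers are arbitrary examples): compute is called per step with the current reward and returns the reward minus the EMA prediction, while update blends the episode's result into the predictions with weight 1/ema_window, i.e. prediction <- (1 - 1/w) * prediction + (1/w) * reward. In BindsNET such a class is normally handed to bindsnet.network.Network through its reward_fn argument, so that reward-modulated learning rules receive the prediction error rather than the raw reward.

    import torch

    rpe = MovingAvgRPE()

    # Per step: reward prediction error (reward minus the current EMA prediction).
    for step_reward in [1.0, 0.0, 1.0]:
        error = rpe.compute(reward=step_reward)
        print(float(error))  # 1.0, 0.0, 1.0 while the prediction is still 0

    # Once per episode: update the EMAs (default window of 10 episodes).
    rpe.update(accumulated_reward=torch.tensor(2.0), steps=3, ema_window=10.0)
    print(float(rpe.reward_predict))          # (2/3) / 10 ≈ 0.067 (EMA of per-step reward)
    print(float(rpe.reward_predict_episode))  # 2.0 / 10 = 0.2 (EMA of per-episode reward)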
  Original post: https://www.cnblogs.com/lucifer1997/p/14755445.html