Source code for rl_equation_solver.utilities.reward

"""Collection of reward functions"""
from abc import abstractmethod
import numpy as np


class RewardMixin:
    """Reward function collection"""
    @abstractmethod
    def expression_complexity(self, state):
        """Get the graph / expression complexity for a given state.
        This is equal to number_of_nodes + number_of_edges for the
        expression graph.

        Parameters
        ----------
        state : str
            String representation of the state

        Returns
        -------
        complexity : int
            Sum of the node and edge counts for the expression graph
        """
    def diff_loss_reward(self, state_old, state_new):
        """Reward is the decrease in complexity.

        Parameters
        ----------
        state_old : str
            String representation of the last state
        state_new : str
            String representation of the new state

        Returns
        -------
        reward : int
            Difference between the loss for state_old and state_new
        """
        loss_old = self.expression_complexity(state_old)
        loss_new = self.expression_complexity(state_new)
        return loss_old - loss_new
    # pylint: disable=unused-argument
    def sub_loss_reward(self, state_old, state_new):
        """Reward is the negative complexity of the new state.

        Parameters
        ----------
        state_old : str
            String representation of the last state. Unused.
        state_new : str
            String representation of the new state

        Returns
        -------
        reward : int
            Negative of the loss for state_new
        """
        loss_new = self.expression_complexity(state_new)
        return -1 * loss_new
    # pylint: disable=unused-argument
    def exp_loss_reward(self, state_old, state_new):
        """Reward is the exponential of the negative complexity of the
        new state.

        Parameters
        ----------
        state_old : str
            String representation of the last state. Unused.
        state_new : str
            String representation of the new state

        Returns
        -------
        reward : float
            exp(-loss) for state_new, bounded in (0, 1]
        """
        loss_new = self.expression_complexity(state_new)
        return np.exp(-loss_new)
    # pylint: disable=unused-argument
    def inv_loss_reward(self, state_old, state_new):
        """Reward is the inverse of one plus the complexity of the new
        state.

        Parameters
        ----------
        state_old : str
            String representation of the last state. Unused.
        state_new : str
            String representation of the new state

        Returns
        -------
        reward : float
            1 / (1 + loss) for state_new, bounded in (0, 1]
        """
        loss_new = self.expression_complexity(state_new)
        return 1 / (1 + loss_new)
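

# A minimal usage sketch. The concrete class and the sympy-based
# implementation of expression_complexity below are illustrative
# assumptions, not the package's actual environment: the parsed
# expression tree has `nodes` nodes and `nodes - 1` edges, so
# number_of_nodes + number_of_edges equals 2 * nodes - 1.
if __name__ == "__main__":
    import sympy

    class ExampleEnv(RewardMixin):
        """Hypothetical concrete class mixing in the reward functions"""

        def expression_complexity(self, state):
            expr = sympy.sympify(state)
            nodes = sum(1 for _ in sympy.preorder_traversal(expr))
            return nodes + (nodes - 1)

    env = ExampleEnv()
    # Factoring reduces complexity, so the difference reward is positive.
    print(env.diff_loss_reward("x**2 + 2*x + 1", "(x + 1)**2"))  # 15 - 9 = 6
    # The remaining rewards depend only on the new state.
    print(env.sub_loss_reward(None, "(x + 1)**2"))  # -9
    print(env.exp_loss_reward(None, "(x + 1)**2"))  # exp(-9), about 1.2e-4
    print(env.inv_loss_reward(None, "(x + 1)**2"))  # 1 / (1 + 9) = 0.1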