Source code for rl_equation_solver.utilities.reward

"""Collection of reward functions"""
from abc import abstractmethod
import numpy as np


class RewardMixin:
    """Reward function collection"""
    @abstractmethod
    def expression_complexity(self, state):
        """Get the graph / expression complexity for a given state.
        This is equal to number_of_nodes + number_of_edges for the
        expression graph.

        Parameters
        ----------
        state : str
            String representation of the state

        Returns
        -------
        complexity : int
            Sum of the node and edge counts for the expression graph
        """
    def diff_loss_reward(self, state_old, state_new):
        """Reward is the decrease in complexity.

        Parameters
        ----------
        state_old : str
            String representation of the last state
        state_new : str
            String representation of the new state

        Returns
        -------
        reward : int
            Difference between the loss for state_old and state_new
        """
        loss_old = self.expression_complexity(state_old)
        loss_new = self.expression_complexity(state_new)
        return loss_old - loss_new
    # pylint: disable=unused-argument
    def sub_loss_reward(self, state_old, state_new):
        """Reward is the negative complexity of the new state.

        Parameters
        ----------
        state_old : str
            String representation of the last state. Unused.
        state_new : str
            String representation of the new state

        Returns
        -------
        reward : int
            Negative of the loss for state_new
        """
        loss_new = self.expression_complexity(state_new)
        return -1 * loss_new
    # pylint: disable=unused-argument
    def exp_loss_reward(self, state_old, state_new):
        """Reward is the exponential of the negative complexity of the
        new state.

        Parameters
        ----------
        state_old : str
            String representation of the last state. Unused.
        state_new : str
            String representation of the new state

        Returns
        -------
        reward : float
            exp(-loss) for state_new, bounded in (0, 1]
        """
        loss_new = self.expression_complexity(state_new)
        return np.exp(-loss_new)
    # pylint: disable=unused-argument
    def inv_loss_reward(self, state_old, state_new):
        """Reward is the inverse of one plus the complexity of the new
        state.

        Parameters
        ----------
        state_old : str
            String representation of the last state. Unused.
        state_new : str
            String representation of the new state

        Returns
        -------
        reward : float
            1 / (1 + loss) for state_new, bounded in (0, 1]
        """
        loss_new = self.expression_complexity(state_new)
        return 1 / (1 + loss_new)
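

# A minimal usage sketch. The concrete class and the sympy-based
# implementation of expression_complexity below are illustrative
# assumptions, not the package's actual environment: the parsed
# expression tree has `nodes` nodes and `nodes - 1` edges, so
# number_of_nodes + number_of_edges equals 2 * nodes - 1.
if __name__ == "__main__":
    import sympy

    class ExampleEnv(RewardMixin):
        """Hypothetical concrete class mixing in the reward functions"""

        def expression_complexity(self, state):
            expr = sympy.sympify(state)
            nodes = sum(1 for _ in sympy.preorder_traversal(expr))
            return nodes + (nodes - 1)

    env = ExampleEnv()
    # Factoring reduces complexity, so the difference reward is positive.
    print(env.diff_loss_reward("x**2 + 2*x + 1", "(x + 1)**2"))  # 15 - 9 = 6
    # The remaining rewards depend only on the new state.
    print(env.sub_loss_reward(None, "(x + 1)**2"))  # -9
    print(env.exp_loss_reward(None, "(x + 1)**2"))  # exp(-9), about 1.2e-4
    print(env.inv_loss_reward(None, "(x + 1)**2"))  # 1 / (1 + 9) = 0.1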