"""Collection of reward functions"""
from abc import abstractmethod
import numpy as np
class RewardMixin:
    """Collection of reward functions based on expression complexity.

    Every reward method shares the signature ``(state_old, state_new)`` so
    that they can be used interchangeably. The class mixing this in must
    provide :meth:`expression_complexity`.
    """

    @abstractmethod
    def expression_complexity(self, state):
        """Get the graph / expression complexity for a given state.

        This is equal to number_of_nodes + number_of_edges.

        Parameters
        ----------
        state : str
            String representation of the state

        Returns
        -------
        complexity : int
            Complexity (loss) of the given state

        Raises
        ------
        NotImplementedError
            Always, unless overridden by the class using this mixin.
        """
        # ``abstractmethod`` only blocks instantiation when the metaclass is
        # ``ABCMeta``; this mixin does not impose one, so fail loudly here
        # instead of returning ``None`` and breaking the reward arithmetic.
        raise NotImplementedError(
            f"{type(self).__name__} must implement expression_complexity"
        )

    def diff_loss_reward(self, state_old, state_new):
        """
        Reward is the decrease in complexity from the old to the new state.

        Parameters
        ----------
        state_old : str
            String representation of last state
        state_new : str
            String representation of new state

        Returns
        -------
        reward : int
            Loss of state_old minus loss of state_new (positive when the
            new state is less complex)
        """
        loss_old = self.expression_complexity(state_old)
        loss_new = self.expression_complexity(state_new)
        return loss_old - loss_new

    # pylint: disable=unused-argument
    def sub_loss_reward(self, state_old, state_new):
        """
        Reward is the negated complexity of the new state.

        Parameters
        ----------
        state_old : str
            String representation of last state (unused; kept so that all
            reward functions share the same signature)
        state_new : str
            String representation of new state

        Returns
        -------
        reward : int
            Negative of the loss for state_new
        """
        loss_new = self.expression_complexity(state_new)
        return -1 * loss_new

    # pylint: disable=unused-argument
    def exp_loss_reward(self, state_old, state_new):
        """
        Reward decays exponentially with the complexity of the new state.

        Parameters
        ----------
        state_old : str
            String representation of last state (unused; kept so that all
            reward functions share the same signature)
        state_new : str
            String representation of new state

        Returns
        -------
        reward : float
            ``exp(-loss)`` where loss is the complexity of state_new
        """
        loss_new = self.expression_complexity(state_new)
        return np.exp(-loss_new)

    # pylint: disable=unused-argument
    def inv_loss_reward(self, state_old, state_new):
        """
        Reward is the inverse of one plus the complexity of the new state.

        Parameters
        ----------
        state_old : str
            String representation of last state (unused; kept so that all
            reward functions share the same signature)
        state_new : str
            String representation of new state

        Returns
        -------
        reward : float
            ``1 / (1 + loss)`` where loss is the complexity of state_new;
            lies in (0, 1] for a non-negative loss
        """
        loss_new = self.expression_complexity(state_new)
        return 1 / (1 + loss_new)