-
Notifications
You must be signed in to change notification settings - Fork 0
/
reward_calculator.py
93 lines (58 loc) · 2.72 KB
/
reward_calculator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from learning_advisor.learning_utils.swirl_com import b_to_mb
class RewardCalculator(object):
    """Base class for reward strategies.

    Subclasses supply the formula by overriding ``_calculate_reward``; this
    class extracts the inputs from the environment state and keeps a running
    sum of every reward handed out since the last ``reset``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set the running reward total back to zero."""
        self.accumulated_reward = 0

    def calculate_reward(self, environment_state):
        """Compute the reward for one step and add it to the running total.

        Args:
            environment_state: Mapping providing the keys ``"current_cost"``,
                ``"previous_cost"``, ``"initial_cost"`` and
                ``"new_index_size"``.

        Returns:
            Whatever the subclass's ``_calculate_reward`` returns.

        Raises:
            KeyError: If one of the required keys is missing.
            AssertionError: If ``new_index_size`` is ``None``.
        """
        # Keys are read in a fixed order so a missing entry always surfaces
        # as the same KeyError regardless of the other values.
        cost_values = [
            environment_state[key]
            for key in ("current_cost", "previous_cost", "initial_cost")
        ]
        new_index_size = environment_state["new_index_size"]
        assert new_index_size is not None

        step_reward = self._calculate_reward(*cost_values, new_index_size)
        self.accumulated_reward += step_reward
        return step_reward

    def _calculate_reward(self, current_cost, previous_cost, initial_cost, new_index_size):
        """Return the reward for one step; must be overridden by subclasses."""
        raise NotImplementedError
class AbsoluteDifferenceRelativeToStorageReward(RewardCalculator):
    """Reward the absolute cost reduction of a step per unit of new index size."""

    def __init__(self):
        super().__init__()

    def _calculate_reward(self, current_cost, previous_cost, initial_cost, new_index_size):
        """Return ``(previous_cost - current_cost) / new_index_size``.

        ``initial_cost`` is accepted for interface compatibility but unused.
        ``new_index_size`` is used as given (no unit conversion here).
        """
        cost_reduction = previous_cost - current_cost
        return cost_reduction / new_index_size
class AbsoluteDifferenceToPreviousReward(RewardCalculator):
    """Reward the absolute cost reduction achieved by a step."""

    def __init__(self):
        super().__init__()

    def _calculate_reward(self, current_cost, previous_cost, initial_cost, new_index_size):
        """Return ``previous_cost - current_cost``.

        ``initial_cost`` and ``new_index_size`` are accepted for interface
        compatibility but do not influence the result.
        """
        return previous_cost - current_cost
class RelativeDifferenceToPreviousReward(RewardCalculator):
    """Reward a step's cost reduction as a fraction of the initial cost."""

    def __init__(self):
        RewardCalculator.__init__(self)

    def _calculate_reward(self, current_cost, previous_cost, initial_cost, new_index_size):
        """Return ``(previous_cost - current_cost) / initial_cost``.

        Args:
            current_cost: Cost after the step.
            previous_cost: Cost before the step.
            initial_cost: Cost used as the normalisation base.
            new_index_size: Accepted for interface compatibility; unused.

        Returns:
            The relative cost reduction, or ``0`` when ``initial_cost`` is
            zero.
        """
        # With a zero initial cost there is nothing to normalise by; report a
        # neutral reward instead of raising ZeroDivisionError (the same
        # convention RelativeDifferenceRelativeToStorageReward uses).
        if initial_cost == 0:
            return 0
        return (previous_cost - current_cost) / initial_cost
class RelativeDifferenceRelativeToStorageReward(RewardCalculator):
    """Reward the relative cost reduction of a step per unit of index storage.

    The storage figure is ``b_to_mb(new_index_size)`` (presumably a
    bytes-to-megabytes conversion; confirm against ``swirl_com``), and the
    result is multiplied by ``self.SCALER``.
    """

    def __init__(self):
        super().__init__()
        # Multiplier applied to every non-zero reward.
        self.SCALER = 1

    def _calculate_reward(self, current_cost, previous_cost, initial_cost, new_index_size):
        """Return the scaled relative cost reduction per converted index size.

        Returns ``0`` when ``initial_cost`` is zero.

        Raises:
            AssertionError: If ``new_index_size`` is not strictly positive.
        """
        assert new_index_size > 0

        # Nothing to normalise against: neutral reward.
        if initial_cost == 0:
            return 0

        relative_reduction = (previous_cost - current_cost) / initial_cost
        converted_size = b_to_mb(new_index_size)
        return relative_reduction / converted_size * self.SCALER
class DRLindaReward(RewardCalculator):
    """Reward the total cost saved so far as a percentage of the initial cost."""

    def __init__(self):
        RewardCalculator.__init__(self)

    def _calculate_reward(self, current_cost, previous_cost, initial_cost, new_index_size):
        """Return ``(initial_cost - current_cost) / initial_cost * 100``.

        Args:
            current_cost: Cost after the step.
            previous_cost: Accepted for interface compatibility; unused.
            initial_cost: Cost used as the baseline and normalisation base.
            new_index_size: Accepted for interface compatibility; unused.

        Returns:
            The percentage improvement over the initial cost, or ``0`` when
            ``initial_cost`` is zero.
        """
        # With a zero initial cost there is nothing to normalise by; report a
        # neutral reward instead of raising ZeroDivisionError (the same
        # convention RelativeDifferenceRelativeToStorageReward uses).
        if initial_cost == 0:
            return 0
        return ((initial_cost - current_cost) / initial_cost) * 100
class DQNReward(RewardCalculator):
    """Reward a step's cost reduction as a fraction of the initial cost."""

    def __init__(self):
        RewardCalculator.__init__(self)

    def _calculate_reward(self, current_cost, previous_cost, initial_cost, new_index_size):
        """Return ``(previous_cost - current_cost) / initial_cost``.

        Args:
            current_cost: Cost after the step.
            previous_cost: Cost before the step.
            initial_cost: Cost used as the normalisation base.
            new_index_size: Accepted for interface compatibility; unused.

        Returns:
            The relative cost reduction, or ``0`` when ``initial_cost`` is
            zero.
        """
        # With a zero initial cost there is nothing to normalise by; report a
        # neutral reward instead of raising ZeroDivisionError (the same
        # convention RelativeDifferenceRelativeToStorageReward uses).
        if initial_cost == 0:
            return 0
        return (previous_cost - current_cost) / initial_cost