Slide 11
Slide 11 text
環境のつくりかた (1/2)
• 自分で定義する場合
• 環境の初期化：env.reset
• 環境の実行：env.step
• 例：数当てゲーム
• 初期化時に秘密の数を決める
• エージェントの推測 (「行動」)
との大小を「観測」として返す
class GuessNumberEnv(object):
    """Number-guessing environment exposing ``reset`` and ``step``.

    ``reset`` draws a hidden number uniformly from [-1, 1).  Each ``step``
    receives a one-element guess and returns a one-hot observation telling
    whether the guess was below, equal to, or above the hidden number.

    Observation layout (float32, length 4):
        index 0 -- guess was lower than the hidden number
        index 1 -- guess was exactly equal to the hidden number
        index 2 -- guess was higher than the hidden number
        index 3 -- set only in the observation returned by ``reset``
    """

    def reset(self):
        """Draw a new hidden number and return the initial observation.

        Returns:
            A float32 array ``[0, 0, 0, 1]``.
        """
        self._state = np.random.uniform(-1, 1)
        obs = np.array([0, 0, 0, 1], dtype=np.float32)
        return obs

    def step(self, action):
        """Evaluate one guess against the hidden number.

        Args:
            action: Array-like of shape ``(1,)`` holding the guess.

        Returns:
            A tuple ``(obs, reward, done, info)`` where ``obs`` is the one-hot
            comparison result described on the class, ``reward`` is a
            standard-normal sample minus the absolute error of the guess,
            ``done`` is always ``False`` and ``info`` is always ``None``.

        Raises:
            AssertionError: If ``action`` does not have shape ``(1,)``.
        """
        # Accept lists/tuples as well as ndarrays; ndarrays pass through as-is.
        action = np.asarray(action)
        assert action.shape == (1,), "action must have shape (1,)"
        diff = action[0] - self._state
        obs = np.array([0, 0, 0, 0], dtype=np.float32)
        # sign(diff) is -1, 0 or +1, which maps onto indices 0, 1 and 2.
        obs[1 + int(np.sign(diff))] = 1
        # Noisy reward: the closer the guess, the higher the expected reward.
        reward = np.random.normal(0, 1) - abs(diff)
        return obs, reward, False, None  # not done, no info