# - The secret number is decided at initialization time (on reset).
# - Whether the agent's guess (the "action") is smaller or larger than the
#   secret number is returned as the "observation".
class GuessNumberEnv(object):
    """Toy environment in which the agent guesses a hidden number.

    Observations are length-4 ``float32`` vectors with exactly one entry
    set to 1:

    * index 0 -- the last guess was below the secret number
    * index 1 -- the last guess was exactly equal to the secret number
    * index 2 -- the last guess was above the secret number
    * index 3 -- no guess has been made yet (only returned by ``reset``)

    ``step`` never reports the episode as finished; the caller decides when
    to stop and call ``reset`` again.
    """

    def reset(self):
        """Draw a new secret number and return the initial observation.

        The secret is sampled uniformly from [-1, 1) and stored in
        ``self._state``.

        Returns:
            The observation ``[0, 0, 0, 1]`` as a ``float32`` array.
        """
        self._state = np.random.uniform(-1, 1)
        return np.array([0, 0, 0, 1], dtype=np.float32)

    def step(self, action):
        """Compare a guess against the secret number.

        Args:
            action: Array-like of shape ``(1,)`` holding the guess.

        Returns:
            A tuple ``(obs, reward, done, info)``: ``obs`` is the one-hot
            comparison described on the class, ``reward`` is a standard
            normal noise sample minus the absolute error of the guess,
            ``done`` is always ``False`` and ``info`` is always ``None``.

        Raises:
            ValueError: If ``action`` does not have shape ``(1,)``.
        """
        guess = np.asarray(action)
        # Explicit check instead of ``assert`` so that the validation is
        # still performed when Python runs with ``-O``.
        if guess.shape != (1,):
            raise ValueError(
                "action must have shape (1,), got {}".format(guess.shape))

        diff = guess[0] - self._state
        obs = np.zeros(4, dtype=np.float32)
        # sign(diff) is -1, 0 or +1, which maps onto slots 0, 1 and 2.
        obs[1 + int(np.sign(diff))] = 1
        # The reward is deliberately noisy: N(0, 1) noise minus |error|.
        reward = np.random.normal(0, 1) - abs(diff)
        return obs, reward, False, None  # not done, no info