# NOTE(review): `gym` is used by make_env() but its import is not visible in
# this view of the file; added here so the script is self-contained. Drop it
# if the module is already imported above.
import gym
# NOTE(review): the visible source starts mid-statement with the bare token
# `PPO2`; reconstructed as the usual top-level import -- confirm.
from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines.common import set_global_seeds
from stable_baselines.bench import Monitor

from util import callback, log_dir

# Gym environment id and the number of environment copies that main() builds.
ENV_ID = 'BreakoutNoFrameskip-v0'
NUM_ENV = 8


def make_env(env_id: str, rank: int, seed: int = 0):
    """Return a zero-argument factory that builds one seeded environment.

    The global seed is set as a side effect of calling ``make_env`` itself,
    not when the returned factory is later invoked.

    Args:
        env_id: Identifier passed to ``gym.make``.
        rank: Index of this environment among its siblings. It offsets the
            environment seed, and only the environment with ``rank == 0`` is
            wrapped in ``Monitor``.
        seed: Base seed; the environment is seeded with ``seed + rank``.

    Returns:
        A callable taking no arguments that creates and returns the
        environment.
    """
    def _init():
        env = gym.make(env_id)
        if rank == 0:
            # Only the first environment is wrapped in Monitor (using log_dir).
            env = Monitor(env, log_dir, allow_early_resets=True)
        # Distinct seed per rank so the copies do not share a seed.
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)
    return _init


def main():
    """Train a PPO2 ``CnnPolicy`` model on ``NUM_ENV`` copies of ``ENV_ID``."""
    train_env = DummyVecEnv([make_env(ENV_ID, i) for i in range(NUM_ENV)])

    model = PPO2('CnnPolicy', train_env, verbose=0)

    # `callback` comes from the project-local `util` module.
    model.learn(total_timesteps=1280000, callback=callback)