import logging
from typing import Callable, Type

from ray.rllib.agents.ppo.ppo import PPOTrainer
from ray.rllib.env.env_context import EnvContext
from ray.rllib.policy import Policy
from ray.rllib.utils.typing import TrainerConfigDict, EnvType

from scenarionet_training.train_utils.anisotropic_workerset import AnisotropicWorkerSet

logger = logging.getLogger(__name__)


class MultiWorkerPPO(PPOTrainer):
    """
    In this class, each worker has a different config, which speeds up training
    and saves memory. More importantly, it allows us to cover all train/test
    cases more evenly.
    """

    def _make_workers(self, env_creator: Callable[[EnvContext], EnvType],
                      policy_class: Type[Policy], config: TrainerConfigDict,
                      num_workers: int):
        """Default factory method for a WorkerSet running under this Trainer.

        Override this method by passing a custom `make_workers` into
        `build_trainer`.

        Args:
            env_creator (callable): A function that returns an Env given an
                env config.
            policy_class (Type[Policy]): The Policy class to use for creating
                the policies of the workers.
            config (TrainerConfigDict): The Trainer's config.
            num_workers (int): Number of remote rollout workers to create.
                0 for local only.

        Returns:
            WorkerSet: The created WorkerSet.
        """
        return AnisotropicWorkerSet(
            env_creator=env_creator,
            policy_class=policy_class,
            trainer_config=config,
            num_workers=num_workers,
            logdir=self.logdir)
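

if __name__ == "__main__":
    # Hypothetical usage sketch: it only illustrates that MultiWorkerPPO is
    # constructed and driven exactly like a stock PPOTrainer, with the
    # per-worker behavior coming from AnisotropicWorkerSet. The env name and
    # config values below are illustrative assumptions; in practice this
    # trainer is launched through the scenarionet training scripts with their
    # own env configs.
    import ray

    ray.init()
    trainer = MultiWorkerPPO(config={"num_workers": 2}, env="CartPole-v0")
    for i in range(3):
        result = trainer.train()
        logger.info("iter %d: episode_reward_mean=%s",
                    i, result.get("episode_reward_mean"))
    trainer.stop()
    ray.shutdown()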