Add some updates for NeurIPS paper (#4)

* scenarionet training

* wandb

* train utils

* fix callback

* run PPO

* use pg test

* save path

* use torch

* add dependency

* update ignore

* update training

* large model

* use curriculum training

* add time to exp name

* storage_path

* restore

* update training

* use my key

* add log message

* check seed

* restore callback

* restore callback

* add log message

* add logging message

* restore ray1.4

* length 500

* ray 100

* wandb

* use tf

* more levels

* add callback

* 10 workers

* show level

* no env horizon

* callback result level

* more call back

* add difficulty

* add more stats

* more stats

* show levels

* add callback

* new

* ep len 600

* fix setup

* fix setup

* fix to 3.8

* update setup

* parallel worker!

* new exp

* add callback

* lateral dist

* pg dataset

* evaluate

* modify config

* align config

* train single RL

* update training script

* 1M eval

* less eval to reveal

* 2000 env eval

* new training

* eval 1000

* update eval

* more workers

* more workers

* 20 workers

* dataset to database

* split tool!

* split dataset

* try fix

* train 003

* fix mapping

* fix test

* add waymo tqdm

* utils

* fix bug

* fix bug

* waymo

* int type

* 8-worker read

* disable

* read file

* add log message

* check existence

* dist 0

* int

* check num

* suppress warning

* add filter API

* filter

* store map false

* new

* ablation

* filter

* fix

* update filter

* rename 'to' to 'from'

* random select

* add overlapping check

* fix

* new training scheme

* new reward

* add waymo train script

* waymo different config

* copy raw data

* fix bug

* add tqdm

* update readme

* waymo

* pg

* max lateral dist 3

* pg

* crash_done instead of penalty

* no crash done

* gpu

* update eval script

* steering range penalty

* evaluate

* finish pg

* update setup

* fix bug

* test

* fix

* add on line

* train nuplan

* generate sensor

* update training

* static obj

* multi worker eval

* fix bug

* use ray for testing

* eval!

* filter scenario

* id filter

* fix bug

* dist = 2

* filter

* eval

* eval ret

* ok

* update training pg

* test before use

* store data=False

* collect figures

* capture pic

---------

Co-authored-by: Quanyi Li <quanyi@bolei-gpu02.cs.ucla.edu>
Quanyi Li authored 2023-06-10 18:56:33 +01:00, committed by GitHub
parent 41c0b01f39
commit db50bca7fd
53 changed files with 2274 additions and 133 deletions


@@ -0,0 +1,23 @@
from scenarionet_training.scripts.train_nuplan import config
from scenarionet_training.train_utils.utils import eval_ckpt

if __name__ == '__main__':
    # 27 29 30 37 39
    ckpt_path = "C:\\Users\\x1\\Desktop\\checkpoint_510\\checkpoint-510"
    scenario_data_path = "D:\\scenarionet_testset\\nuplan_test\\nuplan_test_w_raw"
    num_scenarios = 2000
    start_scenario_index = 0
    horizon = 600
    render = False
    explore = True  # PPO is a stochastic policy; turning off exploration can reduce jitter but may harm performance
    log_interval = 10

    eval_ckpt(config,
              ckpt_path,
              scenario_data_path,
              num_scenarios,
              start_scenario_index,
              horizon,
              render,
              explore,
              log_interval)
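
The bare "# 27 29 30 37 39" comment above presumably lists candidate checkpoint indices for this evaluation. A minimal sketch (not in the repo) of sweeping several checkpoints with the same settings, reusing the variables defined above; the step numbers and directory layout below are assumed:

for step in (270, 390, 510):  # hypothetical checkpoint steps
    path = "C:\\Users\\x1\\Desktop\\checkpoint_{0}\\checkpoint-{0}".format(step)
    eval_ckpt(config, path, scenario_data_path, num_scenarios,
              start_scenario_index, horizon, render, explore, log_interval)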


@@ -0,0 +1,27 @@
import os.path

from scenarionet import SCENARIONET_DATASET_PATH
from scenarionet_training.scripts.train_pg import config
from scenarionet_training.train_utils.utils import eval_ckpt

if __name__ == '__main__':
    # TODO: merge all evaluation scripts
    # 10/15/20/26/30/31/32
    ckpt_path = "C:\\Users\\x1\\Desktop\\checkpoint_330\\checkpoint-330"
    scenario_data_path = os.path.join(SCENARIONET_DATASET_PATH, "pg_2000")
    num_scenarios = 2000
    start_scenario_index = 0
    horizon = 600
    render = False
    explore = True  # PPO is a stochastic policy; turning off exploration can reduce jitter but may harm performance
    log_interval = 2

    eval_ckpt(config,
              ckpt_path,
              scenario_data_path,
              num_scenarios,
              start_scenario_index,
              horizon,
              render,
              explore,
              log_interval)


@@ -0,0 +1,22 @@
from scenarionet_training.scripts.train_waymo import config
from scenarionet_training.train_utils.utils import eval_ckpt

if __name__ == '__main__':
    ckpt_path = "C:\\Users\\x1\\Desktop\\checkpoint_170\\checkpoint-170"
    scenario_data_path = "D:\\scenarionet_testset\\waymo_test_raw_data"
    num_scenarios = 2000
    start_scenario_index = 0
    horizon = 600
    render = True
    explore = True  # PPO is a stochastic policy; turning off exploration can reduce jitter but may harm performance
    log_interval = 2

    eval_ckpt(config,
              ckpt_path,
              scenario_data_path,
              num_scenarios,
              start_scenario_index,
              horizon,
              render,
              explore,
              log_interval)
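
On the explore flag used in all three evaluation scripts: a PPO policy outputs an action distribution, so sampling jitters around the mean while the mean itself is deterministic. A toy numpy illustration of the difference (not RLlib code):

import numpy as np

mean = np.array([0.1, 0.5])  # e.g. [steering, throttle]
std = np.array([0.2, 0.2])
greedy_action = mean                          # explore=False: distribution mean, no jitter
sampled_action = np.random.normal(mean, std)  # explore=True: fresh sample every step
print(greedy_action, sampled_action)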


@@ -0,0 +1,80 @@
import os.path

from metadrive.envs.scenario_env import ScenarioEnv

from scenarionet import SCENARIONET_REPO_PATH, SCENARIONET_DATASET_PATH
from scenarionet_training.train_utils.multi_worker_PPO import MultiWorkerPPO
from scenarionet_training.train_utils.utils import train, get_train_parser, get_exp_name

if __name__ == '__main__':
    env = ScenarioEnv
    args = get_train_parser().parse_args()
    exp_name = get_exp_name(args)
    stop = int(100_000_000)

    config = dict(
        env=env,
        env_config=dict(
            # scenario
            start_scenario_index=0,
            num_scenarios=32,
            data_directory=os.path.join(SCENARIONET_DATASET_PATH, "pg"),
            sequential_seed=True,
            # traffic & light
            reactive_traffic=False,
            no_static_vehicles=True,
            no_light=True,
            static_traffic_object=True,
            # curriculum training
            curriculum_level=4,
            target_success_rate=0.8,
            # training
            horizon=None,
            use_lateral_reward=True,
        ),

        # ===== Evaluation =====
        evaluation_interval=2,
        evaluation_num_episodes=32,
        evaluation_config=dict(env_config=dict(start_scenario_index=32,
                                               num_scenarios=32,
                                               sequential_seed=True,
                                               curriculum_level=1,  # turn off curriculum
                                               data_directory=os.path.join(SCENARIONET_DATASET_PATH, "pg"))),
        evaluation_num_workers=2,
        metrics_smoothing_episodes=10,

        # ===== Training =====
        model=dict(fcnet_hiddens=[512, 256, 128]),
        horizon=600,
        num_sgd_iter=20,
        lr=5e-5,
        rollout_fragment_length=500,
        sgd_minibatch_size=100,
        train_batch_size=4000,
        num_gpus=0.5 if args.num_gpus != 0 else 0,
        num_cpus_per_worker=0.4,
        num_cpus_for_driver=1,
        num_workers=2,
        framework="tf"
    )

    train(
        MultiWorkerPPO,
        exp_name=exp_name,
        save_dir=os.path.join(SCENARIONET_REPO_PATH, "experiment"),
        keep_checkpoints_num=5,
        stop=stop,
        config=config,
        num_gpus=args.num_gpus,
        # num_seeds=args.num_seeds,
        num_seeds=1,
        # test_mode=args.test,
        # local_mode=True,
        # TODO: remove this before code release
        # wandb_key_file="~/wandb_api_key_file.txt",
        wandb_project="scenarionet",
    )
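
For intuition on the curriculum settings above: with num_scenarios=32 and curriculum_level=4, each level presumably covers num_scenarios / curriculum_level scenarios and unlocks the next once the success rate reaches target_success_rate. A sketch of that assumed schedule (not MetaDrive's actual implementation):

num_scenarios, curriculum_level, target_success_rate = 32, 4, 0.8
per_level = num_scenarios // curriculum_level  # 8 scenarios per level
for level in range(curriculum_level):
    start = level * per_level
    print("level {}: scenario indices [{}, {}), advance when success_rate >= {}".format(
        level, start, start + per_level, target_success_rate))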


@@ -0,0 +1,74 @@
import argparse
import json
import os
import pickle

import numpy as np

from scenarionet_training.scripts.train_nuplan import config
from scenarionet_training.train_utils.callbacks import DrivingCallbacks
from scenarionet_training.train_utils.multi_worker_PPO import MultiWorkerPPO
from scenarionet_training.train_utils.utils import initialize_ray


class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy arrays and integers to native Python types."""

    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        elif isinstance(obj, (np.int32, np.int64)):
            return int(obj)
        return json.JSONEncoder.default(self, obj)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--start_index", type=int, default=0)
    parser.add_argument("--ckpt_path", type=str, required=True)
    parser.add_argument("--database_path", type=str, required=True)
    parser.add_argument("--id", type=str, default="")
    parser.add_argument("--num_scenarios", type=int, default=5000)
    parser.add_argument("--num_workers", type=int, default=10)
    parser.add_argument("--horizon", type=int, default=600)
    parser.add_argument("--allowed_more_steps", type=int, default=50)
    parser.add_argument("--max_lateral_dist", type=float, default=2.5)
    parser.add_argument("--overwrite", action="store_true")
    args = parser.parse_args()

    file = "eval_{}_{}_{}".format(args.id, os.path.basename(args.ckpt_path), os.path.basename(args.database_path))
    if (os.path.exists(file + ".json") or os.path.exists(file + ".pkl")) and not args.overwrite:
        raise FileExistsError("Please remove {} or set --overwrite".format(file))

    initialize_ray(test_mode=True, num_gpus=1)

    config["callbacks"] = DrivingCallbacks
    config["evaluation_num_workers"] = args.num_workers
    config["evaluation_num_episodes"] = args.num_scenarios
    config["metrics_smoothing_episodes"] = args.num_scenarios
    config["custom_eval_function"] = None
    config["num_workers"] = 0
    config["evaluation_config"]["env_config"].update(dict(
        start_scenario_index=args.start_index,
        num_scenarios=args.num_scenarios,
        sequential_seed=True,
        store_map=False,
        store_data=False,
        allowed_more_steps=args.allowed_more_steps,
        # no_map=True,
        max_lateral_dist=args.max_lateral_dist,
        curriculum_level=1,  # disable curriculum
        target_success_rate=1,
        horizon=args.horizon,
        episodes_to_evaluate_curriculum=args.num_scenarios,
        data_directory=args.database_path,
        use_render=False))

    trainer = MultiWorkerPPO(config)
    trainer.restore(args.ckpt_path)

    ret = trainer._evaluate()["evaluation"]
    with open(file + ".json", "w") as f:
        json.dump(ret, f, cls=NumpyEncoder)
    with open(file + ".pkl", "wb+") as f:
        pickle.dump(ret, f)
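
A quick standalone check of NumpyEncoder, assuming the class above is in scope. Note it only converts ndarray, int32, and int64; an np.float32 value would still need its own branch, while np.float64 already subclasses float and serializes as-is:

import json
import numpy as np

payload = {"episodes": np.int64(1000), "lengths": np.arange(3)}
print(json.dumps(payload, cls=NumpyEncoder))  # {"episodes": 1000, "lengths": [0, 1, 2]}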


@@ -0,0 +1,96 @@
import os.path

from metadrive.envs.scenario_env import ScenarioEnv

from scenarionet import SCENARIONET_REPO_PATH, SCENARIONET_DATASET_PATH
from scenarionet_training.train_utils.multi_worker_PPO import MultiWorkerPPO
from scenarionet_training.train_utils.utils import train, get_train_parser, get_exp_name

config = dict(
    env=ScenarioEnv,
    env_config=dict(
        # scenario
        start_scenario_index=0,
        num_scenarios=40000,
        data_directory=os.path.join(SCENARIONET_DATASET_PATH, "nuplan_train"),
        sequential_seed=True,
        # curriculum training
        curriculum_level=100,
        target_success_rate=0.8,  # or 0.7
        # episodes_to_evaluate_curriculum=400,  # default: num_scenarios / curriculum_level
        # traffic & light
        reactive_traffic=True,
        no_static_vehicles=True,
        no_light=True,
        static_traffic_object=True,
        # training scheme
        horizon=None,
        driving_reward=4,
        steering_range_penalty=1.0,
        heading_penalty=2,
        lateral_penalty=2.0,
        no_negative_reward=True,
        on_lane_line_penalty=0,
        crash_vehicle_penalty=2,
        crash_human_penalty=2,
        crash_object_penalty=0.5,
        # out_of_road_penalty=2,
        max_lateral_dist=2,
        # crash_vehicle_done=True,
        vehicle_config=dict(side_detector=dict(num_lasers=0))
    ),

    # ===== Evaluation =====
    evaluation_interval=15,
    evaluation_num_episodes=1000,
    # TODO (LQY): this is a sample from the test set; evaluate on all scenarios after training!
    evaluation_config=dict(env_config=dict(start_scenario_index=0,
                                           num_scenarios=1000,
                                           sequential_seed=True,
                                           curriculum_level=1,  # turn off curriculum
                                           data_directory=os.path.join(SCENARIONET_DATASET_PATH, "nuplan_test"))),
    evaluation_num_workers=10,
    metrics_smoothing_episodes=10,

    # ===== Training =====
    model=dict(fcnet_hiddens=[512, 256, 128]),
    horizon=600,
    num_sgd_iter=20,
    lr=1e-4,
    rollout_fragment_length=500,
    sgd_minibatch_size=200,
    train_batch_size=50000,
    num_gpus=0.5,
    num_cpus_per_worker=0.3,
    num_cpus_for_driver=1,
    num_workers=20,
    framework="tf"
)

if __name__ == '__main__':
    args = get_train_parser().parse_args()
    exp_name = get_exp_name(args)
    stop = int(100_000_000)
    config["num_gpus"] = 0.5 if args.num_gpus != 0 else 0

    train(
        MultiWorkerPPO,
        exp_name=exp_name,
        save_dir=os.path.join(SCENARIONET_REPO_PATH, "experiment"),
        keep_checkpoints_num=5,
        stop=stop,
        config=config,
        num_gpus=args.num_gpus,
        # num_seeds=args.num_seeds,
        num_seeds=5,
        test_mode=args.test,
        # local_mode=True,
        # TODO: remove this before code release
        # wandb_key_file="~/wandb_api_key_file.txt",
        wandb_project="scenarionet",
    )
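
The env_config above mostly tunes reward shaping. A rough, illustrative sketch of how per-step terms with these coefficients might combine (NOT MetaDrive's actual reward function; all inputs below are hypothetical):

def sketch_step_reward(progress, lateral_dist, heading_err, steering_excess, crashed_vehicle=False):
    r = 4.0 * progress            # driving_reward=4, route progress this step
    r -= 2.0 * abs(lateral_dist)  # lateral_penalty=2.0
    r -= 2.0 * abs(heading_err)   # heading_penalty=2
    r -= 1.0 * steering_excess    # steering_range_penalty=1.0
    if crashed_vehicle:
        r -= 2.0                  # crash_vehicle_penalty=2
    return max(r, 0.0)            # no_negative_reward=True clips below zero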


@@ -0,0 +1,95 @@
import os.path

from metadrive.envs.scenario_env import ScenarioEnv

from scenarionet import SCENARIONET_REPO_PATH, SCENARIONET_DATASET_PATH
from scenarionet_training.train_utils.multi_worker_PPO import MultiWorkerPPO
from scenarionet_training.train_utils.utils import train, get_train_parser, get_exp_name

config = dict(
    env=ScenarioEnv,
    env_config=dict(
        # scenario
        start_scenario_index=0,
        num_scenarios=40000,
        data_directory=os.path.join(SCENARIONET_DATASET_PATH, "pg_train"),
        sequential_seed=True,
        # curriculum training
        curriculum_level=100,
        target_success_rate=0.8,
        # episodes_to_evaluate_curriculum=400,  # default: num_scenarios / curriculum_level
        # traffic & light
        reactive_traffic=False,
        no_static_vehicles=True,
        no_light=True,
        static_traffic_object=True,
        # training scheme
        horizon=None,
        steering_range_penalty=2,
        heading_penalty=1.0,
        lateral_penalty=1.0,
        no_negative_reward=True,
        on_lane_line_penalty=0,
        crash_vehicle_penalty=2,
        crash_human_penalty=2,
        out_of_road_penalty=2,
        max_lateral_dist=2,
        # crash_vehicle_done=True,
        vehicle_config=dict(side_detector=dict(num_lasers=0))
    ),

    # ===== Evaluation =====
    evaluation_interval=15,
    evaluation_num_episodes=1000,
    # TODO (LQY): this is a sample from the test set; evaluate on all scenarios after training!
    evaluation_config=dict(env_config=dict(start_scenario_index=0,
                                           num_scenarios=1000,
                                           sequential_seed=True,
                                           curriculum_level=1,  # turn off curriculum
                                           data_directory=os.path.join(SCENARIONET_DATASET_PATH, "pg_test"))),
    evaluation_num_workers=10,
    metrics_smoothing_episodes=10,

    # ===== Training =====
    model=dict(fcnet_hiddens=[512, 256, 128]),
    horizon=600,
    num_sgd_iter=20,
    lr=1e-4,
    rollout_fragment_length=500,
    sgd_minibatch_size=200,
    train_batch_size=50000,
    num_gpus=0.5,
    num_cpus_per_worker=0.3,
    num_cpus_for_driver=1,
    num_workers=20,
    framework="tf"
)

if __name__ == '__main__':
    # PG data is generated with seeds 10,000 to 60,000
    args = get_train_parser().parse_args()
    exp_name = get_exp_name(args)
    stop = int(100_000_000)
    config["num_gpus"] = 0.5 if args.num_gpus != 0 else 0

    train(
        MultiWorkerPPO,
        exp_name=exp_name,
        save_dir=os.path.join(SCENARIONET_REPO_PATH, "experiment"),
        keep_checkpoints_num=5,
        stop=stop,
        config=config,
        num_gpus=args.num_gpus,
        # num_seeds=args.num_seeds,
        num_seeds=5,
        test_mode=args.test,
        # local_mode=True,
        # TODO: remove this before code release
        # wandb_key_file="~/wandb_api_key_file.txt",
        wandb_project="scenarionet",
    )
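
For cluster planning, the per-trial resource footprint implied by the config above works out roughly as follows (Ray 1.x accounting; evaluation workers are assumed to take 1 CPU each):

rollout_cpus = 20 * 0.3  # num_workers * num_cpus_per_worker = 6.0
driver_cpus = 1          # num_cpus_for_driver
eval_cpus = 10 * 1.0     # evaluation_num_workers, 1 CPU each (assumed)
gpus = 0.5               # fractional GPU: two trials can share one device
print("per trial: {} CPUs, {} GPU".format(rollout_cpus + driver_cpus + eval_cpus, gpus))
# With num_seeds=5, a sweep therefore needs roughly 85 CPUs and 2.5 GPUs.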


@@ -0,0 +1,95 @@
import os.path

from metadrive.envs.scenario_env import ScenarioEnv

from scenarionet import SCENARIONET_REPO_PATH, SCENARIONET_DATASET_PATH
from scenarionet_training.train_utils.multi_worker_PPO import MultiWorkerPPO
from scenarionet_training.train_utils.utils import train, get_train_parser, get_exp_name

config = dict(
    env=ScenarioEnv,
    env_config=dict(
        # scenario
        start_scenario_index=0,
        num_scenarios=40000,
        data_directory=os.path.join(SCENARIONET_DATASET_PATH, "waymo_train"),
        sequential_seed=True,
        # curriculum training
        curriculum_level=100,
        target_success_rate=0.8,
        # episodes_to_evaluate_curriculum=400,  # default: num_scenarios / curriculum_level
        # traffic & light
        reactive_traffic=True,
        no_static_vehicles=True,
        no_light=True,
        static_traffic_object=True,
        # training scheme
        horizon=None,
        driving_reward=1,
        steering_range_penalty=0,
        heading_penalty=1,
        lateral_penalty=1.0,
        no_negative_reward=True,
        on_lane_line_penalty=0,
        crash_vehicle_penalty=2,
        crash_human_penalty=2,
        out_of_road_penalty=2,
        max_lateral_dist=2,
        # crash_vehicle_done=True,
        vehicle_config=dict(side_detector=dict(num_lasers=0))
    ),

    # ===== Evaluation =====
    evaluation_interval=15,
    evaluation_num_episodes=1000,
    # TODO (LQY): this is a sample from the test set; evaluate on all scenarios after training!
    evaluation_config=dict(env_config=dict(start_scenario_index=0,
                                           num_scenarios=1000,
                                           sequential_seed=True,
                                           curriculum_level=1,  # turn off curriculum
                                           data_directory=os.path.join(SCENARIONET_DATASET_PATH, "waymo_test"))),
    evaluation_num_workers=10,
    metrics_smoothing_episodes=10,

    # ===== Training =====
    model=dict(fcnet_hiddens=[512, 256, 128]),
    horizon=600,
    num_sgd_iter=20,
    lr=1e-4,
    rollout_fragment_length=500,
    sgd_minibatch_size=200,
    train_batch_size=50000,
    num_gpus=0.5,
    num_cpus_per_worker=0.3,
    num_cpus_for_driver=1,
    num_workers=20,
    framework="tf"
)

if __name__ == '__main__':
    args = get_train_parser().parse_args()
    exp_name = get_exp_name(args)
    stop = int(100_000_000)
    config["num_gpus"] = 0.5 if args.num_gpus != 0 else 0

    train(
        MultiWorkerPPO,
        exp_name=exp_name,
        save_dir=os.path.join(SCENARIONET_REPO_PATH, "experiment"),
        keep_checkpoints_num=5,
        stop=stop,
        config=config,
        num_gpus=args.num_gpus,
        # num_seeds=args.num_seeds,
        num_seeds=5,
        test_mode=args.test,
        # local_mode=True,
        # TODO: remove this before code release
        # wandb_key_file="~/wandb_api_key_file.txt",
        wandb_project="scenarionet",
    )
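
For reference, the PPO batch arithmetic implied by the training section shared by these three scripts:

train_batch_size = 50000
num_workers, rollout_fragment_length = 20, 500
sample_rounds = train_batch_size // (num_workers * rollout_fragment_length)  # 5 sampling rounds per iteration
sgd_minibatch_size, num_sgd_iter = 200, 20
sgd_steps = (train_batch_size // sgd_minibatch_size) * num_sgd_iter  # 250 minibatches x 20 epochs = 5000 SGD steps
print(sample_rounds, sgd_steps)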