Fix bug for generating dataset (#2)

* update parameters for scripts

* update write function

* modify waymo script

* use exist ok instead of overwrite

* remove TODO

* rename to comvine_dataset

* use exist_ok and force_overwrite together

* format

* test

* create env for each thread

* restore

* fix bug

* fix pg bug

* fix

* fix bug

* add assert

* don't return done info

* to dict

* add test

* only compare sdc

* no store mao

* release memory

* add start index to argument

* test

* format some settings/flags

* add tmp path

* add tmp dir

* test all scripts

* suppress warning

* suppress warning

* format

* test memory leak

* fix memory leak

* remove useless functions

* imap

* thread-1 process for avoiding memory leak

* add list()

* rename

* verify existence

* verify completeness

* test

* add test

* add default value

* add limit

* use script

* add annotation

* test script

* fix bug

* fix bug

* add author4

* add overwrite

* fix bug

* fix

* combine overwrite

* fix bug

* gpu007

* add result save dir

* adjust sequence

* fix test bug

* disable bash script

* add episode length limit

* move scripts to root dir

* format

* fix test
This commit is contained in:
Quanyi Li
2023-05-13 15:15:31 +01:00
committed by GitHub
parent 944c7f3707
commit 37d4e72db5
35 changed files with 592 additions and 258 deletions

View File

@@ -4,12 +4,14 @@ import os
import numpy as np
from scenarionet.common_utils import read_scenario, read_dataset_summary
from scenarionet.verifier.error import ErrorDescription as ED
from scenarionet.verifier.error import ErrorFile as EF
logger = logging.getLogger(__name__)
import tqdm
from metadrive.envs.scenario_env import ScenarioEnv
from metadrive.scenario.scenario_description import ScenarioDescription as SD
from metadrive.policy.replay_policy import ReplayEgoCarPolicy
from metadrive.scenario.utils import get_number_of_scenarios
from functools import partial
@@ -23,9 +25,16 @@ def set_random_drop(drop):
RANDOM_DROP = drop
def verify_loading_into_metadrive(dataset_path, result_save_dir, steps_to_run=1000, num_workers=8):
def verify_dataset(dataset_path, result_save_dir, overwrite=False, num_workers=8, steps_to_run=1000):
global RANDOM_DROP
assert os.path.isdir(result_save_dir), "result_save_dir must be a dir, get {}".format(result_save_dir)
os.makedirs(result_save_dir, exist_ok=True)
error_file_name = EF.get_error_file_name(dataset_path)
if os.path.exists(os.path.join(result_save_dir, error_file_name)) and not overwrite:
raise FileExistsError(
"An error_file already exists in result_save_directory. "
"Setting overwrite=True to cancel this alert"
)
num_scenario = get_number_of_scenarios(dataset_path)
if num_scenario < num_workers:
# single process
@@ -34,7 +43,7 @@ def verify_loading_into_metadrive(dataset_path, result_save_dir, steps_to_run=10
# prepare arguments
argument_list = []
func = partial(loading_wrapper, dataset_path=dataset_path, steps_to_run=steps_to_run)
func = partial(loading_wrapper, dataset_path=dataset_path, steps_to_run=steps_to_run, random_drop=RANDOM_DROP)
num_scenario_each_worker = int(num_scenario // num_workers)
for i in range(num_workers):
@@ -64,53 +73,75 @@ def verify_loading_into_metadrive(dataset_path, result_save_dir, steps_to_run=10
return success, errors
def loading_into_metadrive(
    start_scenario_index, num_scenario, dataset_path, steps_to_run, metadrive_config=None, random_drop=False
):
    """Verify that a contiguous slice of scenarios can be loaded (and optionally replayed).

    Two modes, selected by ``steps_to_run``:
      * ``steps_to_run == 0``: fast check — read each scenario file and run
        ``SD.sanity_check`` on it, without launching a simulation.
      * ``steps_to_run > 0``: replay each scenario in a ``ScenarioEnv`` with the
        ego car on replay policy, stepping up to ``steps_to_run`` times and
        requiring the ego to reach its destination.

    :param start_scenario_index: index of the first scenario to verify
    :param num_scenario: number of scenarios to verify starting from that index
    :param dataset_path: root directory of the dataset
    :param steps_to_run: simulation steps per scenario; 0 selects the file-only check
    :param metadrive_config: extra config merged into the env config (replay mode only)
    :param random_drop: if True, randomly fail ~50% of scenarios (used for testing
        the error-reporting pipeline)
    :return: tuple ``(success, error_msgs)`` where ``success`` is False if any
        scenario failed and ``error_msgs`` is a list of ErrorDescription dicts
    """
    logger.info(
        "================ Begin Scenario Loading Verification for scenario {}-{} ================ \n".format(
            start_scenario_index, num_scenario + start_scenario_index
        )
    )
    success = True
    error_msgs = []
    if steps_to_run == 0:
        # File-level verification only: parse and sanity-check every scenario.
        summary, scenarios, mapping = read_dataset_summary(dataset_path)
        index_count = -1
        for file_name in tqdm.tqdm(scenarios):
            index_count += 1
            try:
                scenario = read_scenario(dataset_path, mapping, file_name)
                SD.sanity_check(scenario)
                if random_drop and np.random.rand() < 0.5:
                    raise ValueError("Random Drop")
            except Exception as e:
                file_path = os.path.join(dataset_path, mapping[file_name], file_name)
                error_msg = ED.make(index_count, file_path, file_name, str(e))
                error_msgs.append(error_msg)
                success = False
                # proceed to next scenario
                continue
    else:
        # Full verification: replay every scenario in MetaDrive.
        metadrive_config = metadrive_config or {}
        metadrive_config.update(
            {
                "agent_policy": ReplayEgoCarPolicy,
                "num_scenarios": num_scenario,
                "horizon": 1000,
                "start_scenario_index": start_scenario_index,
                "no_static_vehicles": False,
                "data_directory": dataset_path,
            }
        )
        env = ScenarioEnv(metadrive_config)
        # Silence per-step INFO logging from the env during the replay loop.
        logging.disable(logging.INFO)
        desc = "Scenarios: {}-{}".format(start_scenario_index, start_scenario_index + num_scenario)
        for scenario_index in tqdm.tqdm(range(start_scenario_index, start_scenario_index + num_scenario), desc=desc):
            try:
                env.reset(force_seed=scenario_index)
                arrive = False
                if random_drop and np.random.rand() < 0.5:
                    raise ValueError("Random Drop")
                for _ in range(steps_to_run):
                    o, r, d, info = env.step([0, 0])
                    if d and info["arrive_dest"]:
                        arrive = True
                # Replay must end with the ego reaching its destination.
                assert arrive, "Can not arrive destination"
            except Exception as e:
                file_name = env.engine.data_manager.summary_lookup[scenario_index]
                file_path = os.path.join(dataset_path, env.engine.data_manager.mapping[file_name], file_name)
                error_msg = ED.make(scenario_index, file_path, file_name, str(e))
                error_msgs.append(error_msg)
                success = False
                # proceed to next scenario
                continue
        env.close()
    return success, error_msgs
def loading_wrapper(arglist, dataset_path, steps_to_run, random_drop):
    """Multiprocessing-friendly adapter around :func:`loading_into_metadrive`.

    :param arglist: two-element sequence ``(start_scenario_index, num_scenario)``
    :param dataset_path: root directory of the dataset
    :param steps_to_run: simulation steps per scenario (0 = file-only check)
    :param random_drop: forwarded to ``loading_into_metadrive``
    :return: ``(success, error_msgs)`` from ``loading_into_metadrive``
    """
    assert len(arglist) == 2, "Too much arguments!"
    return loading_into_metadrive(
        arglist[0], arglist[1], dataset_path=dataset_path, steps_to_run=steps_to_run, random_drop=random_drop
    )