diff --git a/README.md b/README.md
index f98b618..f19a27f 100644
--- a/README.md
+++ b/README.md
@@ -55,7 +55,7 @@ pip install -e .
 
 # Install ScenarioNet
 cd ~/  # Go to the folder you want to host these two repos.
-git clone git@github.com:metadriverse/scenarionet.git
+git clone https://github.com/metadriverse/scenarionet.git
 cd scenarionet
 pip install -e .
 ```
diff --git a/documentation/example.rst b/documentation/example.rst
index e156b29..918a0ae 100644
--- a/documentation/example.rst
+++ b/documentation/example.rst
@@ -38,7 +38,7 @@ For Waymo data, we already have the parser in ScenarioNet so just install the Te
     conda install protobuf==3.20
 
 .. note::
-    You may fail to install ``protobuf`` if using ``pip install protobuf==3.20``.
+    You may fail to install ``protobuf`` if using ``pip install protobuf==3.20``. If so, install it via ``conda install protobuf=3.20``.
 
 For other datasets like nuPlan and nuScenes, you need to setup `nuplan-devkit `_ and `nuscenes-devkit `_ respectively.
 Guidance on how to setup these datasets and connect them with ScenarioNet can be found at :ref:`datasets`.
diff --git a/documentation/waymo.rst b/documentation/waymo.rst
index c0b9a64..d00dea3 100644
--- a/documentation/waymo.rst
+++ b/documentation/waymo.rst
@@ -35,7 +35,7 @@ First of all, we have to install tensorflow and Protobuf::
     conda install protobuf==3.20
 
 .. note::
-    You may fail to install ``protobuf`` if using ``pip install protobuf==3.20``.
+    You may fail to install ``protobuf`` if using ``pip install protobuf==3.20``. If so, install it via ``conda install protobuf=3.20``.
 
 
 2. Download TFRecord
@@ -79,12 +79,18 @@ The downloaded data should be stored in a directory like this::
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Run the following command to extract scenarios in any directory containing ``tfrecord``.
+
+Here we take converting the raw data in ``training_20s`` as an example::
 
-    python -m scenarionet.convert_waymo -d /path/to/your/database --raw_data_path ./waymo/training_20s --num_files=1000
+    python -m scenarionet.convert_waymo -d /path/to/your/database --raw_data_path ./waymo/training_20s --num_workers 64
 
 Now all converted scenarios will be placed at ``/path/to/your/database`` and are ready to be used in your work.
 
+.. note::
+    When running the conversion, please double-check whether the GPU is being used. This converter should NOT use the GPU.
+    We have disabled GPU usage via ``os.environ["CUDA_VISIBLE_DEVICES"] = ""``.
+
 Known Issues: Waymo
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 N/A
diff --git a/scenarionet/common_utils.py b/scenarionet/common_utils.py
index 93c6229..a0d0c50 100644
--- a/scenarionet/common_utils.py
+++ b/scenarionet/common_utils.py
@@ -1,9 +1,12 @@
+import logging
 import os.path
 import pickle
 
 import numpy as np
 from metadrive.scenario import utils as sd_utils
 
+logger = logging.getLogger(__name__)
+
 
 def recursive_equal(data1, data2, need_assert=False):
     from metadrive.utils.config import Config
@@ -71,7 +74,7 @@ def save_summary_and_mapping(summary_file_path, mapping_file_path, summary, mapp
         pickle.dump(dict_recursive_remove_array_and_set(summary), file)
     with open(mapping_file_path, "wb") as file:
         pickle.dump(mapping, file)
-    print(
+    logger.info(
         "\n ================ Dataset Summary and Mapping are saved at: {} "
         "================ \n".format(summary_file_path)
     )
diff --git a/scenarionet/convert_pg.py b/scenarionet/convert_pg.py
index 084df09..a169202 100644
--- a/scenarionet/convert_pg.py
+++ b/scenarionet/convert_pg.py
@@ -4,6 +4,7 @@ if __name__ == '__main__':
     import pkg_resources  # for suppress warning
     import argparse
     import os.path
+    import os
 
     import metadrive
 
@@ -11,6 +12,8 @@ if __name__ == '__main__':
     from scenarionet.converter.pg.utils import get_pg_scenarios, convert_pg_scenario
     from scenarionet.converter.utils import write_to_directory
 
+    os.environ["CUDA_VISIBLE_DEVICES"] = ""
+
     # For the PG environment config, see: scenarionet/converter/pg/utils.py:6
     parser = argparse.ArgumentParser(description=desc)
     parser.add_argument(
diff --git a/scenarionet/convert_waymo.py b/scenarionet/convert_waymo.py
index 8b4037c..15025c5 100644
--- a/scenarionet/convert_waymo.py
+++ b/scenarionet/convert_waymo.py
@@ -11,6 +11,8 @@ if __name__ == '__main__':
     from scenarionet.converter.utils import write_to_directory
     from scenarionet.converter.waymo.utils import convert_waymo_scenario, get_waymo_scenarios, preprocess_waymo_scenarios
 
+    os.environ["CUDA_VISIBLE_DEVICES"] = ""
+
     logger = logging.getLogger(__name__)
 
     parser = argparse.ArgumentParser(description=desc)
@@ -36,14 +38,14 @@
         default=0,
         type=int,
         help="Control how many files to use. We will list all files in the raw data folder "
-        "and select files[start_file_index: start_file_index+num_files]"
+        "and select files[start_file_index: start_file_index+num_files]. Default: 0."
     )
     parser.add_argument(
         "--num_files",
-        default=1000,
+        default=None,
        type=int,
         help="Control how many files to use. We will list all files in the raw data folder "
-        "and select files[start_file_index: start_file_index+num_files]"
+        "and select files[start_file_index: start_file_index+num_files]. Default: None, meaning all files are read."
     )
 
     args = parser.parse_args()
@@ -65,6 +67,12 @@
     waymo_data_directory = os.path.join(SCENARIONET_DATASET_PATH, args.raw_data_path)
     files = get_waymo_scenarios(waymo_data_directory, args.start_file_index, args.num_files)
 
+    logger.info(
+        f"We will read {len(files)} raw files. You set the number of workers to {args.num_workers}. "
+        f"Please make sure there will not be too many files to read in each worker "
+        f"(now it is {len(files) / args.num_workers})!"
+    )
+
     write_to_directory(
         convert_func=convert_waymo_scenario,
         scenarios=files,
diff --git a/scenarionet/converter/utils.py b/scenarionet/converter/utils.py
index c48dc06..245298c 100644
--- a/scenarionet/converter/utils.py
+++ b/scenarionet/converter/utils.py
@@ -218,7 +218,8 @@ def write_to_directory_single_worker(
         kwargs["env"] = make_env(start_index=scenarios[0], num_scenarios=len(scenarios))
 
     count = 0
-    for scenario in tqdm.tqdm(scenarios, desc="Worker Index: {}".format(worker_index)):
+    # for scenario in tqdm.tqdm(scenarios, position=2, leave=True, desc=f"Worker {worker_index} Number of scenarios"):
+    for scenario in scenarios:
         # convert scenario
         sd_scenario = convert_func(scenario, dataset_version, **kwargs)
         scenario_id = sd_scenario[SD.ID]
@@ -248,6 +249,9 @@
             print("Current Memory: {}".format(process_memory()))
         count += 1
 
+        if count % 500 == 0:
+            logger.info(f"Worker {worker_index} has processed {count} scenarios.")
+
     # store summary file
     save_summary_and_mapping(summary_file_path, mapping_file_path, summary, mapping)
 
@@ -257,6 +261,8 @@
         shutil.rmtree(delay_remove)
     os.rename(output_path, save_path)
 
+    logger.info(f"Worker {worker_index} finished! Files are saved at: {save_path}")
+
 
 def process_memory():
     process = psutil.Process(os.getpid())
diff --git a/scenarionet/converter/waymo/utils.py b/scenarionet/converter/waymo/utils.py
index 0cc2ffe..fcb4a3f 100644
--- a/scenarionet/converter/waymo/utils.py
+++ b/scenarionet/converter/waymo/utils.py
@@ -430,6 +430,11 @@ def get_waymo_scenarios(waymo_data_directory, start_index, num):
     # there is 1000 raw data in google cloud, each of them produce about 500 pkl file
     logger.info("\nReading raw data")
     file_list = os.listdir(waymo_data_directory)
+    if num is None:
+        logger.warning(
+            "You haven't specified the number of raw files! It is set to {} now.".format(len(file_list) - start_index)
+        )
+        num = len(file_list) - start_index
     assert len(file_list) >= start_index + num and start_index >= 0, \
         "No sufficient files ({}) in raw_data_directory. need: {}, start: {}".format(len(file_list), num, start_index)
     file_list = file_list[start_index:start_index + num]
@@ -448,9 +453,13 @@ def preprocess_waymo_scenarios(files, worker_index):
     """
     from scenarionet.converter.waymo.waymo_protos import scenario_pb2
 
-    for file in tqdm.tqdm(files, desc="Process Waymo scenarios for worker {}".format(worker_index)):
+    for file in tqdm.tqdm(files, leave=False, position=0, desc="Worker {}: reading raw files".format(worker_index)):
+
+        logger.info(f"Worker {worker_index} is reading raw file: {file}")
+
         file_path = os.path.join(file)
         if ("tfrecord" not in file_path) or (not os.path.isfile(file_path)):
+            logger.info(f"Worker {worker_index} is skipping file: {file}")
             continue
         for data in tf.data.TFRecordDataset(file_path, compression_type="").as_numpy_iterator():
             scenario = scenario_pb2.Scenario()
@@ -458,5 +467,7 @@
             # a trick for loging file name
             scenario.scenario_id = scenario.scenario_id + SPLIT_KEY + file
             yield scenario
+
+    logger.info(f"Worker {worker_index} finished reading {len(files)} files.")
     # logger.info("Worker {}: Process {} waymo scenarios".format(worker_index, len(scenarios)))
     # return scenarios
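
The note added to `documentation/waymo.rst` says the converter should not touch the GPU. `CUDA_VISIBLE_DEVICES` only takes effect if it is set before TensorFlow initializes CUDA, which is why the scripts export it at the start of their `__main__` blocks. A minimal standalone sketch (assuming TensorFlow 2.x; not part of this patch) to confirm the process really stays on CPU:

```python
# Sketch: verify that hiding CUDA devices leaves TensorFlow CPU-only.
# The env var must be set BEFORE TensorFlow is imported for the first time.
import os

os.environ["CUDA_VISIBLE_DEVICES"] = ""  # hide all GPUs from this process

import tensorflow as tf  # noqa: E402 -- must come after the env var

# With no visible CUDA devices, this list should be empty.
gpus = tf.config.list_physical_devices("GPU")
assert gpus == [], f"GPU is still visible: {gpus}"
print("TensorFlow sees no GPU; conversion will run on CPU.")
```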
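The new `--num_files` default of `None` and the per-worker log line in `convert_waymo.py` work together: with `None`, every file after `start_file_index` is selected, and the log reports the resulting load per worker. A rough standalone sketch of that slicing logic (`select_files` is a hypothetical helper mirroring `get_waymo_scenarios`, not a ScenarioNet function):

```python
# Hypothetical helper mirroring get_waymo_scenarios' file selection.
def select_files(file_list, start_file_index, num_files=None):
    # With the new default (num_files=None), every file after
    # start_file_index is read, matching the added warning branch.
    if num_files is None:
        num_files = len(file_list) - start_file_index
    assert start_file_index >= 0 and len(file_list) >= start_file_index + num_files
    return file_list[start_file_index:start_file_index + num_files]


files = select_files([f"shard-{i:05d}.tfrecord" for i in range(1000)],
                     start_file_index=0, num_files=None)
num_workers = 64
# The quantity reported by the new logger.info call: files per worker.
print(f"{len(files)} files -> {len(files) / num_workers} per worker")
```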
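Since the patch replaces `print` calls with module loggers, note that `logger.info` output is invisible under Python's default root level of `WARNING`. A driver script needs a configuration like the sketch below to see the new progress messages (the `basicConfig` call is an assumption for illustration, not part of this patch):

```python
# Sketch: making the new INFO-level worker messages visible.
import logging

logging.basicConfig(
    level=logging.INFO,  # default root level is WARNING, which hides INFO
    format="%(asctime)s %(name)s %(levelname)s: %(message)s",
)

logger = logging.getLogger(__name__)
logger.info("Worker 0 has processed 500 scenarios.")  # now printed
```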