Fix a bug in Waymo conversion: GPU should be disabled (#64)

* Update waymo.rst

* Update waymo.rst

* allow generating all data

* update readme

* update

* better logging info

* more info

* up

* fix

* add note on GPU

* better log

* format
Author: Zhenghao Peng
Date:   2024-02-20 13:28:09 -08:00
Committed by: GitHub
Parent: 06c3aee0e2
Commit: 6cda061ed8

8 changed files with 47 additions and 10 deletions

View File

@@ -55,7 +55,7 @@ pip install -e.
 # Install ScenarioNet
 cd ~/  # Go to the folder you want to host these two repos.
-git clone git@github.com:metadriverse/scenarionet.git
+git clone https://github.com/metadriverse/scenarionet.git
 cd scenarionet
 pip install -e .
 ```

View File

@@ -38,7 +38,7 @@ For Waymo data, we already have the parser in ScenarioNet so just install the Te
 conda install protobuf==3.20
 .. note::
-    You may fail to install ``protobuf`` if using ``pip install protobuf==3.20``.
+    You may fail to install ``protobuf`` if using ``pip install protobuf==3.20``. If so, install via ``conda install protobuf=3.20``.
 For other datasets like nuPlan and nuScenes, you need to setup `nuplan-devkit <https://github.com/motional/nuplan-devkit>`_ and `nuscenes-devkit <https://github.com/nutonomy/nuscenes-devkit>`_ respectively.
 Guidance on how to setup these datasets and connect them with ScenarioNet can be found at :ref:`datasets`.

View File

@@ -35,7 +35,7 @@ First of all, we have to install tensorflow and Protobuf::
 conda install protobuf==3.20
 .. note::
-    You may fail to install ``protobuf`` if using ``pip install protobuf==3.20``.
+    You may fail to install ``protobuf`` if using ``pip install protobuf==3.20``. If so, install via ``conda install protobuf=3.20``.
 2. Download TFRecord
@@ -79,12 +79,18 @@ The downloaded data should be stored in a directory like this::
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Run the following command to extract scenarios in any directory containing ``tfrecord``.
 Here we take converting raw data in ``training_20s`` as an example::
-    python -m scenarionet.convert_waymo -d /path/to/your/database --raw_data_path ./waymo/training_20s --num_files=1000
+    python -m scenarionet.convert_waymo -d /path/to/your/database --raw_data_path ./waymo/training_20s --num_workers 64
 Now all converted scenarios will be placed at ``/path/to/your/database`` and are ready to be used in your work.
+.. note::
+    When running the conversion, please double-check whether the GPU is being used. This converter should NOT use the GPU.
+    We have disabled GPU usage via ``os.environ["CUDA_VISIBLE_DEVICES"] = ""``.
 Known Issues: Waymo
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 N/A
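
The note above relies on CUDA's device-visibility mechanism: when ``CUDA_VISIBLE_DEVICES`` is an empty string before the CUDA runtime initializes, frameworks such as TensorFlow enumerate zero GPUs. A minimal standalone sketch of the pattern (illustrative only, not part of this diff):

```python
import os

# Hide all GPUs from CUDA-based libraries. This must run before the
# framework initializes CUDA, hence before importing TensorFlow.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import tensorflow as tf  # noqa: E402 -- deliberately imported after the env var

# TensorFlow now sees no GPUs, so the conversion stays on CPU.
print(tf.config.list_physical_devices("GPU"))  # -> []
```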

View File

@@ -1,9 +1,12 @@
+import logging
 import os.path
 import pickle
 import numpy as np
 from metadrive.scenario import utils as sd_utils
+logger = logging.getLogger(__file__)
 def recursive_equal(data1, data2, need_assert=False):
     from metadrive.utils.config import Config
@@ -71,7 +74,7 @@ def save_summary_and_mapping(summary_file_path, mapping_file_path, summary, mapp
         pickle.dump(dict_recursive_remove_array_and_set(summary), file)
     with open(mapping_file_path, "wb") as file:
         pickle.dump(mapping, file)
-    print(
+    logging.info(
         "\n ================ Dataset Summary and Mapping are saved at: {} "
         "================ \n".format(summary_file_path)
     )
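
Note that ``logging.info`` on the root logger is silent under Python's default WARNING level, so callers must opt in to see the message above. A minimal sketch of enabling it (assumed caller-side usage, not part of the commit):

```python
import logging

# Raise the root logger to INFO so messages emitted via logging.info(...)
# (like the dataset-summary message above) appear on stderr.
logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
```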

View File

@@ -4,6 +4,7 @@ if __name__ == '__main__':
     import pkg_resources  # for suppress warning
     import argparse
     import os.path
+    import os
     import metadrive
@@ -11,6 +12,8 @@ if __name__ == '__main__':
     from scenarionet.converter.pg.utils import get_pg_scenarios, convert_pg_scenario
     from scenarionet.converter.utils import write_to_directory
+    os.environ["CUDA_VISIBLE_DEVICES"] = ""
     # For the PG environment config, see: scenarionet/converter/pg/utils.py:6
     parser = argparse.ArgumentParser(description=desc)
     parser.add_argument(

View File

@@ -11,6 +11,8 @@ if __name__ == '__main__':
     from scenarionet.converter.utils import write_to_directory
     from scenarionet.converter.waymo.utils import convert_waymo_scenario, get_waymo_scenarios, preprocess_waymo_scenarios
+    os.environ["CUDA_VISIBLE_DEVICES"] = ""
     logger = logging.getLogger(__name__)
     parser = argparse.ArgumentParser(description=desc)
@@ -36,14 +38,14 @@ if __name__ == '__main__':
         default=0,
         type=int,
         help="Control how many files to use. We will list all files in the raw data folder "
-        "and select files[start_file_index: start_file_index+num_files]"
+        "and select files[start_file_index: start_file_index+num_files]. Default: 0."
     )
     parser.add_argument(
         "--num_files",
-        default=1000,
+        default=None,
         type=int,
         help="Control how many files to use. We will list all files in the raw data folder "
-        "and select files[start_file_index: start_file_index+num_files]"
+        "and select files[start_file_index: start_file_index+num_files]. Default: None, which reads all files."
     )
     args = parser.parse_args()
@@ -65,6 +67,12 @@ if __name__ == '__main__':
     waymo_data_directory = os.path.join(SCENARIONET_DATASET_PATH, args.raw_data_path)
     files = get_waymo_scenarios(waymo_data_directory, args.start_file_index, args.num_files)
+    logger.info(
+        f"We will read {len(files)} raw files. You set the number of workers to {args.num_workers}. "
+        f"Please make sure each worker does not have too many files to read "
+        f"(currently {len(files) / args.num_workers} per worker)!"
+    )
     write_to_directory(
         convert_func=convert_waymo_scenario,
         scenarios=files,
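
For illustration, the per-worker figure logged above is simply ``len(files) / num_workers``. A hedged sketch of how a file list might be split evenly across workers (the helper name is hypothetical, not from the repository):

```python
import numpy as np

# Hypothetical helper: split a file list across workers roughly evenly,
# matching the len(files) / num_workers load reported by the log above.
def split_across_workers(files, num_workers):
    return [chunk.tolist() for chunk in np.array_split(np.array(files), num_workers)]

chunks = split_across_workers([f"file_{i}.tfrecord" for i in range(10)], 3)
print([len(c) for c in chunks])  # -> [4, 3, 3]
```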

View File

@@ -218,7 +218,8 @@ def write_to_directory_single_worker(
     kwargs["env"] = make_env(start_index=scenarios[0], num_scenarios=len(scenarios))
     count = 0
-    for scenario in tqdm.tqdm(scenarios, desc="Worker Index: {}".format(worker_index)):
+    # for scenario in tqdm.tqdm(scenarios, position=2, leave=True, desc=f"Worker {worker_index} Number of scenarios"):
+    for scenario in scenarios:
         # convert scenario
         sd_scenario = convert_func(scenario, dataset_version, **kwargs)
         scenario_id = sd_scenario[SD.ID]
@@ -248,6 +249,9 @@ def write_to_directory_single_worker(
             print("Current Memory: {}".format(process_memory()))
         count += 1
+        if count % 500 == 0:
+            logger.info(f"Worker {worker_index} has processed {count} scenarios.")
     # store summary file
     save_summary_and_mapping(summary_file_path, mapping_file_path, summary, mapping)
@@ -257,6 +261,8 @@ def write_to_directory_single_worker(
         shutil.rmtree(delay_remove)
     os.rename(output_path, save_path)
+    logger.info(f"Worker {worker_index} finished! Files are saved at: {save_path}")
 def process_memory():
     process = psutil.Process(os.getpid())
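
The pattern introduced here replaces a per-scenario tqdm bar with a periodic INFO line every 500 items, which interleaves more cleanly when many workers share one terminal. A self-contained sketch of the pattern (the loop body is a placeholder, not the real conversion):

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def process_all(items, worker_index=0, report_every=500):
    count = 0
    for item in items:
        # ... convert one scenario here (placeholder for the real work) ...
        count += 1
        if count % report_every == 0:
            logger.info(f"Worker {worker_index} has processed {count} scenarios.")
    logger.info(f"Worker {worker_index} finished! Processed {count} scenarios.")
```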

View File

@@ -430,6 +430,11 @@ def get_waymo_scenarios(waymo_data_directory, start_index, num):
     # there is 1000 raw data in google cloud, each of them produce about 500 pkl file
     logger.info("\nReading raw data")
     file_list = os.listdir(waymo_data_directory)
+    if num is None:
+        logger.warning(
+            "You haven't specified the number of raw files! It is set to {} now.".format(len(file_list) - start_index)
+        )
+        num = len(file_list) - start_index
     assert len(file_list) >= start_index + num and start_index >= 0, \
         "No sufficient files ({}) in raw_data_directory. need: {}, start: {}".format(len(file_list), num, start_index)
     file_list = file_list[start_index:start_index + num]
@@ -448,9 +453,13 @@ def preprocess_waymo_scenarios(files, worker_index):
     """
     from scenarionet.converter.waymo.waymo_protos import scenario_pb2
-    for file in tqdm.tqdm(files, desc="Process Waymo scenarios for worker {}".format(worker_index)):
+    for file in tqdm.tqdm(files, leave=False, position=0, desc="Worker {} Number of raw file".format(worker_index)):
+        logger.info(f"Worker {worker_index} is reading raw file: {file}")
         file_path = os.path.join(file)
         if ("tfrecord" not in file_path) or (not os.path.isfile(file_path)):
+            logger.info(f"Worker {worker_index} skip this file: {file}")
             continue
         for data in tf.data.TFRecordDataset(file_path, compression_type="").as_numpy_iterator():
             scenario = scenario_pb2.Scenario()
@@ -458,5 +467,7 @@ def preprocess_waymo_scenarios(files, worker_index):
             # a trick for logging file name
             scenario.scenario_id = scenario.scenario_id + SPLIT_KEY + file
             yield scenario
+    logger.info(f"Worker {worker_index} finished reading {len(files)} files.")
     # logger.info("Worker {}: Process {} waymo scenarios".format(worker_index, len(scenarios)))
     # return scenarios
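
Taken together with the ``--num_files`` default change above, the file-selection logic reduces to a simple slice over the directory listing. A simplified sketch of the new behavior (condensed from the diff; the function name is hypothetical and surrounding code is omitted):

```python
import os

def select_waymo_files(waymo_data_directory, start_index, num=None):
    file_list = os.listdir(waymo_data_directory)
    if num is None:
        # New default: read every file from start_index to the end.
        num = len(file_list) - start_index
    assert start_index >= 0 and start_index + num <= len(file_list), \
        "No sufficient files ({}) in raw_data_directory. need: {}, start: {}".format(
            len(file_list), num, start_index)
    return file_list[start_index:start_index + num]
```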