Fix a bug in Waymo conversion: GPU should be disabled (#64)
* Update waymo.rst * Update waymo.rst * allow generate all data * update readme * update * better logging info * more info * up * fix * add note on GPU * better log * format
This commit is contained in:
@@ -55,7 +55,7 @@ pip install -e.
|
||||
|
||||
# Install ScenarioNet
|
||||
cd ~/ # Go to the folder you want to host these two repos.
|
||||
git clone git@github.com:metadriverse/scenarionet.git
|
||||
git clone https://github.com/metadriverse/scenarionet.git
|
||||
cd scenarionet
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
@@ -38,7 +38,7 @@ For Waymo data, we already have the parser in ScenarioNet so just install the Te
|
||||
conda install protobuf==3.20
|
||||
|
||||
.. note::
|
||||
You may fail to install ``protobuf`` if using ``pip install protobuf==3.20``.
|
||||
You may fail to install ``protobuf`` if using ``pip install protobuf==3.20``. If so, install via ``conda install protobuf=3.20``.
|
||||
|
||||
For other datasets like nuPlan and nuScenes, you need to setup `nuplan-devkit <https://github.com/motional/nuplan-devkit>`_ and `nuscenes-devkit <https://github.com/nutonomy/nuscenes-devkit>`_ respectively.
|
||||
Guidance on how to setup these datasets and connect them with ScenarioNet can be found at :ref:`datasets`.
|
||||
|
||||
@@ -35,7 +35,7 @@ First of all, we have to install tensorflow and Protobuf::
|
||||
conda install protobuf==3.20
|
||||
|
||||
.. note::
|
||||
You may fail to install ``protobuf`` if using ``pip install protobuf==3.20``.
|
||||
You may fail to install ``protobuf`` if using ``pip install protobuf==3.20``. If so, install via ``conda install protobuf=3.20``.
|
||||
|
||||
|
||||
2. Download TFRecord
|
||||
@@ -79,12 +79,18 @@ The downloaded data should be stored in a directory like this::
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Run the following command to extract scenarios in any directory containing ``tfrecord``.
|
||||
|
||||
|
||||
Here we take converting raw data in ``training_20s`` as an example::
|
||||
|
||||
python -m scenarionet.convert_waymo -d /path/to/your/database --raw_data_path ./waymo/training_20s --num_files=1000
|
||||
python -m scenarionet.convert_waymo -d /path/to/your/database --raw_data_path ./waymo/training_20s --num_workers 64
|
||||
|
||||
Now all converted scenarios will be placed at ``/path/to/your/database`` and are ready to be used in your work.
|
||||
|
||||
.. note::
|
||||
When running the conversion, please double check whether GPU is being used. This converter should NOT use GPU.
|
||||
    We have disabled GPU usage by ``os.environ["CUDA_VISIBLE_DEVICES"] = ""``.
|
||||
|
||||
Known Issues: Waymo
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
N/A
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
import logging
|
||||
import os.path
|
||||
import pickle
|
||||
|
||||
import numpy as np
|
||||
from metadrive.scenario import utils as sd_utils
|
||||
|
||||
logger = logging.getLogger(__file__)
|
||||
|
||||
|
||||
def recursive_equal(data1, data2, need_assert=False):
|
||||
from metadrive.utils.config import Config
|
||||
@@ -71,7 +74,7 @@ def save_summary_and_mapping(summary_file_path, mapping_file_path, summary, mapp
|
||||
pickle.dump(dict_recursive_remove_array_and_set(summary), file)
|
||||
with open(mapping_file_path, "wb") as file:
|
||||
pickle.dump(mapping, file)
|
||||
print(
|
||||
logging.info(
|
||||
"\n ================ Dataset Summary and Mapping are saved at: {} "
|
||||
"================ \n".format(summary_file_path)
|
||||
)
|
||||
|
||||
@@ -4,6 +4,7 @@ if __name__ == '__main__':
|
||||
import pkg_resources # for suppress warning
|
||||
import argparse
|
||||
import os.path
|
||||
import os
|
||||
|
||||
import metadrive
|
||||
|
||||
@@ -11,6 +12,8 @@ if __name__ == '__main__':
|
||||
from scenarionet.converter.pg.utils import get_pg_scenarios, convert_pg_scenario
|
||||
from scenarionet.converter.utils import write_to_directory
|
||||
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = ""
|
||||
|
||||
# For the PG environment config, see: scenarionet/converter/pg/utils.py:6
|
||||
parser = argparse.ArgumentParser(description=desc)
|
||||
parser.add_argument(
|
||||
|
||||
@@ -11,6 +11,8 @@ if __name__ == '__main__':
|
||||
from scenarionet.converter.utils import write_to_directory
|
||||
from scenarionet.converter.waymo.utils import convert_waymo_scenario, get_waymo_scenarios, preprocess_waymo_scenarios
|
||||
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = ""
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
parser = argparse.ArgumentParser(description=desc)
|
||||
@@ -36,14 +38,14 @@ if __name__ == '__main__':
|
||||
default=0,
|
||||
type=int,
|
||||
help="Control how many files to use. We will list all files in the raw data folder "
|
||||
"and select files[start_file_index: start_file_index+num_files]"
|
||||
"and select files[start_file_index: start_file_index+num_files]. Default: 0."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--num_files",
|
||||
default=1000,
|
||||
default=None,
|
||||
type=int,
|
||||
help="Control how many files to use. We will list all files in the raw data folder "
|
||||
"and select files[start_file_index: start_file_index+num_files]"
|
||||
"and select files[start_file_index: start_file_index+num_files]. Default: None, will read all files."
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -65,6 +67,12 @@ if __name__ == '__main__':
|
||||
waymo_data_directory = os.path.join(SCENARIONET_DATASET_PATH, args.raw_data_path)
|
||||
files = get_waymo_scenarios(waymo_data_directory, args.start_file_index, args.num_files)
|
||||
|
||||
logger.info(
|
||||
f"We will read {len(files)} raw files. You set the number of workers to {args.num_workers}. "
|
||||
f"Please make sure there will not be too much files to be read in each worker "
|
||||
f"(now it's {len(files) / args.num_workers})!"
|
||||
)
|
||||
|
||||
write_to_directory(
|
||||
convert_func=convert_waymo_scenario,
|
||||
scenarios=files,
|
||||
|
||||
@@ -218,7 +218,8 @@ def write_to_directory_single_worker(
|
||||
kwargs["env"] = make_env(start_index=scenarios[0], num_scenarios=len(scenarios))
|
||||
|
||||
count = 0
|
||||
for scenario in tqdm.tqdm(scenarios, desc="Worker Index: {}".format(worker_index)):
|
||||
# for scenario in tqdm.tqdm(scenarios, position=2, leave=True, desc=f"Worker {worker_index} Number of scenarios"):
|
||||
for scenario in scenarios:
|
||||
# convert scenario
|
||||
sd_scenario = convert_func(scenario, dataset_version, **kwargs)
|
||||
scenario_id = sd_scenario[SD.ID]
|
||||
@@ -248,6 +249,9 @@ def write_to_directory_single_worker(
|
||||
print("Current Memory: {}".format(process_memory()))
|
||||
count += 1
|
||||
|
||||
if count % 500 == 0:
|
||||
logger.info(f"Worker {worker_index} has processed {count} scenarios.")
|
||||
|
||||
# store summary file
|
||||
save_summary_and_mapping(summary_file_path, mapping_file_path, summary, mapping)
|
||||
|
||||
@@ -257,6 +261,8 @@ def write_to_directory_single_worker(
|
||||
shutil.rmtree(delay_remove)
|
||||
os.rename(output_path, save_path)
|
||||
|
||||
logger.info(f"Worker {worker_index} finished! Files are saved at: {save_path}")
|
||||
|
||||
|
||||
def process_memory():
|
||||
process = psutil.Process(os.getpid())
|
||||
|
||||
@@ -430,6 +430,11 @@ def get_waymo_scenarios(waymo_data_directory, start_index, num):
|
||||
# there is 1000 raw data in google cloud, each of them produce about 500 pkl file
|
||||
logger.info("\nReading raw data")
|
||||
file_list = os.listdir(waymo_data_directory)
|
||||
if num is None:
|
||||
logger.warning(
|
||||
"You haven't specified the number of raw files! It is set to {} now.".format(len(file_list) - start_index)
|
||||
)
|
||||
num = len(file_list) - start_index
|
||||
assert len(file_list) >= start_index + num and start_index >= 0, \
|
||||
"No sufficient files ({}) in raw_data_directory. need: {}, start: {}".format(len(file_list), num, start_index)
|
||||
file_list = file_list[start_index:start_index + num]
|
||||
@@ -448,9 +453,13 @@ def preprocess_waymo_scenarios(files, worker_index):
|
||||
"""
|
||||
from scenarionet.converter.waymo.waymo_protos import scenario_pb2
|
||||
|
||||
for file in tqdm.tqdm(files, desc="Process Waymo scenarios for worker {}".format(worker_index)):
|
||||
for file in tqdm.tqdm(files, leave=False, position=0, desc="Worker {} Number of raw file".format(worker_index)):
|
||||
|
||||
logger.info(f"Worker {worker_index} is reading raw file: {file}")
|
||||
|
||||
file_path = os.path.join(file)
|
||||
if ("tfrecord" not in file_path) or (not os.path.isfile(file_path)):
|
||||
logger.info(f"Worker {worker_index} skip this file: {file}")
|
||||
continue
|
||||
for data in tf.data.TFRecordDataset(file_path, compression_type="").as_numpy_iterator():
|
||||
scenario = scenario_pb2.Scenario()
|
||||
@@ -458,5 +467,7 @@ def preprocess_waymo_scenarios(files, worker_index):
|
||||
# a trick for loging file name
|
||||
scenario.scenario_id = scenario.scenario_id + SPLIT_KEY + file
|
||||
yield scenario
|
||||
|
||||
logger.info(f"Worker {worker_index} finished read {len(files)} files.")
|
||||
# logger.info("Worker {}: Process {} waymo scenarios".format(worker_index, len(scenarios)))
|
||||
# return scenarios
|
||||
|
||||
Reference in New Issue
Block a user