Fix a bug in Waymo conversion: GPU should be disabled (#64)
* Update waymo.rst
* Update waymo.rst
* allow generate all data
* update readme
* update
* better logging info
* more info
* up
* fix
* add note on GPU
* better log
* format
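The core of the fix is hiding the GPU from the conversion processes. Below is a minimal sketch of the pattern this commit applies in the entry scripts, assuming TensorFlow 2.x; the environment variable must be set before TensorFlow is imported:

```python
import os

# Hide all CUDA devices from this process. This must happen BEFORE TensorFlow
# (or any other CUDA-aware library) is imported, otherwise the GPU may already
# have been claimed during import.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import tensorflow as tf  # imported only after the environment is set

# With no visible devices, TensorFlow falls back to CPU for all ops.
print(tf.config.list_physical_devices("GPU"))  # expect: []
```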
@@ -55,7 +55,7 @@ pip install -e.
 # Install ScenarioNet
 cd ~/  # Go to the folder you want to host these two repos.
-git clone git@github.com:metadriverse/scenarionet.git
+git clone https://github.com/metadriverse/scenarionet.git
 cd scenarionet
 pip install -e .
 ```

@@ -38,7 +38,7 @@ For Waymo data, we already have the parser in ScenarioNet so just install the Te
     conda install protobuf==3.20

 .. note::
-    You may fail to install ``protobuf`` if using ``pip install protobuf==3.20``.
+    You may fail to install ``protobuf`` if using ``pip install protobuf==3.20``. If so, install via ``conda install protobuf=3.20``.

 For other datasets like nuPlan and nuScenes, you need to setup `nuplan-devkit <https://github.com/motional/nuplan-devkit>`_ and `nuscenes-devkit <https://github.com/nutonomy/nuscenes-devkit>`_ respectively.
 Guidance on how to setup these datasets and connect them with ScenarioNet can be found at :ref:`datasets`.

@@ -35,7 +35,7 @@ First of all, we have to install tensorflow and Protobuf::
     conda install protobuf==3.20

 .. note::
-    You may fail to install ``protobuf`` if using ``pip install protobuf==3.20``.
+    You may fail to install ``protobuf`` if using ``pip install protobuf==3.20``. If so, install via ``conda install protobuf=3.20``.


 2. Download TFRecord
@@ -79,12 +79,18 @@ The downloaded data should be stored in a directory like this::
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 Run the following command to extract scenarios in any directory containing ``tfrecord``.

 Here we take converting raw data in ``training_20s`` as an example::

-    python -m scenarionet.convert_waymo -d /path/to/your/database --raw_data_path ./waymo/training_20s --num_files=1000
+    python -m scenarionet.convert_waymo -d /path/to/your/database --raw_data_path ./waymo/training_20s --num_workers 64

 Now all converted scenarios will be placed at ``/path/to/your/database`` and are ready to be used in your work.

+.. note::
+
+    When running the conversion, please double check whether the GPU is being used. This converter should NOT use the GPU.
+    We have disabled GPU usage by setting ``os.environ["CUDA_VISIBLE_DEVICES"] = ""``.
+
 Known Issues: Waymo
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 N/A

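To "double check whether the GPU is being used", as the note above advises, one option is to inspect what TensorFlow can see at runtime. A quick sanity check, assuming TensorFlow 2.x:

```python
import tensorflow as tf

# The converter is expected to run CPU-only; fail fast if a GPU is visible.
gpus = tf.config.list_physical_devices("GPU")
if gpus:
    raise RuntimeError("Converter should not see GPUs, but found: {}".format(gpus))
print("OK: TensorFlow is CPU-only in this process.")
```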
@@ -1,9 +1,12 @@
+import logging
 import os.path
 import pickle

 import numpy as np
 from metadrive.scenario import utils as sd_utils

+logger = logging.getLogger(__file__)
+

 def recursive_equal(data1, data2, need_assert=False):
     from metadrive.utils.config import Config
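A side note on the logger setup this hunk introduces: ``logging.getLogger(__file__)`` keys the logger by file path, while other files in this commit use the more common ``logging.getLogger(__name__)``. Both are valid; a minimal self-contained sketch:

```python
import logging

# Configure the root logger once at program start; module loggers inherit it.
logging.basicConfig(level=logging.INFO)

# __name__ is the usual idiom; __file__ (as in this hunk) also works but keys
# the logger by file path instead of the dotted module name.
logger = logging.getLogger(__name__)
logger.info("logger ready")
```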
@@ -71,7 +74,7 @@ def save_summary_and_mapping(summary_file_path, mapping_file_path, summary, mapp
         pickle.dump(dict_recursive_remove_array_and_set(summary), file)
     with open(mapping_file_path, "wb") as file:
         pickle.dump(mapping, file)
-    print(
+    logging.info(
         "\n ================ Dataset Summary and Mapping are saved at: {} "
         "================ \n".format(summary_file_path)
     )

@@ -4,6 +4,7 @@ if __name__ == '__main__':
     import pkg_resources  # for suppress warning
     import argparse
     import os.path
+    import os

     import metadrive

@@ -11,6 +12,8 @@ if __name__ == '__main__':
     from scenarionet.converter.pg.utils import get_pg_scenarios, convert_pg_scenario
     from scenarionet.converter.utils import write_to_directory

+    os.environ["CUDA_VISIBLE_DEVICES"] = ""
+
     # For the PG environment config, see: scenarionet/converter/pg/utils.py:6
     parser = argparse.ArgumentParser(description=desc)
     parser.add_argument(

@@ -11,6 +11,8 @@ if __name__ == '__main__':
     from scenarionet.converter.utils import write_to_directory
     from scenarionet.converter.waymo.utils import convert_waymo_scenario, get_waymo_scenarios, preprocess_waymo_scenarios

+    os.environ["CUDA_VISIBLE_DEVICES"] = ""
+
     logger = logging.getLogger(__name__)

     parser = argparse.ArgumentParser(description=desc)
@@ -36,14 +38,14 @@ if __name__ == '__main__':
         default=0,
         type=int,
         help="Control how many files to use. We will list all files in the raw data folder "
-        "and select files[start_file_index: start_file_index+num_files]"
+        "and select files[start_file_index: start_file_index+num_files]. Default: 0."
     )
     parser.add_argument(
         "--num_files",
-        default=1000,
+        default=None,
         type=int,
         help="Control how many files to use. We will list all files in the raw data folder "
-        "and select files[start_file_index: start_file_index+num_files]"
+        "and select files[start_file_index: start_file_index+num_files]. Default: None, will read all files."
     )
     args = parser.parse_args()

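With ``--num_files`` now defaulting to ``None``, the converter reads everything from ``start_file_index`` onward. A hypothetical sketch of the slicing semantics the help strings describe (``select_files`` is illustrative, not part of ScenarioNet's API):

```python
def select_files(file_list, start_file_index=0, num_files=None):
    # None mirrors the new default: take every file after start_file_index.
    if num_files is None:
        num_files = len(file_list) - start_file_index
    assert start_file_index >= 0 and start_file_index + num_files <= len(file_list)
    return file_list[start_file_index:start_file_index + num_files]

print(select_files(["a.tfrecord", "b.tfrecord", "c.tfrecord"], start_file_index=1))
# ['b.tfrecord', 'c.tfrecord']
```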
@@ -65,6 +67,12 @@ if __name__ == '__main__':
     waymo_data_directory = os.path.join(SCENARIONET_DATASET_PATH, args.raw_data_path)
     files = get_waymo_scenarios(waymo_data_directory, args.start_file_index, args.num_files)

+    logger.info(
+        f"We will read {len(files)} raw files. You set the number of workers to {args.num_workers}. "
+        f"Please make sure there will not be too many files to be read in each worker "
+        f"(now it's {len(files) / args.num_workers})!"
+    )
+
     write_to_directory(
         convert_func=convert_waymo_scenario,
         scenarios=files,

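The new log line reports ``len(files) / args.num_workers``, i.e. the average number of raw files per worker. A rough illustration of balanced partitioning under that assumption (the actual splitting lives in ``write_to_directory``):

```python
import numpy as np

def split_for_workers(files, num_workers):
    # np.array_split spreads the remainder across the first chunks,
    # so chunk sizes differ by at most one.
    return [list(chunk) for chunk in np.array_split(files, num_workers)]

chunks = split_for_workers(list(range(10)), num_workers=3)
print([len(c) for c in chunks])  # [4, 3, 3]
```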
@@ -218,7 +218,8 @@ def write_to_directory_single_worker(
         kwargs["env"] = make_env(start_index=scenarios[0], num_scenarios=len(scenarios))

     count = 0
-    for scenario in tqdm.tqdm(scenarios, desc="Worker Index: {}".format(worker_index)):
+    # for scenario in tqdm.tqdm(scenarios, position=2, leave=True, desc=f"Worker {worker_index} Number of scenarios"):
+    for scenario in scenarios:
         # convert scenario
         sd_scenario = convert_func(scenario, dataset_version, **kwargs)
         scenario_id = sd_scenario[SD.ID]

@@ -248,6 +249,9 @@ def write_to_directory_single_worker(
             print("Current Memory: {}".format(process_memory()))
         count += 1

+        if count % 500 == 0:
+            logger.info(f"Worker {worker_index} has processed {count} scenarios.")
+
     # store summary file
     save_summary_and_mapping(summary_file_path, mapping_file_path, summary, mapping)

@@ -257,6 +261,8 @@ def write_to_directory_single_worker(
         shutil.rmtree(delay_remove)
     os.rename(output_path, save_path)

+    logger.info(f"Worker {worker_index} finished! Files are saved at: {save_path}")
+

 def process_memory():
     process = psutil.Process(os.getpid())

@@ -430,6 +430,11 @@ def get_waymo_scenarios(waymo_data_directory, start_index, num):
     # there is 1000 raw data in google cloud, each of them produce about 500 pkl file
     logger.info("\nReading raw data")
     file_list = os.listdir(waymo_data_directory)
+    if num is None:
+        logger.warning(
+            "You haven't specified the number of raw files! It is set to {} now.".format(len(file_list) - start_index)
+        )
+        num = len(file_list) - start_index
     assert len(file_list) >= start_index + num and start_index >= 0, \
         "No sufficient files ({}) in raw_data_directory. need: {}, start: {}".format(len(file_list), num, start_index)
     file_list = file_list[start_index:start_index + num]
@@ -448,9 +453,13 @@ def preprocess_waymo_scenarios(files, worker_index):
     """
     from scenarionet.converter.waymo.waymo_protos import scenario_pb2

-    for file in tqdm.tqdm(files, desc="Process Waymo scenarios for worker {}".format(worker_index)):
+    for file in tqdm.tqdm(files, leave=False, position=0, desc="Worker {} Number of raw file".format(worker_index)):
+
+        logger.info(f"Worker {worker_index} is reading raw file: {file}")
+
         file_path = os.path.join(file)
         if ("tfrecord" not in file_path) or (not os.path.isfile(file_path)):
+            logger.info(f"Worker {worker_index} skips this file: {file}")
             continue
         for data in tf.data.TFRecordDataset(file_path, compression_type="").as_numpy_iterator():
             scenario = scenario_pb2.Scenario()
@@ -458,5 +467,7 @@ def preprocess_waymo_scenarios(files, worker_index):
             # a trick for loging file name
             scenario.scenario_id = scenario.scenario_id + SPLIT_KEY + file
             yield scenario
+
+    logger.info(f"Worker {worker_index} finished reading {len(files)} files.")
     # logger.info("Worker {}: Process {} waymo scenarios".format(worker_index, len(scenarios)))
     # return scenarios
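The "trick for loging file name" in the last hunk appends the source file to the scenario id, so worker logs can recover which ``tfrecord`` produced a scenario. An illustrative round trip, with ``SPLIT_KEY = "|"`` assumed here (the real constant lives in scenarionet's Waymo converter):

```python
SPLIT_KEY = "|"  # assumed separator; mirror scenarionet's actual constant

def tag_scenario_id(scenario_id: str, file_name: str) -> str:
    # Append the source file name so logs can report it later.
    return scenario_id + SPLIT_KEY + file_name

def untag_scenario_id(tagged: str) -> tuple:
    # Split the id back out; partition tolerates a missing separator.
    scenario_id, _, file_name = tagged.partition(SPLIT_KEY)
    return scenario_id, file_name

print(untag_scenario_id(tag_scenario_id("abc123", "training_20s.tfrecord-00000")))
# ('abc123', 'training_20s.tfrecord-00000')
```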