From 2564764c176b8f3b9e8b70610d741b13bea4abf0 Mon Sep 17 00:00:00 2001
From: QuanyiLi
Date: Sat, 6 May 2023 20:33:47 +0100
Subject: [PATCH] waymo

---
 scenarionet/converter/nuplan/utils.py        |   8 +-
 scenarionet/converter/nuscenes/utils.py      |  14 +-
 scenarionet/converter/utils.py               |  11 +-
 scenarionet/converter/waymo/protos/README.md |   5 -
 scenarionet/converter/waymo/utils.py         | 108 +++++++++--
 scenarionet/examples/convert_nuscenes.py     |  11 +-
 scenarionet/examples/convert_waymo.py        | 194 ++-----------------
 7 files changed, 133 insertions(+), 218 deletions(-)
 delete mode 100644 scenarionet/converter/waymo/protos/README.md

diff --git a/scenarionet/converter/nuplan/utils.py b/scenarionet/converter/nuplan/utils.py
index bd2d77e..77830cf 100644
--- a/scenarionet/converter/nuplan/utils.py
+++ b/scenarionet/converter/nuplan/utils.py
@@ -34,7 +34,7 @@
 try:
     NUPLAN_PACKAGE_PATH = os.path.dirname(nuplan.__file__)
 except ImportError as e:
-    raise ImportError("Can not import nuplan-devkit: {}".format(e))
+    logger.warning("Cannot import nuplan-devkit: {}".format(e))
 
 
 EGO = "ego"
@@ -391,7 +391,7 @@ def extract_traffic(scenario: NuPlanScenario, center):
     return tracks
 
 
-def convert_nuplan_scenario(scenario: NuPlanScenario):
+def convert_nuplan_scenario(scenario: NuPlanScenario, version):
     """
     Data will be interpolated to 0.1s time interval, while the time interval of original key frames are 0.5s.
     """
@@ -401,7 +401,7 @@ def convert_nuplan_scenario(scenario: NuPlanScenario):
 
     result = SD()
     result[SD.ID] = scenario.scenario_name
-    result[SD.VERSION] = "nuplan" + scenario.map_version
+    result[SD.VERSION] = "nuplan_" + version
     result[SD.LENGTH] = scenario.get_number_of_iterations()
     # metadata
     result[SD.METADATA] = {}
@@ -432,7 +432,7 @@ def convert_nuplan_scenario(scenario: NuPlanScenario):
     # map
     result[SD.MAP_FEATURES] = extract_map_features(scenario.map_api, scenario_center)
 
-    return result, scenario.scenario_name
+    return result
 
 
 # only for example using
diff --git a/scenarionet/converter/nuscenes/utils.py b/scenarionet/converter/nuscenes/utils.py
index 3724aee..9b4bd7b 100644
--- a/scenarionet/converter/nuscenes/utils.py
+++ b/scenarionet/converter/nuscenes/utils.py
@@ -325,8 +325,6 @@ def get_map_features(scene_info, nuscenes: NuScenes, map_center, radius=250, poi
             SD.TYPE: MetaDriveType.LANE_SURFACE_STREET,
             SD.POLYLINE: discretize_lane(map_api.arcline_path_3[id], resolution_meters=points_distance),
             SD.POLYGON: boundary_polygon,
-            # TODO Add speed limit if needed
-            "speed_limit_kmh": 100
         }
 
     for id in map_objs["lane_connector"]:
@@ -345,7 +343,7 @@
     return ret
 
 
-def convert_nuscenes_scenario(scene, nuscenes: NuScenes):
+def convert_nuscenes_scenario(scene, version, nuscenes: NuScenes):
     """
     Data will be interpolated to 0.1s time interval, while the time interval of original key frames are 0.5s.
     """
@@ -363,7 +361,7 @@ def convert_nuscenes_scenario(scene, nuscenes: NuScenes):
 
     result = SD()
     result[SD.ID] = scene_info["name"]
-    result[SD.VERSION] = "nuscenes" + nuscenes.version
+    result[SD.VERSION] = "nuscenes" + version
     result[SD.LENGTH] = (len(frames) - 1) * 5 + 1
     result[SD.METADATA] = {}
     result[SD.METADATA]["dataset"] = "nuscenes"
@@ -386,4 +384,10 @@ def convert_nuscenes_scenario(scene, nuscenes: NuScenes):
     map_center = result[SD.TRACKS]["ego"]["state"]["position"][0]
     result[SD.MAP_FEATURES] = get_map_features(scene_info, nuscenes, map_center, 250)
 
-    return result, scene_token
+    return result
+
+
+def get_nuscenes_scenarios(dataroot, version):
+    nusc = NuScenes(version=version, dataroot=dataroot)
+    scenarios = nusc.scene
+    return scenarios, nusc
diff --git a/scenarionet/converter/utils.py b/scenarionet/converter/utils.py
index 365dfb1..df0108c 100644
--- a/scenarionet/converter/utils.py
+++ b/scenarionet/converter/utils.py
@@ -1,16 +1,18 @@
 import ast
 import copy
 import inspect
+import logging
 import math
 import os
 import pickle
 import shutil
-from collections import defaultdict
 
 import numpy as np
 import tqdm
 from metadrive.scenario import ScenarioDescription as SD
 
+logger = logging.getLogger(__name__)
+
 
 def nuplan_to_metadrive_vector(vector, nuplan_center=(0, 0)):
     "All vec in nuplan should be centered in (0,0) to avoid numerical explosion"
@@ -77,6 +79,10 @@ def write_to_directory(convert_func,
     if not contains_explicit_return(convert_func):
         raise RuntimeError("The convert function should return a metadata dict")
 
+    if "version" in kwargs:
+        kwargs.pop("version")
+        logger.info("The 'version' in kwargs is overridden by the argument 'dataset_version'")
+
     save_path = copy.deepcopy(output_path)
     output_path = output_path + "_tmp"
     # meta recorder and data summary
@@ -97,7 +103,8 @@ def write_to_directory(convert_func,
         metadata_recorder = {}
         for scenario in tqdm.tqdm(scenarios):
            # convert scenario
-            sd_scenario, scenario_id = convert_func(scenario, **kwargs)
+            sd_scenario = convert_func(scenario, dataset_version, **kwargs)
+            scenario_id = sd_scenario[SD.METADATA][SD.ID]
             export_file_name = "sd_{}_{}.pkl".format(dataset_name + "_" + dataset_version, scenario_id)
 
             # add agents summary
diff --git a/scenarionet/converter/waymo/protos/README.md b/scenarionet/converter/waymo/protos/README.md
deleted file mode 100644
index bee32da..0000000
--- a/scenarionet/converter/waymo/protos/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-This folder contains files from a linux compiled `waymo-open-dataset-tf-2.11.0==1.5.0` source files.
-
-We copied them here for compatibility in Windows computer.
-
-The files will not be pushed to MetaDrive public repo for the sake of license.
\ No newline at end of file
diff --git a/scenarionet/converter/waymo/utils.py b/scenarionet/converter/waymo/utils.py
index 53810e2..24546c2 100644
--- a/scenarionet/converter/waymo/utils.py
+++ b/scenarionet/converter/waymo/utils.py
@@ -1,21 +1,24 @@
-import matplotlib.pyplot as plt
-from matplotlib.pyplot import figure
+import logging
+import os
+import pickle
 
-from metadrive.type import MetaDriveType
 from scenarionet.converter.utils import mph_to_kmh
 from scenarionet.converter.waymo.type import WaymoLaneType, WaymoAgentType, WaymoRoadLineType, WaymoRoadEdgeType
 
+logger = logging.getLogger(__name__)
+import numpy as np
+
 try:
     import tensorflow as tf
-except ImportError:
-    pass
+except ImportError as e:
+    logger.info(e)
 try:
-    from scenarionet.converter.waymo.protos import scenario_pb2
-except ImportError:
-    pass
-import pickle
-import numpy as np
-from metadrive.scenario.scenario_description import ScenarioDescription
+    from waymo_open_dataset.protos.scenario_pb2 import Scenario
+except ImportError as e:
+    logger.warning("%s\nPlease install waymo_open_dataset package: pip install waymo-open-dataset-tf-2-11-0==1.5.0", e)
+
+from metadrive.scenario import ScenarioDescription as SD
+from metadrive.type import MetaDriveType
 
 
 def extract_poly(message):
@@ -299,7 +302,6 @@ class CustomUnpickler(pickle.Unpickler):
         return super().find_class(module, name)
 
 
-
 # return the nearest point's index of the line
 def nearest_point(point, line):
     dist = np.square(line - point)
@@ -348,10 +350,80 @@ def compute_width(map):
         lane["width"] = width
     return
 
 
-# parse raw data from input path to output path
-# def convert_polyline_to_metadrive(waymo_polyline, coordinate_transform=True):
-#     """
-#     Waymo lane is in a different coordinate system, using them after converting
-#     """
-#     convert_polyline_to_metadrive(waymo_polyline, coordinate_transform)
+def convert_waymo_scenario(scenario, version):
+    scenario_pb = Scenario()
+    scenario_pb.ParseFromString(scenario)
+
+    md_scenario = SD()
+
+    md_scenario[SD.ID] = scenario_pb.scenario_id
+    md_scenario[SD.VERSION] = version
+
+    # Please note that SDC track index is not identical to sdc_id.
+    # sdc_id is a unique identifier of a track, while sdc_track_index is only the index of the sdc track
+    # in the tracks data structure.
+
+    track_length = len(list(scenario_pb.timestamps_seconds))
+
+    tracks, sdc_id = extract_tracks(scenario_pb.tracks, scenario_pb.sdc_track_index, track_length)
+
+    md_scenario[SD.LENGTH] = track_length
+
+    md_scenario[SD.TRACKS] = tracks
+
+    dynamic_states = extract_dynamic_map_states(scenario_pb.dynamic_map_states, track_length)
+
+    md_scenario[SD.DYNAMIC_MAP_STATES] = dynamic_states
+
+    map_features = extract_map_features(scenario_pb.map_features)
+    md_scenario[SD.MAP_FEATURES] = map_features
+
+    compute_width(md_scenario[SD.MAP_FEATURES])
+
+    md_scenario[SD.METADATA] = {}
+    md_scenario[SD.METADATA][SD.COORDINATE] = MetaDriveType.COORDINATE_WAYMO
+    md_scenario[SD.METADATA][SD.TIMESTEP] = np.asarray(list(scenario_pb.timestamps_seconds), dtype=np.float32)
+    md_scenario[SD.METADATA][SD.METADRIVE_PROCESSED] = False
+    md_scenario[SD.METADATA][SD.SDC_ID] = str(sdc_id)
+    md_scenario[SD.METADATA]["dataset"] = "waymo"
+    md_scenario[SD.METADATA]["scenario_id"] = scenario_pb.scenario_id
+    # TODO Can we infer it?
+    # md_scenario[SD.METADATA]["source_file"] = str(file)
+    md_scenario[SD.METADATA]["track_length"] = track_length
+
+    # === Waymo specific data. Storing them here ===
+    md_scenario[SD.METADATA]["current_time_index"] = scenario_pb.current_time_index
+    md_scenario[SD.METADATA]["sdc_track_index"] = scenario_pb.sdc_track_index
+
+    # obj id
+    md_scenario[SD.METADATA]["objects_of_interest"] = [str(obj) for obj in scenario_pb.objects_of_interest]
+
+    track_index = [obj.track_index for obj in scenario_pb.tracks_to_predict]
+    track_id = [str(scenario_pb.tracks[ind].id) for ind in track_index]
+    track_difficulty = [obj.difficulty for obj in scenario_pb.tracks_to_predict]
+    track_obj_type = [tracks[id]["type"] for id in track_id]
+    md_scenario[SD.METADATA]["tracks_to_predict"] = {
+        id: {
+            "track_index": track_index[count],
+            "track_id": id,
+            "difficulty": track_difficulty[count],
+            "object_type": track_obj_type[count]
+        }
+        for count, id in enumerate(track_id)
+    }
+
+    return md_scenario
+
+
+def get_waymo_scenarios(waymo_data_directory):
+    # Collect the raw serialized scenarios from every tfrecord file in the given directory.
+    # The Waymo motion dataset ships about 1000 tfrecord shards; each holds roughly 500 scenarios.
+    file_list = os.listdir(waymo_data_directory)
+    scenarios = []
+    for file in file_list:
+        file_path = os.path.join(waymo_data_directory, file)
+        if ("tfrecord" not in file_path) or (not os.path.isfile(file_path)):
+            continue
+        scenarios += [s for s in tf.data.TFRecordDataset(file_path, compression_type="").as_numpy_iterator()]
+    return scenarios
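
The two helpers added above are meant to compose: get_waymo_scenarios collects the raw serialized Scenario protos and convert_waymo_scenario turns one proto into a ScenarioDescription. A rough usage sketch, assuming tensorflow and waymo-open-dataset are installed and that /data/waymo_origin is a placeholder directory holding the tfrecord shards:

    from metadrive.scenario import ScenarioDescription as SD
    from scenarionet.converter.waymo.utils import convert_waymo_scenario, get_waymo_scenarios

    scenarios = get_waymo_scenarios("/data/waymo_origin")  # list of serialized Scenario protos
    sd = convert_waymo_scenario(scenarios[0], version="v1.2")
    print(sd[SD.ID], sd[SD.METADATA]["track_length"])
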
diff --git a/scenarionet/examples/convert_nuscenes.py b/scenarionet/examples/convert_nuscenes.py
index 71ffb35..3644a0f 100644
--- a/scenarionet/examples/convert_nuscenes.py
+++ b/scenarionet/examples/convert_nuscenes.py
@@ -4,23 +4,18 @@
 MetaDrive.
 """
 import os.path
 
-try:
-    from nuscenes import NuScenes
-except ImportError:
-    raise ImportError("nuscenes-devkit has to be set up before running data conversion")
 from scenarionet import SCENARIONET_DATASET_PATH
-from scenarionet.converter.nuscenes.utils import convert_nuscenes_scenario
+from scenarionet.converter.nuscenes.utils import convert_nuscenes_scenario, get_nuscenes_scenarios
 from scenarionet.converter.utils import write_to_directory
 
 if __name__ == "__main__":
     dataset_name = "nuscenes"
     output_path = os.path.join(SCENARIONET_DATASET_PATH, dataset_name)
     version = 'v1.0-mini'
-    dataroot = '/home/shady/data/nuscenes'
     force_overwrite = True
-    nusc = NuScenes(version=version, dataroot=dataroot)
-    scenarios = nusc.scene
+    dataroot = '/home/shady/data/nuscenes'
+    scenarios, nusc = get_nuscenes_scenarios(dataroot, version)
 
     write_to_directory(convert_func=convert_nuscenes_scenario,
                        scenarios=scenarios,
diff --git a/scenarionet/examples/convert_waymo.py b/scenarionet/examples/convert_waymo.py
index 060dd32..634a044 100644
--- a/scenarionet/examples/convert_waymo.py
+++ b/scenarionet/examples/convert_waymo.py
@@ -1,183 +1,25 @@
-"""
-This script takes --folder as input. It is the folder storing a batch of tfrecord file.
-This script will create the output folder "processed_data" sharing the same level as `--folder`.
-
--- folder
--- processed_data
-
-"""
-import argparse
-import copy
+import logging
 import os
-import pickle
-
-import numpy as np
+
+from scenarionet.converter.utils import write_to_directory
+from scenarionet.converter.waymo.utils import convert_waymo_scenario, get_waymo_scenarios
 
-from scenarionet.converter.utils import dict_recursive_remove_array_and_set, get_object_summary, get_number_summary
+logger = logging.getLogger(__name__)
 
-try:
-    import tensorflow as tf
-except ImportError:
-    pass
+from scenarionet import SCENARIONET_DATASET_PATH
 
-try:
-    from waymo_open_dataset.protos import scenario_pb2
-except ImportError:
-    # TODO, redo all waymo import error!
-    try:
-        from metadrive.utils.waymo.protos import scenario_pb2  # Local files that only in PZH's computer.
-    except ImportError:
-        print(
-            "Please install waymo_open_dataset package through metadrive dependencies: "
-            "pip install waymo-open-dataset-tf-2-11-0==1.5.0"
-        )
+if __name__ == '__main__':
+    force_overwrite = True
+    dataset_name = "waymo"
+    output_path = os.path.join(SCENARIONET_DATASET_PATH, dataset_name)
+    version = 'v1.2'
 
-from metadrive.scenario import ScenarioDescription as SD
-from metadrive.type import MetaDriveType
-from scenarionet.converter.waymo.utils import extract_tracks, extract_dynamic_map_states, extract_map_features, \
-    compute_width
-import sys
+    waymo_data_directory = os.path.join(SCENARIONET_DATASET_PATH, "waymo_origin")
+    scenarios = get_waymo_scenarios(waymo_data_directory)
 
 
-def convert_waymo(file_list, input_path, output_path, worker_index=None):
-    scenario = scenario_pb2.Scenario()
-
-    metadata_recorder = {}
-
-    total_scenarios = 0
-
-    desc = ""
-    summary_file = "dataset_summary.pkl"
-    if worker_index is not None:
-        desc += "Worker {} ".format(worker_index)
-        summary_file = "dataset_summary_worker{}.pkl".format(worker_index)
-
-    for file_count, file in enumerate(file_list):
-        file_path = os.path.join(input_path, file)
-        if ("tfrecord" not in file_path) or (not os.path.isfile(file_path)):
-            continue
-        dataset = tf.data.TFRecordDataset(file_path, compression_type="")
-
-        total = sum(1 for _ in dataset.as_numpy_iterator())
-
-        for j, data in enumerate(dataset.as_numpy_iterator()):
-            scenario.ParseFromString(data)
-
-            md_scenario = SD()
-
-            md_scenario[SD.ID] = scenario.scenario_id
-            # TODO LQY, get version from original files
-            md_scenario[SD.VERSION] = "1.2"
-
-            # Please note that SDC track index is not identical to sdc_id.
-            # sdc_id is a unique indicator to a track, while sdc_track_index is only the index of the sdc track
-            # in the tracks datastructure.
-
-            track_length = len(list(scenario.timestamps_seconds))
-
-            tracks, sdc_id = extract_tracks(scenario.tracks, scenario.sdc_track_index, track_length)
-
-            md_scenario[SD.LENGTH] = track_length
-
-            md_scenario[SD.TRACKS] = tracks
-
-            dynamic_states = extract_dynamic_map_states(scenario.dynamic_map_states, track_length)
-
-            md_scenario[SD.DYNAMIC_MAP_STATES] = dynamic_states
-
-            map_features = extract_map_features(scenario.map_features)
-            md_scenario[SD.MAP_FEATURES] = map_features
-
-            compute_width(md_scenario[SD.MAP_FEATURES])
-
-            md_scenario[SD.METADATA] = {}
-            md_scenario[SD.METADATA][SD.COORDINATE] = MetaDriveType.COORDINATE_WAYMO
-            md_scenario[SD.METADATA][SD.TIMESTEP] = np.asarray(list(scenario.timestamps_seconds), dtype=np.float32)
-            md_scenario[SD.METADATA][SD.METADRIVE_PROCESSED] = False
-            md_scenario[SD.METADATA][SD.SDC_ID] = str(sdc_id)
-            md_scenario[SD.METADATA]["dataset"] = "waymo"
-            md_scenario[SD.METADATA]["scenario_id"] = scenario.scenario_id
-            md_scenario[SD.METADATA]["source_file"] = str(file)
-            md_scenario[SD.METADATA]["track_length"] = track_length
-
-            # === Waymo specific data. Storing them here ===
-            md_scenario[SD.METADATA]["current_time_index"] = scenario.current_time_index
-            md_scenario[SD.METADATA]["sdc_track_index"] = scenario.sdc_track_index
-
-            # obj id
-            md_scenario[SD.METADATA]["objects_of_interest"] = [str(obj) for obj in scenario.objects_of_interest]
-
-            track_index = [obj.track_index for obj in scenario.tracks_to_predict]
-            track_id = [str(scenario.tracks[ind].id) for ind in track_index]
-            track_difficulty = [obj.difficulty for obj in scenario.tracks_to_predict]
-            track_obj_type = [tracks[id]["type"] for id in track_id]
-            md_scenario[SD.METADATA]["tracks_to_predict"] = {
-                id: {
-                    "track_index": track_index[count],
-                    "track_id": id,
-                    "difficulty": track_difficulty[count],
-                    "object_type": track_obj_type[count]
-                }
-                for count, id in enumerate(track_id)
-            }
-
-            export_file_name = "sd_{}_{}.pkl".format(file, scenario.scenario_id)
-
-            summary_dict = {}
-            summary_dict["sdc"] = get_object_summary(
-                state_dict=md_scenario.get_sdc_track()["state"], id=sdc_id, type=md_scenario.get_sdc_track()["type"]
-            )
-            for track_id, track in md_scenario[SD.TRACKS].items():
-                summary_dict[track_id] = get_object_summary(state_dict=track["state"], id=track_id, type=track["type"])
-            md_scenario[SD.METADATA]["object_summary"] = summary_dict
-
-            # Count some objects occurrence
-            md_scenario[SD.METADATA]["number_summary"] = get_number_summary(md_scenario)
-
-            metadata_recorder[export_file_name] = copy.deepcopy(md_scenario[SD.METADATA])
-
-            md_scenario = md_scenario.to_dict()
-
-            SD.sanity_check(md_scenario, check_self_type=True)
-
-            p = os.path.join(output_path, export_file_name)
-            with open(p, "wb") as f:
-                pickle.dump(md_scenario, f)
-
-            total_scenarios += 1
-            if j == total - 1:
-                print(
-                    f"{desc}Collected {total_scenarios} scenarios. File {file_count + 1}/{len(file_list)} has "
-                    f"{total} Scenarios. The last one is saved at: {p}"
-                )
-
-    summary_file = os.path.join(output_path, summary_file)
-    with open(summary_file, "wb") as file:
-        pickle.dump(dict_recursive_remove_array_and_set(metadata_recorder), file)
-    print("Summary is saved at: {}".format(summary_file))
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--input", required=True, help="The data folder storing raw tfrecord from Waymo dataset.")
-    parser.add_argument(
-        "--output", default="processed_data", type=str, help="The data folder storing raw tfrecord from Waymo dataset."
-    )
-    args = parser.parse_args()
-
-    scenario_data_path = args.input
-
-    output_path: str = os.path.dirname(scenario_data_path)
-    output_path = os.path.join(output_path, args.output)
-    os.makedirs(output_path, exist_ok=True)
-
-    raw_data_path = scenario_data_path
-
-    # parse raw data from input path to output path,
-    # there is 1000 raw data in google cloud, each of them produce about 500 pkl file
-    file_list = os.listdir(raw_data_path)
-    convert_waymo(file_list, raw_data_path, output_path)
-    sys.exit()
-    # file_path = AssetLoader.file_path("waymo", "processed", "0.pkl", return_raw_style=False)
-    # data = read_waymo_data(file_path)
-    # draw_waymo_map(data)
+    write_to_directory(convert_func=convert_waymo_scenario,
+                       scenarios=scenarios,
+                       output_path=output_path,
+                       dataset_version=version,
+                       dataset_name=dataset_name,
+                       force_overwrite=force_overwrite)
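
One design note on the example above: get_waymo_scenarios materializes every serialized scenario in memory before conversion begins, which can be very large for the full dataset. Since write_to_directory only iterates the scenarios once, a lazy generator could be substituted; the sketch below is an illustration under that assumption, not part of this patch:

    import os

    import tensorflow as tf

    def iter_waymo_scenarios(waymo_data_directory):
        # Yield raw scenario bytes shard by shard instead of building one big list.
        for file in os.listdir(waymo_data_directory):
            file_path = os.path.join(waymo_data_directory, file)
            if "tfrecord" not in file_path or not os.path.isfile(file_path):
                continue
            for record in tf.data.TFRecordDataset(file_path, compression_type="").as_numpy_iterator():
                yield record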