diff --git a/documentation/datasets.rst b/documentation/datasets.rst index e8a2dbc..33af5a1 100644 --- a/documentation/datasets.rst +++ b/documentation/datasets.rst @@ -22,6 +22,7 @@ We will fix it as best as we can and record it in the troubleshooting section fo - :ref:`lyft` - :ref:`new_data` - :ref:`argoverse2` +- :ref:`vod` diff --git a/documentation/index.rst b/documentation/index.rst index 520a56c..462250e 100644 --- a/documentation/index.rst +++ b/documentation/index.rst @@ -55,6 +55,7 @@ Please feel free to contact us if you have any suggestion or idea! PG.rst lyft.rst argoverse2.rst + vod.rst new_data.rst diff --git a/documentation/operations.rst b/documentation/operations.rst index 297d28b..4f49d35 100644 --- a/documentation/operations.rst +++ b/documentation/operations.rst @@ -162,6 +162,55 @@ However, Lyft is now a part of Woven Planet and the new data has to be parsed vi We are working on support this new toolkit to support the new Lyft dataset. Detailed guide is available at Section :ref:`nuscenes`. +Convert VoD +------------------------------------ + +.. code-block:: text + + python -m scenarionet.convert_vod [-h] [--database_path DATABASE_PATH] + [--dataset_name DATASET_NAME] + [--split + {v1.0-trainval,v1.0-test,train,train_val,val,test}] + [--dataroot DATAROOT] [--map_radius MAP_RADIUS] + [--future FUTURE] [--past PAST] [--overwrite] + [--num_workers NUM_WORKERS] + + Build database from VOD scenarios + + optional arguments: + -h, --help show this help message and exit + --database_path DATABASE_PATH, -d DATABASE_PATH + directory, The path to place the data + --dataset_name DATASET_NAME, -n DATASET_NAME + Dataset name, will be used to generate scenario files + --split + {v1.0-trainval,v1.0-test,train,train_val,val,test} + Which splits of VOD data should be used. If set to + ['v1.0-trainval', 'v1.0-test'], it will + convert the full log into scenarios with 20 second episode + length. If set to ['train', 'train_val', 'val', 'test'], + it will convert segments used for VOD prediction challenge + to scenarios, resulting in more converted scenarios. + Generally, you should choose this parameter from + ['v1.0-trainval', 'v1.0-test'] to get complete + scenarios for planning unless you want to use the + converted scenario files for prediction task. + --dataroot DATAROOT The path of vod data + --map_radius MAP_RADIUS The size of map + --future FUTURE 3 seconds by default. How many future seconds to + predict. Only available if split is chosen from + ['train', 'train_val', 'val', 'test'] + --past PAST 0.5 seconds by default. How many past seconds are + used for prediction. Only available if split is + chosen from ['train', 'train_val', 'val', 'test'] + --overwrite If the database_path exists, whether to overwrite it + --num_workers NUM_WORKERS number of workers to use + + +This script converts the View-of-Delft Prediction (VoD) dataset into our scenario descriptions. +You will need to install ``vod-devkit`` and download the source data from https://intelligent-vehicles.org/datasets/view-of-delft/. +Detailed guide is available at Section :ref:`vod`. 
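For example, a typical invocation might look like the following (the database path below is a placeholder; the prediction splits additionally use ``--past`` and ``--future``):

.. code-block:: bash

    # Convert the full v1.0-trainval logs into 20-second scenarios
    python -m scenarionet.convert_vod -d /path/to/your/database \
        --split v1.0-trainval --dataroot /data/sets/vod

    # Or convert the prediction-challenge segments (0.5 s past, 3 s future by default)
    python -m scenarionet.convert_vod -d /path/to/your/database \
        --split train --dataroot /data/sets/vod --past 0.5 --future 3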
Convert PG
-------------------------

@@ -519,4 +568,4 @@ The main goal of this command is to ensure that the training and test sets are i
     -h, --help           show this help message and exit
     --d_1 D_1            The path of the first database
     --d_2 D_2            The path of the second database
-    --show_id            whether to show the id of overlapped scenarios
\ No newline at end of file
+    --show_id            whether to show the id of overlapped scenarios
diff --git a/documentation/vod.rst b/documentation/vod.rst
new file mode 100644
index 0000000..9743698
--- /dev/null
+++ b/documentation/vod.rst
@@ -0,0 +1,109 @@
#############################
View-of-Delft (VoD)
#############################

| Website: https://intelligent-vehicles.org/datasets/view-of-delft/
| Download: https://intelligent-vehicles.org/datasets/view-of-delft/ (Registration required)
| Papers:
  Detection dataset: https://ieeexplore.ieee.org/document/9699098
  Prediction dataset: https://ieeexplore.ieee.org/document/10493110

The View-of-Delft (VoD) dataset is a novel automotive dataset recorded in Delft,
the Netherlands. It contains 8600+ frames of synchronized and calibrated
64-layer LiDAR, (stereo) camera, and 3+1D (range, azimuth, elevation, + Doppler)
radar data acquired in complex, urban traffic. It consists of 123100+
3D bounding box annotations of both moving and static objects, including 26500+
pedestrian, 10800 cyclist and 26900+ car labels. It additionally contains
semantic map annotations and accurate ego-vehicle localization data.

Benchmarks for detection and prediction tasks have been released for the dataset. See
the sections below for details on these benchmarks.

**Detection**:
  An object detection benchmark is available for researchers to develop and
  evaluate their models on the VoD dataset. At the time of publication, this
  benchmark was the largest automotive multi-class object detection dataset
  containing 3+1D radar data, and the only dataset containing high-end (64-layer)
  LiDAR and (any kind of) radar data at the same time.

**Prediction**:
  A trajectory prediction benchmark is publicly available to enable research
  on urban multi-class trajectory prediction. This benchmark contains challenging
  prediction cases in the historic city center of Delft with a high proportion of
  Vulnerable Road Users (VRUs), such as pedestrians and cyclists. Semantic map
  annotations for road elements such as lanes, sidewalks, and crosswalks are
  provided as context for prediction models.

1. Install VoD Prediction Toolkit
=================================

We will use the VoD Prediction toolkit to convert the data.
First of all, we have to install the ``vod-devkit``.

.. code-block:: bash

    # install from github (Recommended)
    git clone git@github.com:tudelft-iv/view-of-delft-prediction-devkit.git
    cd view-of-delft-prediction-devkit
    pip install -e .

    # or install from PyPI
    pip install vod-devkit

By installing from GitHub, you can access the examples and source code of the toolkit.
The examples are useful for verifying that the installation and dataset setup are correct.


2. Download VoD Data
==============================

The official instructions are available at https://intelligent-vehicles.org/datasets/view-of-delft/.
Here we provide a simplified procedure.

First of all, please fill in the access form on the VoD website: https://intelligent-vehicles.org/datasets/view-of-delft/.
The maintainers will send the data link to your email. Download and unzip the file named ``view_of_delft_prediction_PUBLIC.zip``.
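As a rough, illustrative sketch (the exact layout inside the archive is an assumption here), you could unpack the archive directly into the default data root expected by ``vod-devkit`` and then check it against the directory structure shown in the next step:

.. code-block:: bash

    # Illustrative only: unpack into the default dataroot used by vod-devkit.
    # Compare the result with the directory layout listed below and move the
    # folders manually if the archive is organized differently.
    mkdir -p /data/sets/vod
    unzip view_of_delft_prediction_PUBLIC.zip -d /data/sets/vod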
Secondly, all files should be organized into the following structure::

    /vod/data/path/
    ├── maps/
    |   └──expansion/
    ├── v1.0-trainval/
    |   ├──attribute.json
    |   ├──calibrated_sensor.json
    |   ├──map.json
    |   ├──log.json
    |   ├──ego_pose.json
    |   └──...
    └── v1.0-test/

**Note**: The sensor data is currently not available in the Prediction dataset, but will be released in the near future.

According to the official instructions, ``/vod/data/path`` should be ``/data/sets/vod`` by default,
so that the ``vod-devkit`` can find it.
You can still place the data anywhere else, as long as you either:

- build a soft link connecting your data folder to ``/data/sets/vod``, or
- specify ``dataroot`` when calling the vod APIs and our convertors.


After this step, the examples in ``vod-devkit`` are supposed to work.
Please run ``view-of-delft-prediction-devkit/tutorials/vod_tutorial.ipynb`` and check that the demo completes successfully.

3. Build VoD Database
===========================

Once the raw data is set up, the convertors in ScenarioNet can read it, convert it to the scenario format, and build the database.
Here we take converting the raw data in ``v1.0-trainval`` as an example::

    python -m scenarionet.convert_vod -d /path/to/your/database --split v1.0-trainval --dataroot /vod/data/path

The ``split`` argument determines which split to convert. ``dataroot`` defaults to ``/data/sets/vod``,
so you need to specify it if your data is stored in any other directory.
All converted scenarios will be placed at ``/path/to/your/database`` and are ready to be used in your work.


Known Issues: VoD
=======================

N/A
diff --git a/scenarionet/convert_vod.py b/scenarionet/convert_vod.py
new file mode 100644
index 0000000..ec6e129
--- /dev/null
+++ b/scenarionet/convert_vod.py
@@ -0,0 +1,95 @@
desc = "Build database from VOD scenarios"

prediction_split = ["train", "train_val", "val", "test"]
scene_split = ["v1.0-trainval", "v1.0-test"]

split_to_scene = {
    "train": "v1.0-trainval",
    "train_val": "v1.0-trainval",
    "val": "v1.0-trainval",
    "test": "v1.0-test",
}

if __name__ == "__main__":
    import pkg_resources  # to suppress warning
    import argparse
    import os.path
    from functools import partial
    from scenarionet import SCENARIONET_DATASET_PATH
    from scenarionet.converter.vod.utils import (
        convert_vod_scenario,
        get_vod_scenarios,
        get_vod_prediction_split,
    )
    from scenarionet.converter.utils import write_to_directory

    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        "--database_path",
        "-d",
        default=os.path.join(SCENARIONET_DATASET_PATH, "vod"),
        help="directory, The path to place the data",
    )
    parser.add_argument(
        "--dataset_name",
        "-n",
        default="vod",
        help="Dataset name, will be used to generate scenario files",
    )
    parser.add_argument(
        "--split",
        default="v1.0-trainval",
        choices=scene_split + prediction_split,
        help="Which splits of VOD data should be used. If set to {}, it will convert the full log into scenarios"
        " with 20 second episode length. If set to {}, it will convert segments used for VOD prediction"
        " challenge to scenarios, resulting in more converted scenarios. 
Generally, you should choose this " + " parameter from {} to get complete scenarios for planning unless you want to use the converted scenario " + " files for prediction task.".format(scene_split, prediction_split, scene_split), + ) + parser.add_argument("--dataroot", default="/data/sets/vod", help="The path of vod data") + parser.add_argument("--map_radius", default=500, type=float, help="The size of map") + parser.add_argument( + "--future", + default=3, + type=float, + help="3 seconds by default. How many future seconds to predict. Only " + "available if split is chosen from {}".format(prediction_split), + ) + parser.add_argument( + "--past", + default=0.5, + type=float, + help="0.5 seconds by default. How many past seconds are used for prediction." + " Only available if split is chosen from {}".format(prediction_split), + ) + parser.add_argument( + "--overwrite", + action="store_true", + help="If the database_path exists, whether to overwrite it", + ) + parser.add_argument("--num_workers", type=int, default=8, help="number of workers to use") + args = parser.parse_args() + + overwrite = args.overwrite + dataset_name = args.dataset_name + output_path = args.database_path + version = args.split + + if version in scene_split: + scenarios, vods = get_vod_scenarios(args.dataroot, version, args.num_workers) + else: + scenarios, vods = get_vod_prediction_split(args.dataroot, version, args.past, args.future, args.num_workers) + write_to_directory( + convert_func=convert_vod_scenario, + scenarios=scenarios, + output_path=output_path, + dataset_version=version, + dataset_name=dataset_name, + overwrite=overwrite, + num_workers=args.num_workers, + vodelft=vods, + past=[args.past for _ in range(args.num_workers)], + future=[args.future for _ in range(args.num_workers)], + prediction=[version in prediction_split for _ in range(args.num_workers)], + map_radius=[args.map_radius for _ in range(args.num_workers)], + ) diff --git a/scenarionet/converter/vod/__init__.py b/scenarionet/converter/vod/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scenarionet/converter/vod/type.py b/scenarionet/converter/vod/type.py new file mode 100644 index 0000000..e5031b1 --- /dev/null +++ b/scenarionet/converter/vod/type.py @@ -0,0 +1,90 @@ +ALL_TYPE = { + "noise": 'noise', + "human.pedestrian.adult": 'adult', + "human.pedestrian.child": 'child', + "human.pedestrian.wheelchair": 'wheelchair', + "human.pedestrian.stroller": 'stroller', + "human.pedestrian.personal_mobility": 'p.mobility', + "human.pedestrian.police_officer": 'police', + "human.pedestrian.construction_worker": 'worker', + "animal": 'animal', + "vehicle.car": 'car', + "vehicle.motorcycle": 'motorcycle', + "vehicle.bicycle": 'bicycle', + "vehicle.bus.bendy": 'bus.bendy', + "vehicle.bus.rigid": 'bus.rigid', + "vehicle.truck": 'truck', + "vehicle.construction": 'constr. 
veh', + "vehicle.emergency.ambulance": 'ambulance', + "vehicle.emergency.police": 'police car', + "vehicle.trailer": 'trailer', + "movable_object.barrier": 'barrier', + "movable_object.trafficcone": 'trafficcone', + "movable_object.pushable_pullable": 'push/pullable', + "movable_object.debris": 'debris', + "static_object.bicycle_rack": 'bicycle racks', + "flat.driveable_surface": 'driveable', + "flat.sidewalk": 'sidewalk', + "flat.terrain": 'terrain', + "flat.other": 'flat.other', + "static.manmade": 'manmade', + "static.vegetation": 'vegetation', + "static.other": 'static.other', + "vehicle.ego": "ego", + # ADDED: + "static.vehicle.bicycle": "static.other", + "static.vehicle.motorcycle": "static.other", + "vehicle.other": "vehicle.other", + "static.vehicle.other": "static.other", + "vehicle.unknown": "vehicle.unknown" +} +NOISE_TYPE = { + "noise": 'noise', + "animal": 'animal', + "static_object.bicycle_rack": 'bicycle racks', + "movable_object.pushable_pullable": 'push/pullable', + "movable_object.debris": 'debris', + "static.manmade": 'manmade', + "static.vegetation": 'vegetation', + "static.other": 'static.other', + "static.vehicle.bicycle": "static.other", + "static.vehicle.motorcycle": "static.other", + "static.vehicle.other": "static.other", +} +HUMAN_TYPE = { + "human.pedestrian.adult": 'adult', + "human.pedestrian.child": 'child', + "human.pedestrian.wheelchair": 'wheelchair', + "human.pedestrian.stroller": 'stroller', + "human.pedestrian.personal_mobility": 'p.mobility', + "human.pedestrian.police_officer": 'police', + "human.pedestrian.construction_worker": 'worker', +} +BICYCLE_TYPE = { + "vehicle.bicycle": 'bicycle', + "vehicle.motorcycle": 'motorcycle', +} +VEHICLE_TYPE = { + "vehicle.car": 'car', + "vehicle.bus.bendy": 'bus.bendy', + "vehicle.bus.rigid": 'bus.rigid', + "vehicle.truck": 'truck', + "vehicle.construction": 'constr. 
veh', + "vehicle.emergency.ambulance": 'ambulance', + "vehicle.emergency.police": 'police car', + "vehicle.trailer": 'trailer', + "vehicle.ego": "ego", + # ADDED: + "vehicle.other": "vehicle.other", + "vehicle.unknown": "vehicle.other" +} +OBSTACLE_TYPE = { + "movable_object.barrier": 'barrier', + "movable_object.trafficcone": 'trafficcone', +} +TERRAIN_TYPE = { + "flat.driveable_surface": 'driveable', + "flat.sidewalk": 'sidewalk', + "flat.terrain": 'terrain', + "flat.other": 'flat.other' +} diff --git a/scenarionet/converter/vod/utils.py b/scenarionet/converter/vod/utils.py new file mode 100644 index 0000000..54c5932 --- /dev/null +++ b/scenarionet/converter/vod/utils.py @@ -0,0 +1,558 @@ +import copy +import logging + +import geopandas as gpd +import numpy as np +from metadrive.scenario import ScenarioDescription as SD +from metadrive.type import MetaDriveType +from vod.eval.prediction.splits import get_prediction_challenge_split +from shapely.ops import unary_union + +from scenarionet.converter.vod.type import ( + ALL_TYPE, + HUMAN_TYPE, + BICYCLE_TYPE, + VEHICLE_TYPE, +) + +logger = logging.getLogger(__name__) +try: + import logging + + logging.getLogger("shapely.geos").setLevel(logging.CRITICAL) + from vod import VOD + from vod.can_bus.can_bus_api import VODCanBus + from vod.eval.common.utils import quaternion_yaw + from vod.map_expansion.arcline_path_utils import discretize_lane + from vod.map_expansion.map_api import VODMap + from pyquaternion import Quaternion +except ImportError as e: + logger.warning("Can not import vod-devkit: {}".format(e)) + +EGO = "ego" + + +def get_metadrive_type(obj_type): + meta_type = obj_type + md_type = None + if ALL_TYPE[obj_type] == "barrier": + md_type = MetaDriveType.TRAFFIC_BARRIER + elif ALL_TYPE[obj_type] == "trafficcone": + md_type = MetaDriveType.TRAFFIC_CONE + elif obj_type in VEHICLE_TYPE: + md_type = MetaDriveType.VEHICLE + elif obj_type in HUMAN_TYPE: + md_type = MetaDriveType.PEDESTRIAN + elif obj_type in BICYCLE_TYPE: + md_type = MetaDriveType.CYCLIST + + # assert meta_type != MetaDriveType.UNSET and meta_type != "noise" + return md_type, meta_type + + +def parse_frame(frame, vod: VOD): + ret = {} + for obj_id in frame["anns"]: + obj = vod.get("sample_annotation", obj_id) + # velocity = vod.box_velocity(obj_id)[:2] + # if np.nan in velocity: + velocity = np.array([0.0, 0.0]) + ret[obj["instance_token"]] = { + "position": obj["translation"], + "obj_id": obj["instance_token"], + "heading": quaternion_yaw(Quaternion(*obj["rotation"])), + "rotation": obj["rotation"], + "velocity": velocity, + "size": obj["size"], + "visible": obj["visibility_token"], + "attribute": [vod.get("attribute", i)["name"] for i in obj["attribute_tokens"]], + "type": obj["category_name"], + } + # print(frame["data"]["dummy"]) + ego_token = vod.get("sample_data", frame["data"]["dummy"])["ego_pose_token"] + # print(ego_token) + ego_state = vod.get("ego_pose", ego_token) + ret[EGO] = { + "position": ego_state["translation"], + "obj_id": EGO, + "heading": quaternion_yaw(Quaternion(*ego_state["rotation"])), + "rotation": ego_state["rotation"], + "type": "vehicle.car", + "velocity": np.array([0.0, 0.0]), + # size https://en.wikipedia.org/wiki/Renault_Zoe + "size": [4.08, 1.73, 1.56], + } + return ret + + +def interpolate_heading(heading_data, old_valid, new_valid, num_to_interpolate=1): + new_heading_theta = np.zeros_like(new_valid) + for k, valid in enumerate(old_valid[:-1]): + if abs(valid) > 1e-1 and abs(old_valid[k + 1]) > 1e-1: + diff = (heading_data[k + 1] - 
heading_data[k] + np.pi) % (2 * np.pi) - np.pi + # step = diff + interpolate_heading = np.linspace(heading_data[k], heading_data[k] + diff, 2) # not sure if 2 is correct + new_heading_theta[k * num_to_interpolate:(k + 1) * num_to_interpolate] = (interpolate_heading[:-1]) + elif abs(valid) > 1e-1 and abs(old_valid[k + 1]) < 1e-1: + new_heading_theta[k * num_to_interpolate:(k + 1) * num_to_interpolate] = (heading_data[k]) + new_heading_theta[-1] = heading_data[-1] + return new_heading_theta * new_valid + + +def _interpolate_one_dim(data, old_valid, new_valid, num_to_interpolate=1): + new_data = np.zeros_like(new_valid) + for k, valid in enumerate(old_valid[:-1]): + if abs(valid) > 1e-1 and abs(old_valid[k + 1]) > 1e-1: + diff = data[k + 1] - data[k] + # step = diff + interpolate_data = np.linspace(data[k], data[k] + diff, num_to_interpolate + 1) + new_data[k * num_to_interpolate:(k + 1) * num_to_interpolate] = (interpolate_data[:-1]) + elif abs(valid) > 1e-1 and abs(old_valid[k + 1]) < 1e-1: + new_data[k * num_to_interpolate:(k + 1) * num_to_interpolate] = data[k] + new_data[-1] = data[-1] + return new_data * new_valid + + +def interpolate(origin_y, valid, new_valid): + if len(origin_y.shape) == 1: + ret = _interpolate_one_dim(origin_y, valid, new_valid) + elif len(origin_y.shape) == 2: + ret = [] + for dim in range(origin_y.shape[-1]): + new_y = _interpolate_one_dim(origin_y[..., dim], valid, new_valid) + new_y = np.expand_dims(new_y, axis=-1) + ret.append(new_y) + ret = np.concatenate(ret, axis=-1) + else: + raise ValueError("Y has shape {}, Can not interpolate".format(origin_y.shape)) + return ret + + +def get_tracks_from_frames(vod: VOD, scene_info, frames, num_to_interpolate=5): + episode_len = len(frames) + # Fill tracks + all_objs = set() + for frame in frames: + all_objs.update(frame.keys()) + tracks = { + k: dict( + type=MetaDriveType.UNSET, + state=dict( + position=np.zeros(shape=(episode_len, 3)), + heading=np.zeros(shape=(episode_len, )), + velocity=np.zeros(shape=(episode_len, 2)), + valid=np.zeros(shape=(episode_len, )), + length=np.zeros(shape=(episode_len, 1)), + width=np.zeros(shape=(episode_len, 1)), + height=np.zeros(shape=(episode_len, 1)), + ), + metadata=dict( + track_length=episode_len, + type=MetaDriveType.UNSET, + object_id=k, + original_id=k, + ), + ) + for k in list(all_objs) + } + + tracks_to_remove = set() + first = True + a = 0 + for frame_idx in range(episode_len): + # Record all agents' states (position, velocity, ...) 
+ # if frame_idx == 0: + # continue + for id, state in frames[frame_idx].items(): + # Fill type + md_type, meta_type = get_metadrive_type(state["type"]) + tracks[id]["type"] = md_type + tracks[id][SD.METADATA]["type"] = meta_type + if md_type is None or md_type == MetaDriveType.UNSET: + tracks_to_remove.add(id) + continue + elif first: + first = False + id_f = id + + if id == id_f: + a += 1 + # print("FOOUND KEY: ", a, episode_len) + # print(state["position"]) + tracks[id]["type"] = md_type + tracks[id][SD.METADATA]["type"] = meta_type + + # Introducing the state item + if ((frame_idx == 0) or (frame_idx == 1)) and (id == list(frames[frame_idx].keys())[0]): + if state["position"][0] != 0: + print(state["position"], md_type) + tracks[id]["state"]["position"][frame_idx] = state["position"] + tracks[id]["state"]["heading"][frame_idx] = state["heading"] + tracks[id]["state"]["velocity"][frame_idx] = tracks[id]["state"]["velocity"][frame_idx] + tracks[id]["state"]["valid"][frame_idx] = 1 + + tracks[id]["state"]["length"][frame_idx] = state["size"][1] + tracks[id]["state"]["width"][frame_idx] = state["size"][0] + tracks[id]["state"]["height"][frame_idx] = state["size"][2] + + tracks[id]["metadata"]["original_id"] = id + tracks[id]["metadata"]["object_id"] = id + + for track in tracks_to_remove: + track_data = tracks.pop(track) + obj_type = track_data[SD.METADATA]["type"] + print("\nWARNING: Can not map type: {} to any MetaDrive Type".format(obj_type)) + + new_episode_len = (episode_len - 1) * num_to_interpolate + 1 + + # interpolate + interpolate_tracks = {} + for ( + id, + track, + ) in tracks.items(): + interpolate_tracks[id] = copy.deepcopy(track) + interpolate_tracks[id]["metadata"]["track_length"] = new_episode_len + + # valid first + new_valid = np.zeros(shape=(new_episode_len, )) + if track["state"]["valid"][0]: + new_valid[0] = 1 + for k, valid in enumerate(track["state"]["valid"][1:], start=1): + if valid: + if abs(new_valid[(k - 1) * num_to_interpolate] - 1) < 1e-2: + start_idx = (k - 1) * num_to_interpolate + 1 + else: + start_idx = k * num_to_interpolate + new_valid[start_idx:k * num_to_interpolate + 1] = 1 + interpolate_tracks[id]["state"]["valid"] = new_valid + + # position + interpolate_tracks[id]["state"]["position"] = interpolate( + track["state"]["position"], track["state"]["valid"], new_valid + ) + # print(np.diff(track["state"]["position"], axis=0)) + # print(interpolate_tracks[id]["state"]["position"], track["state"]["position"]) + if id == "ego" and not scene_info.get("prediction", False): + assert "prediction" not in scene_info + # We can get it from canbus + try: + canbus = VODCanBus(dataroot=vod.dataroot) + imu_pos = np.asarray([state["pos"] for state in canbus.get_messages(scene_info["name"], "pose")[::5]]) + min_len = min(len(imu_pos), new_episode_len) + interpolate_tracks[id]["state"]["position"][:min_len] = imu_pos[:min_len] + except: + logger.info("Fail to get canbus data for {}".format(scene_info["name"])) + + # velocity + interpolate_tracks[id]["state"]["velocity"] = interpolate( + track["state"]["velocity"], track["state"]["valid"], new_valid + ) + vel = (interpolate_tracks[id]["state"]["position"][1:] - interpolate_tracks[id]["state"]["position"][:-1]) + interpolate_tracks[id]["state"]["velocity"][:-1] = vel[..., :2] / 0.1 + for k, valid in enumerate(new_valid[1:], start=1): + if valid == 0 or not valid or abs(valid) < 1e-2: + interpolate_tracks[id]["state"]["velocity"][k] = np.array([0.0, 0.0]) + interpolate_tracks[id]["state"]["velocity"][k - 1] = 
np.array([0.0, 0.0]) + # speed outlier check + max_vel = np.max(np.linalg.norm(interpolate_tracks[id]["state"]["velocity"], axis=-1)) + if max_vel > 30: + print("\nWARNING: Too large speed for {}: {}".format(id, max_vel)) + + # heading + # then update position + new_heading = interpolate_heading(track["state"]["heading"], track["state"]["valid"], new_valid) + interpolate_tracks[id]["state"]["heading"] = new_heading + if id == "ego" and not scene_info.get("prediction", False): + assert "prediction" not in scene_info + # We can get it from canbus + try: + canbus = VODCanBus(dataroot=vod.dataroot) + imu_heading = np.asarray( + [ + quaternion_yaw(Quaternion(state["orientation"])) + for state in canbus.get_messages(scene_info["name"], "pose")[::5] + ] + ) + min_len = min(len(imu_heading), new_episode_len) + interpolate_tracks[id]["state"]["heading"][:min_len] = imu_heading[:min_len] + except: + logger.info("Fail to get canbus data for {}".format(scene_info["name"])) + + for k, v in track["state"].items(): + if k in ["valid", "heading", "position", "velocity"]: + continue + else: + interpolate_tracks[id]["state"][k] = interpolate(v, track["state"]["valid"], new_valid) + # if id == "ego": + # ego is valid all time, so we can calculate the velocity in this way + return interpolate_tracks + + +def get_map_features(scene_info, vod: VOD, map_center, radius=500, points_distance=1, only_lane=False): + """ + Extract map features from vod data. The objects in specified region will be returned. Sampling rate determines + the distance between 2 points when extracting lane center line. + """ + ret = {} + map_name = vod.get("log", scene_info["log_token"])["location"] + map_api = VODMap(dataroot=vod.dataroot, map_name=map_name) + + layer_names = [ + # "line", + # "polygon", + # "node", + "drivable_area", + "road_segment", + # 'road_block', + "lane", + "ped_crossing", + "walkway", + # 'stop_line', + # 'carpark_area', + "lane_connector", + # 'road_divider', + # 'lane_divider', + # 'traffic_light' + ] + # road segment includes all roadblocks (a list of lanes in the same direction), intersection and unstructured road + + map_objs = map_api.get_records_in_radius(map_center[0], map_center[1], radius, layer_names) + + if not only_lane: + # build map boundary + polygons = [] + for id in map_objs["drivable_area"]: + seg_info = map_api.get("drivable_area", id) + assert seg_info["token"] == id + for polygon_token in seg_info["polygon_tokens"]: + polygon = map_api.extract_polygon(polygon_token) + polygons.append(polygon) + # for id in map_objs["road_segment"]: + # seg_info = map_api.get("road_segment", id) + # assert seg_info["token"] == id + # polygon = map_api.extract_polygon(seg_info["polygon_token"]) + # polygons.append(polygon) + # for id in map_objs["road_block"]: + # seg_info = map_api.get("road_block", id) + # assert seg_info["token"] == id + # polygon = map_api.extract_polygon(seg_info["polygon_token"]) + # polygons.append(polygon) + polygons = [geom if geom.is_valid else geom.buffer(0) for geom in polygons] + boundaries = gpd.GeoSeries(unary_union(polygons)).boundary.explode(index_parts=True) + for idx, boundary in enumerate(boundaries[0]): + block_points = np.array(list(i for i in zip(boundary.coords.xy[0], boundary.coords.xy[1]))) + id = "boundary_{}".format(idx) + ret[id] = { + SD.TYPE: MetaDriveType.LINE_SOLID_SINGLE_WHITE, + SD.POLYLINE: block_points, + } + + # broken line + # for id in map_objs["lane_divider"]: + # line_info = map_api.get("lane_divider", id) + # assert line_info["token"] == id + # line = 
map_api.extract_line(line_info["line_token"]).coords.xy + # line = np.asarray([[line[0][i], line[1][i]] for i in range(len(line[0]))]) + # ret[id] = {SD.TYPE: MetaDriveType.LINE_BROKEN_SINGLE_WHITE, SD.POLYLINE: line} + + # # solid line + # for id in map_objs["road_divider"]: + # line_info = map_api.get("road_divider", id) + # assert line_info["token"] == id + # line = map_api.extract_line(line_info["line_token"]).coords.xy + # line = np.asarray([[line[0][i], line[1][i]] for i in range(len(line[0]))]) + # ret[id] = {SD.TYPE: MetaDriveType.LINE_SOLID_SINGLE_YELLOW, SD.POLYLINE: line} + + # crosswalk + for id in map_objs["ped_crossing"]: + info = map_api.get("ped_crossing", id) + assert info["token"] == id + boundary = map_api.extract_polygon(info["polygon_token"]).exterior.xy + boundary_polygon = np.asarray([[boundary[0][i], boundary[1][i]] for i in range(len(boundary[0]))]) + ret[id] = { + SD.TYPE: MetaDriveType.CROSSWALK, + SD.POLYGON: boundary_polygon, + } + + # walkway + for id in map_objs["walkway"]: + info = map_api.get("walkway", id) + assert info["token"] == id + boundary = map_api.extract_polygon(info["polygon_token"]).exterior.xy + boundary_polygon = np.asarray([[boundary[0][i], boundary[1][i]] for i in range(len(boundary[0]))]) + ret[id] = { + SD.TYPE: MetaDriveType.BOUNDARY_SIDEWALK, + SD.POLYGON: boundary_polygon, + } + + # normal lane + for id in map_objs["lane"]: + lane_info = map_api.get("lane", id) + assert lane_info["token"] == id + boundary = map_api.extract_polygon(lane_info["polygon_token"]).boundary.xy + boundary_polygon = np.asarray([[boundary[0][i], boundary[1][i]] for i in range(len(boundary[0]))]) + # boundary_polygon += [[boundary[0][i], boundary[1][i]] for i in range(len(boundary[0]))] + ret[id] = { + SD.TYPE: MetaDriveType.LANE_SURFACE_STREET, + SD.POLYLINE: np.asarray(discretize_lane(map_api.arcline_path_3[id], resolution_meters=points_distance)), + SD.POLYGON: boundary_polygon, + SD.ENTRY: map_api.get_incoming_lane_ids(id), + SD.EXIT: map_api.get_outgoing_lane_ids(id), + SD.LEFT_NEIGHBORS: [], + SD.RIGHT_NEIGHBORS: [], + } + + # intersection lane + for id in map_objs["lane_connector"]: + lane_info = map_api.get("lane_connector", id) + assert lane_info["token"] == id + # boundary = map_api.extract_polygon(lane_info["polygon_token"]).boundary.xy + # boundary_polygon = [[boundary[0][i], boundary[1][i], 0.1] for i in range(len(boundary[0]))] + # boundary_polygon += [[boundary[0][i], boundary[1][i], 0.] for i in range(len(boundary[0]))] + ret[id] = { + SD.TYPE: MetaDriveType.LANE_SURFACE_UNSTRUCTURE, + SD.POLYLINE: np.asarray(discretize_lane(map_api.arcline_path_3[id], resolution_meters=points_distance)), + # SD.POLYGON: boundary_polygon, + "speed_limit_kmh": 100, + SD.ENTRY: map_api.get_incoming_lane_ids(id), + SD.EXIT: map_api.get_outgoing_lane_ids(id), + } + + # # stop_line + # for id in map_objs["stop_line"]: + # info = map_api.get("stop_line", id) + # assert info["token"] == id + # boundary = map_api.extract_polygon(info["polygon_token"]).exterior.xy + # boundary_polygon = np.asarray([[boundary[0][i], boundary[1][i]] for i in range(len(boundary[0]))]) + # ret[id] = { + # SD.TYPE: MetaDriveType.STOP_LINE, + # SD.POLYGON: boundary_polygon , + # } + + # 'stop_line', + # 'carpark_area', + + return ret + + +def convert_vod_scenario( + token, + version, + vodelft: VOD, + map_radius=500, + prediction=False, + past=2, + future=6, + only_lane=False, +): + """ + Data will be interpolated to 0.1s time interval, while the time interval of original key frames are 0.5s. 
+ """ + if prediction: + past_num = int(float(past) / 0.1) + future_num = int(float(future) / 0.1) + vode = vodelft + instance_token, sample_token = token.split("_") + current_sample = last_sample = next_sample = vode.get("sample", sample_token) + past_samples = [] + future_samples = [] + for _ in range(past_num): + if last_sample["prev"] == "": + break + last_sample = vode.get("sample", last_sample["prev"]) + past_samples.append(parse_frame(last_sample, vode)) + + for _ in range(future_num): + if next_sample["next"] == "": + break + next_sample = vode.get("sample", next_sample["next"]) + future_samples.append(parse_frame(next_sample, vode)) + frames = (past_samples[::-1] + [parse_frame(current_sample, vode)] + future_samples) + scene_info = copy.copy(vode.get("scene", current_sample["scene_token"])) + scene_info["name"] = scene_info["name"] + "_" + token + scene_info["prediction"] = True + frames_scene_info = [frames, scene_info] + else: + frames_scene_info = extract_frames_scene_info(token, vodelft) + + scenario_log_interval = 0.1 + frames, scene_info = frames_scene_info + result = SD() + result[SD.ID] = scene_info["name"] + result[SD.VERSION] = "vod" + version + result[SD.LENGTH] = len(frames) + result[SD.METADATA] = {} + result[SD.METADATA]["dataset"] = "vod" + result[SD.METADATA][SD.METADRIVE_PROCESSED] = False + result[SD.METADATA]["map"] = vodelft.get("log", scene_info["log_token"])["location"] + result[SD.METADATA]["date"] = vodelft.get("log", scene_info["log_token"])["date_captured"] + result[SD.METADATA]["coordinate"] = "right-handed" + # result[SD.METADATA]["dscenario_token"] = scene_token + result[SD.METADATA][SD.ID] = scene_info["name"] + result[SD.METADATA]["scenario_id"] = scene_info["name"] + result[SD.METADATA]["sample_rate"] = scenario_log_interval + result[SD.METADATA][SD.TIMESTEP] = np.arange(0.0, len(frames), 1) * 0.1 + # interpolating to 0.1s interval + result[SD.TRACKS] = get_tracks_from_frames(vodelft, scene_info, frames, num_to_interpolate=1) + result[SD.METADATA][SD.SDC_ID] = "ego" + + # No traffic light in vod at this stage + result[SD.DYNAMIC_MAP_STATES] = {} + if prediction: + track_to_predict = result[SD.TRACKS][instance_token] + result[SD.METADATA]["tracks_to_predict"] = { + instance_token: { + "track_index": list(result[SD.TRACKS].keys()).index(instance_token), + "track_id": instance_token, + "difficulty": 0, + "object_type": track_to_predict["type"], + } + } + + # map + print(result[SD.LENGTH], len(result[SD.METADATA][SD.TIMESTEP])) + map_center = np.array(result[SD.TRACKS]["ego"]["state"]["position"][0]) + result[SD.MAP_FEATURES] = get_map_features(scene_info, vodelft, map_center, map_radius, only_lane=only_lane) + del frames_scene_info + del frames + del scene_info + return result + + +def extract_frames_scene_info(scene, vod): + scene_token = scene["token"] + scene_info = vod.get("scene", scene_token) + scene_info["nbr_samples"] -= 1 + frames = [] + current_frame = vod.get("sample", scene_info["first_sample_token"]) + while current_frame["token"] != scene_info["last_sample_token"]: + frames.append(parse_frame(current_frame, vod)) + current_frame = vod.get("sample", current_frame["next"]) + frames.append(parse_frame(current_frame, vod)) + frames = frames[1:] + assert current_frame["next"] == "" + assert len(frames) == scene_info["nbr_samples"], "Number of sample mismatches! 
" + return frames, scene_info + + +def get_vod_scenarios(dataroot, version, num_workers=2): + vode = VOD(version=version, dataroot=dataroot) + + return vode.scene, [vode for _ in range(num_workers)] + + +def get_vod_prediction_split(dataroot, version, past, future, num_workers=2): + # TODO do properly + split_to_scene = { + "mini_train": "v1.0-mini", + "mini_val": "v1.0-mini", + "train": "v1.0-trainval", + "train_val": "v1.0-trainval", + "val": "v1.0-trainval", + "test": "v1.0-test", + } + + vode = VOD(version=split_to_scene[version], dataroot=dataroot) + + return get_prediction_challenge_split(version, dataroot=dataroot), [vode for _ in range(num_workers)]