Generate segment URLs in dataframes

The segment_url DataFusion utility can be used to generate Rerun URLs that are clickable within the viewer. The generated URLs can optionally seek to a timestamp, select a time range, or select an entity path.

Setup

We start by loading sample data into a local Data Platform instance and creating a table with some segment metadata.

from __future__ import annotations

from datetime import datetime, timedelta
from pathlib import Path

import pyarrow as pa
import rerun as rr
from datafusion import lit
from rerun.utilities.datafusion.functions.url_generation import segment_url

# Locate the bundled `sample_5` recordings via an ancestor directory of this file.
sample_5_path = Path(__file__).parents[5].joinpath("tests", "assets", "rrd", "sample_5")

# Serve the recordings as a dataset named "sample_dataset" and connect a client to it.
server = rr.server.Server(datasets={"sample_dataset": sample_5_path})
client = server.client()
dataset = client.get_dataset(name="sample_dataset")

# Sorting before slicing makes the choice of three segments deterministic.
all_segment_ids = sorted(dataset.segment_ids())
segment_ids = all_segment_ids[:3]
view = dataset.filter_segments(segment_ids)

# Synthetic per-segment metadata: one event per segment, spaced one second apart,
# each paired with a 500 ms range that starts at the event time.
base_time = datetime(2023, 11, 14, 22, 13, 20)
event_times = [base_time + timedelta(seconds=offset) for offset in range(3)]
range_length = timedelta(milliseconds=500)
range_ends = [start + range_length for start in event_times]
ns_timestamp = pa.timestamp("ns")

# The table is keyed by `rerun_segment_id`; the other columns feed `segment_url` later on.
meta_columns = {
    "rerun_segment_id": segment_ids,
    "event_time": pa.array(event_times, type=ns_timestamp),
    "range_start": pa.array(event_times, type=ns_timestamp),
    "range_end": pa.array(range_ends, type=ns_timestamp),
    "entity_path": ["/camera/rgb", "/observation/joint_positions", "/observation/gripper_state"],
}
meta = pa.record_batch(meta_columns)

# Expose the batch as a dataframe through the client's context so that it can be
# passed to `segment_table(join_meta=...)` in the examples below.
ctx = client.ctx
meta_df = ctx.from_arrow(meta)

Basic URL

With no extra arguments, segment_url produces a URL that opens the segment in the viewer.

# List the view's segments in a stable order and attach a plain viewer URL to each row.
basic = (
    view.segment_table()
    .select("rerun_segment_id")
    .sort("rerun_segment_id")
    .with_column("url", segment_url(dataset))
)
for url in basic.select("url").to_pydict()["url"]:
    print(url)

Output:

rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_1>
rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_2>
rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_3>

Specify the time cursor position

Pass timestamp and timeline_name to generate a URL that tells the viewer to activate a specific timeline and set the time cursor to a specific value. A string passed as timestamp is interpreted as a column name; alternatively, any DataFusion expression, including a literal, can be provided.

# Join the metadata table, then point each URL's time cursor at the row's
# `event_time` value on the `real_time` timeline.
ts = (
    view.segment_table(join_meta=meta_df)
    .select("rerun_segment_id", "event_time")
    .sort("rerun_segment_id")
    .with_column(
        "url",
        segment_url(dataset, timestamp="event_time", timeline_name="real_time"),
    )
)
for url in ts.select("url").to_pydict()["url"]:
    print(url)

Output:

rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_1>#when=real_time@2023-11-14T22:13:20Z
rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_2>#when=real_time@2023-11-14T22:13:21Z
rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_3>#when=real_time@2023-11-14T22:13:22Z

Selecting a time range

Pass time_range_start and time_range_end together with timeline_name to generate a URL that specifies a time range to be selected. Both can be a column name or a DataFusion expression.

# Join the metadata table and keep only the columns that bound the time range.
tr = (
    view.segment_table(join_meta=meta_df)
    .select("rerun_segment_id", "range_start", "range_end")
    .sort("rerun_segment_id")
)

# Both range bounds are given as column names and resolved against `real_time`.
range_url = segment_url(
    dataset,
    time_range_start="range_start",
    time_range_end="range_end",
    timeline_name="real_time",
)
tr = tr.with_column("url", range_url)
for url in tr.select("url").to_pydict()["url"]:
    print(url)

Output:

rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_1>#time_selection=real_time@2023-11-14T22:13:20Z..2023-11-14T22:13:20.5Z
rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_2>#time_selection=real_time@2023-11-14T22:13:21Z..2023-11-14T22:13:21.5Z
rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_3>#time_selection=real_time@2023-11-14T22:13:22Z..2023-11-14T22:13:22.5Z

Selecting an entity

Pass selection to generate a URL that specifies which entity path, instance, and/or component to select. The value must be a string using entity path syntax, optionally followed by an instance index in brackets and/or a component name after a colon. For example: /world/points, /world/points[#42], /world/points:Color, or /world/points[#42]:Color.

# Join the metadata table and use each row's `entity_path` value as the URL selection.
sel = (
    view.segment_table(join_meta=meta_df)
    .select("rerun_segment_id", "entity_path")
    .sort("rerun_segment_id")
    .with_column("url", segment_url(dataset, selection="entity_path"))
)
for url in sel.select("url").to_pydict()["url"]:
    print(url)

Output:

rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_1>#selection=/camera/rgb
rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_2>#selection=/observation/joint_positions
rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_3>#selection=/observation/gripper_state

Combining features

The time cursor position, the time range selection, and the entity selection can all be used together. The generated URL includes every fragment that was specified, joined with &.

# Join the metadata table and keep every column referenced by the URL below.
combined = (
    view.segment_table(join_meta=meta_df)
    .select("rerun_segment_id", "event_time", "range_start", "range_end", "entity_path")
    .sort("rerun_segment_id")
)

# Time cursor, time range, and selection are all requested in a single call.
combined_url = segment_url(
    dataset,
    timestamp="event_time",
    timeline_name="real_time",
    time_range_start="range_start",
    time_range_end="range_end",
    selection="entity_path",
)
combined = combined.with_column("url", combined_url)
for url in combined.select("url").to_pydict()["url"]:
    print(url)

Output:

rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_1>#selection=/camera/rgb&when=real_time@2023-11-14T22:13:20Z&time_selection=real_time@2023-11-14T22:13:20Z..2023-11-14T22:13:20.5Z
rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_2>#selection=/observation/joint_positions&when=real_time@2023-11-14T22:13:21Z&time_selection=real_time@2023-11-14T22:13:21Z..2023-11-14T22:13:21.5Z
rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_3>#selection=/observation/gripper_state&when=real_time@2023-11-14T22:13:22Z&time_selection=real_time@2023-11-14T22:13:22Z..2023-11-14T22:13:22.5Z

Using expressions

Every parameter that accepts a column name string also accepts an arbitrary DataFusion expression. This is useful when you want to supply a constant value for all rows using lit() or build more advanced expressions.

# Join the metadata table; only `event_time` is needed as a column here.
expr = (
    view.segment_table(join_meta=meta_df)
    .select("rerun_segment_id", "event_time")
    .sort("rerun_segment_id")
)

# `timestamp` is given as a column name, while `selection` is a literal expression
# that applies the same entity path to every row.
literal_selection_url = segment_url(
    dataset,
    timestamp="event_time",
    timeline_name="real_time",
    selection=lit("/camera/rgb"),
)
expr = expr.with_column("url", literal_selection_url)
for url in expr.select("url").to_pydict()["url"]:
    print(url)

Output:

rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_1>#selection=/camera/rgb&when=real_time@2023-11-14T22:13:20Z
rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_2>#selection=/camera/rgb&when=real_time@2023-11-14T22:13:21Z
rerun+http://localhost:51234/dataset/<DATASET_ID>?segment_id=<SEGMENT_ID_3>#selection=/camera/rgb&when=real_time@2023-11-14T22:13:22Z