Taxi Trips Study - Step-by-Step
This notebook analyzes taxi trip data, mapping pickups and dropoffs to street segments and visualizing counts.
Data Sources
- Yellow NYC Taxis 2015: Sample taxi trip data for NYC.
In [1]:
Copied!
import urban_mapper as um
# Initialise UrbanMapper: `mapper` is the entry point that exposes the builders
# used in this notebook (`urban_layer`, `loader`, `imputer`, `filter`,
# `enricher`, `visual`).
mapper = um.UrbanMapper()

# Step 1: Create urban layer for street segments.
# The builder chain is unrolled into named stages; each call is made on the
# object returned by the previous one, exactly as in a single fluent expression.
street_layer_builder = mapper.urban_layer.with_type("streets_roads")
street_layer_builder = street_layer_builder.from_place(
    "Downtown Brooklyn, New York City, USA",
    network_type="drive",
)
layer = street_layer_builder.build()
import urban_mapper as um
# Initialise UrbanMapper: `mapper` is the entry point that exposes the builders
# used in this notebook (`urban_layer`, `loader`, `imputer`, `filter`,
# `enricher`, `visual`).
mapper = um.UrbanMapper()

# Step 1: Create urban layer for street segments.
# The builder chain is unrolled into named stages; each call is made on the
# object returned by the previous one, exactly as in a single fluent expression.
street_layer_builder = mapper.urban_layer.with_type("streets_roads")
street_layer_builder = street_layer_builder.from_place(
    "Downtown Brooklyn, New York City, USA",
    network_type="drive",
)
layer = street_layer_builder.build()
/home/docs/checkouts/readthedocs.org/user_builds/urbanmapper/envs/80/lib/python3.10/site-packages/osmnx/convert.py:542: FutureWarning: <class 'geopandas.array.GeometryArray'>._reduce will require a `keepdims` parameter in the future dupes = edges[mask].dropna(subset=["geometry"])
In [2]:
Copied!
# Step 2: Load taxi trip data
# Note: For the documentation interactive mode, only 5000 records are queried
# from the dataset (`number_of_rows=5000`). Feel free to lift that limit for a
# more realistic analysis.
taxi_loader = mapper.loader.from_huggingface(
    "oscur/taxisvis1M",
    number_of_rows=5000,
    streaming=True,
)
# The pickup coordinates are declared as the dataset's longitude/latitude columns.
taxi_loader = taxi_loader.with_columns(
    longitude_column="pickup_longitude",
    latitude_column="pickup_latitude",
)
data = taxi_loader.load()

# Cast the four coordinate columns used by the later steps to float, one
# column at a time and in the same order as before.
for coordinate_column in (
    "pickup_longitude",
    "pickup_latitude",
    "dropoff_longitude",
    "dropoff_latitude",
):
    data[coordinate_column] = data[coordinate_column].astype(float)
# Step 2: Load taxi trip data
# Note: For the documentation interactive mode, only 5000 records are queried
# from the dataset (`number_of_rows=5000`). Feel free to lift that limit for a
# more realistic analysis.
taxi_loader = mapper.loader.from_huggingface(
    "oscur/taxisvis1M",
    number_of_rows=5000,
    streaming=True,
)
# The pickup coordinates are declared as the dataset's longitude/latitude columns.
taxi_loader = taxi_loader.with_columns(
    longitude_column="pickup_longitude",
    latitude_column="pickup_latitude",
)
data = taxi_loader.load()

# Cast the four coordinate columns used by the later steps to float, one
# column at a time and in the same order as before.
for coordinate_column in (
    "pickup_longitude",
    "pickup_latitude",
    "dropoff_longitude",
    "dropoff_latitude",
):
    data[coordinate_column] = data[coordinate_column].astype(float)
In [3]:
Copied!
# Step 3: Impute missing coordinates
# One "SimpleGeoImputer" is built per coordinate pair. The two passes run back
# to back: the dropoff pass receives the output of the pickup pass.
pickup_coordinates = ("pickup_longitude", "pickup_latitude")
imputer_pickup = (
    mapper.imputer
    .with_type("SimpleGeoImputer")
    .on_columns(*pickup_coordinates)
    .build()
)
data = imputer_pickup.transform(data, layer)

dropoff_coordinates = ("dropoff_longitude", "dropoff_latitude")
imputer_dropoff = (
    mapper.imputer
    .with_type("SimpleGeoImputer")
    .on_columns(*dropoff_coordinates)
    .build()
)
data = imputer_dropoff.transform(data, layer)
# Step 3: Impute missing coordinates
# One "SimpleGeoImputer" is built per coordinate pair. The two passes run back
# to back: the dropoff pass receives the output of the pickup pass.
pickup_coordinates = ("pickup_longitude", "pickup_latitude")
imputer_pickup = (
    mapper.imputer
    .with_type("SimpleGeoImputer")
    .on_columns(*pickup_coordinates)
    .build()
)
data = imputer_pickup.transform(data, layer)

dropoff_coordinates = ("dropoff_longitude", "dropoff_latitude")
imputer_dropoff = (
    mapper.imputer
    .with_type("SimpleGeoImputer")
    .on_columns(*dropoff_coordinates)
    .build()
)
data = imputer_dropoff.transform(data, layer)
In [4]:
Copied!
# Step 4: Filter to bounding box
# Build the "BoundingBoxFilter" step, then apply it to the trips with the
# street layer passed as the second argument.
# NOTE(review): presumably the box is derived from `layer`'s extent - confirm
# against the UrbanMapper documentation.
bounding_box_builder = mapper.filter.with_type("BoundingBoxFilter")
filter_step = bounding_box_builder.build()
data = filter_step.transform(data, layer)
# Step 4: Filter to bounding box
# Build the "BoundingBoxFilter" step, then apply it to the trips with the
# street layer passed as the second argument.
# NOTE(review): presumably the box is derived from `layer`'s extent - confirm
# against the UrbanMapper documentation.
bounding_box_builder = mapper.filter.with_type("BoundingBoxFilter")
filter_step = bounding_box_builder.build()
data = filter_step.transform(data, layer)
In [5]:
Copied!
# Step 5: Map pickups and dropoffs
import copy

# Copy the layer BEFORE the pickup mapping runs, so that the dropoff mapping is
# called on a separate copy of the layer as it was at this point.
# NOTE(review): presumably map_nearest_layer modifies the layer it is called
# on, which is why a deep copy is taken first - confirm.
tmp_layer = copy.deepcopy(layer)

# Each call is unpacked into two values; only the second one is kept
# (as `mapped_pickups` / `mapped_dropoffs`).
pickup_mapping = dict(
    longitude_column="pickup_longitude",
    latitude_column="pickup_latitude",
    output_column="pickup_segment",
)
_, mapped_pickups = layer.map_nearest_layer(data, **pickup_mapping)

dropoff_mapping = dict(
    longitude_column="dropoff_longitude",
    latitude_column="dropoff_latitude",
    output_column="dropoff_segment",
)
_, mapped_dropoffs = tmp_layer.map_nearest_layer(data, **dropoff_mapping)
# Step 5: Map pickups and dropoffs
import copy

# Copy the layer BEFORE the pickup mapping runs, so that the dropoff mapping is
# called on a separate copy of the layer as it was at this point.
# NOTE(review): presumably map_nearest_layer modifies the layer it is called
# on, which is why a deep copy is taken first - confirm.
tmp_layer = copy.deepcopy(layer)

# Each call is unpacked into two values; only the second one is kept
# (as `mapped_pickups` / `mapped_dropoffs`).
pickup_mapping = dict(
    longitude_column="pickup_longitude",
    latitude_column="pickup_latitude",
    output_column="pickup_segment",
)
_, mapped_pickups = layer.map_nearest_layer(data, **pickup_mapping)

dropoff_mapping = dict(
    longitude_column="dropoff_longitude",
    latitude_column="dropoff_latitude",
    output_column="dropoff_segment",
)
_, mapped_dropoffs = tmp_layer.map_nearest_layer(data, **dropoff_mapping)
In [6]:
Copied!
# Step 6: Enrich with counts
# Pickups first: the enricher groups by "pickup_segment" and writes its count
# into "pickup_count"; it is applied to the mapped pickups and the street layer.
pickup_enricher_builder = mapper.enricher.with_data(group_by="pickup_segment")
pickup_enricher_builder = pickup_enricher_builder.count_by(output_column="pickup_count")
enricher_pickup = pickup_enricher_builder.build()
enriched_layer_pickup = enricher_pickup.enrich(mapped_pickups, layer)

# Dropoffs second: same recipe with "dropoff_segment" / "dropoff_count", but
# applied to the pickup-enriched layer so the final layer is the result of
# both enrichment passes.
dropoff_enricher_builder = mapper.enricher.with_data(group_by="dropoff_segment")
dropoff_enricher_builder = dropoff_enricher_builder.count_by(output_column="dropoff_count")
enricher_dropoff = dropoff_enricher_builder.build()
enriched_layer = enricher_dropoff.enrich(mapped_dropoffs, enriched_layer_pickup)
# Step 6: Enrich with counts
# Pickups first: the enricher groups by "pickup_segment" and writes its count
# into "pickup_count"; it is applied to the mapped pickups and the street layer.
pickup_enricher_builder = mapper.enricher.with_data(group_by="pickup_segment")
pickup_enricher_builder = pickup_enricher_builder.count_by(output_column="pickup_count")
enricher_pickup = pickup_enricher_builder.build()
enriched_layer_pickup = enricher_pickup.enrich(mapped_pickups, layer)

# Dropoffs second: same recipe with "dropoff_segment" / "dropoff_count", but
# applied to the pickup-enriched layer so the final layer is the result of
# both enrichment passes.
dropoff_enricher_builder = mapper.enricher.with_data(group_by="dropoff_segment")
dropoff_enricher_builder = dropoff_enricher_builder.count_by(output_column="dropoff_count")
enricher_dropoff = dropoff_enricher_builder.build()
enriched_layer = enricher_dropoff.enrich(mapped_dropoffs, enriched_layer_pickup)
In [7]:
Copied!
# Step 7: Visualize interactively
# Style options and the columns to render are named up front so they are easy
# to spot and tweak.
map_style = {"tiles": "CartoDB dark_matter", "colorbar_text_color": "white"}
count_columns = ["pickup_count", "dropoff_count"]

visualiser = (
    mapper.visual
    .with_type("Interactive")
    .with_style(map_style)
    .build()
)

# Render the object returned by the enriched layer's get_layer() for both
# count columns.
fig = visualiser.render(enriched_layer.get_layer(), columns=count_columns)
fig  # bare last expression so the notebook displays the figure as the cell output
# Step 7: Visualize interactively
# Style options and the columns to render are named up front so they are easy
# to spot and tweak.
map_style = {"tiles": "CartoDB dark_matter", "colorbar_text_color": "white"}
count_columns = ["pickup_count", "dropoff_count"]

visualiser = (
    mapper.visual
    .with_type("Interactive")
    .with_style(map_style)
    .build()
)

# Render the object returned by the enriched layer's get_layer() for both
# count columns.
fig = visualiser.render(enriched_layer.get_layer(), columns=count_columns)
fig  # bare last expression so the notebook displays the figure as the cell output
Out[7]: