Taxi Trips Study - Pipeline
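This notebook uses `UrbanPipeline` to analyze taxi trips end to end: pickup and dropoff coordinates are mapped onto the drivable street segments of Downtown Brooklyn, and the number of pickups and dropoffs is then counted per segment.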
Data Sources
- Yellow NYC Taxis 2015: Sample taxi trip data for NYC, loaded from the Hugging Face dataset `oscur/taxisvis1M` (limited to 5,000 rows in this notebook).
In [1]:
Copied!
import urban_mapper as um
from urban_mapper.pipeline import UrbanPipeline
# Define the pipeline: build every step as a named object first (in the same
# order they are registered), then hand the ordered (name, step) pairs to
# UrbanPipeline.

# Drivable street network for Downtown Brooklyn. Two mappings are registered:
# pickup coordinates go to "pickup_segment", dropoff coordinates go to
# "dropoff_segment".
street_layer = (
    um.UrbanMapper()
    .urban_layer.with_type("streets_roads")
    .from_place("Downtown Brooklyn, New York City, USA", network_type="drive")
    .with_mapping(
        longitude_column="pickup_longitude",
        latitude_column="pickup_latitude",
        output_column="pickup_segment",
    )
    .with_mapping(
        longitude_column="dropoff_longitude",
        latitude_column="dropoff_latitude",
        output_column="dropoff_segment",
    )
    .build()
)

# Note: For the documentation interactive mode, we only query 5000 records
# from the dataset. Feel free to remove the limit for a more realistic analysis.
taxi_loader = (
    um.UrbanMapper()
    .loader.from_huggingface("oscur/taxisvis1M", number_of_rows=5000, streaming=True)
    .with_columns(longitude_column="pickup_longitude", latitude_column="pickup_latitude")
    .build()
)

# One SimpleGeoImputer per coordinate pair (pickup, then dropoff).
pickup_imputer = (
    um.UrbanMapper()
    .imputer.with_type("SimpleGeoImputer")
    .on_columns("pickup_longitude", "pickup_latitude")
    .build()
)
dropoff_imputer = (
    um.UrbanMapper()
    .imputer.with_type("SimpleGeoImputer")
    .on_columns("dropoff_longitude", "dropoff_latitude")
    .build()
)

# NOTE(review): presumably restricts records to the urban layer's extent;
# confirm against the UrbanMapper BoundingBoxFilter documentation.
bbox_filter = um.UrbanMapper().filter.with_type("BoundingBoxFilter").build()

# Count enrichers: one grouped by pickup segment, one grouped by dropoff segment.
pickup_counter = (
    um.UrbanMapper()
    .enricher.with_data(group_by="pickup_segment")
    .count_by(output_column="pickup_count")
    .build()
)
dropoff_counter = (
    um.UrbanMapper()
    .enricher.with_data(group_by="dropoff_segment")
    .count_by(output_column="dropoff_count")
    .build()
)

# Interactive visualiser with a light basemap and a gray colorbar label.
map_visualiser = (
    um.UrbanMapper()
    .visual.with_type("Interactive")
    .with_style({"tiles": "CartoDB Positron", "colorbar_text_color": "gray"})
    .build()
)

pipeline = UrbanPipeline(
    [
        ("urban_layer", street_layer),
        ("loader", taxi_loader),
        ("impute_pickup", pickup_imputer),
        ("impute_dropoff", dropoff_imputer),
        ("filter", bbox_filter),
        ("enrich_pickups", pickup_counter),
        ("enrich_dropoffs", dropoff_counter),
        ("visualiser", map_visualiser),
    ]
)
import urban_mapper as um
from urban_mapper.pipeline import UrbanPipeline
# Define the pipeline: build every step as a named object first (in the same
# order they are registered), then hand the ordered (name, step) pairs to
# UrbanPipeline.

# Drivable street network for Downtown Brooklyn. Two mappings are registered:
# pickup coordinates go to "pickup_segment", dropoff coordinates go to
# "dropoff_segment".
street_layer = (
    um.UrbanMapper()
    .urban_layer.with_type("streets_roads")
    .from_place("Downtown Brooklyn, New York City, USA", network_type="drive")
    .with_mapping(
        longitude_column="pickup_longitude",
        latitude_column="pickup_latitude",
        output_column="pickup_segment",
    )
    .with_mapping(
        longitude_column="dropoff_longitude",
        latitude_column="dropoff_latitude",
        output_column="dropoff_segment",
    )
    .build()
)

# Note: For the documentation interactive mode, we only query 5000 records
# from the dataset. Feel free to remove the limit for a more realistic analysis.
taxi_loader = (
    um.UrbanMapper()
    .loader.from_huggingface("oscur/taxisvis1M", number_of_rows=5000, streaming=True)
    .with_columns(longitude_column="pickup_longitude", latitude_column="pickup_latitude")
    .build()
)

# One SimpleGeoImputer per coordinate pair (pickup, then dropoff).
pickup_imputer = (
    um.UrbanMapper()
    .imputer.with_type("SimpleGeoImputer")
    .on_columns("pickup_longitude", "pickup_latitude")
    .build()
)
dropoff_imputer = (
    um.UrbanMapper()
    .imputer.with_type("SimpleGeoImputer")
    .on_columns("dropoff_longitude", "dropoff_latitude")
    .build()
)

# NOTE(review): presumably restricts records to the urban layer's extent;
# confirm against the UrbanMapper BoundingBoxFilter documentation.
bbox_filter = um.UrbanMapper().filter.with_type("BoundingBoxFilter").build()

# Count enrichers: one grouped by pickup segment, one grouped by dropoff segment.
pickup_counter = (
    um.UrbanMapper()
    .enricher.with_data(group_by="pickup_segment")
    .count_by(output_column="pickup_count")
    .build()
)
dropoff_counter = (
    um.UrbanMapper()
    .enricher.with_data(group_by="dropoff_segment")
    .count_by(output_column="dropoff_count")
    .build()
)

# Interactive visualiser with a light basemap and a gray colorbar label.
map_visualiser = (
    um.UrbanMapper()
    .visual.with_type("Interactive")
    .with_style({"tiles": "CartoDB Positron", "colorbar_text_color": "gray"})
    .build()
)

pipeline = UrbanPipeline(
    [
        ("urban_layer", street_layer),
        ("loader", taxi_loader),
        ("impute_pickup", pickup_imputer),
        ("impute_dropoff", dropoff_imputer),
        ("filter", bbox_filter),
        ("enrich_pickups", pickup_counter),
        ("enrich_dropoffs", dropoff_counter),
        ("visualiser", map_visualiser),
    ]
)
/home/docs/checkouts/readthedocs.org/user_builds/urbanmapper/envs/80/lib/python3.10/site-packages/osmnx/convert.py:542: FutureWarning: <class 'geopandas.array.GeometryArray'>._reduce will require a `keepdims` parameter in the future dupes = edges[mask].dropna(subset=["geometry"])
In [2]:
Copied!
# Execute the pipeline. compose_transform() returns a pair, which is unpacked
# into the mapped records and the enriched urban layer.
transform_result = pipeline.compose_transform()
mapped_data, enriched_layer = transform_result
# Execute the pipeline. compose_transform() returns a pair, which is unpacked
# into the mapped records and the enriched urban layer.
transform_result = pipeline.compose_transform()
mapped_data, enriched_layer = transform_result
~> Loading: loader...
|█████▊ |
▁▃▅
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▂▄▆
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▃▅▇
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▄▆█
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▅▇▇
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▆█▆
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▇▇▅
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
█▆▄
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▇▅▃
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▆▄▂
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▅▃▁
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▄▂▂
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▃▁▃
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▂▂▄
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▁▃▅
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▂▄▆
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▃▅▇
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▄▆█
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▅▇▇
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▆█▆
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▇▇▅
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
█▆▄
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▇▅▃
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▆▄▂
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▅▃▁
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▄▂▂
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▃▁▃
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▂▂▄
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▁▃▅
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▂▄▆
1/7 [14%]
i
~> Loading: loader...
|█████▊ |
▃▅▇
1/7 [14%]
i
🗺️ Successfully composed pipeline with 7 steps!
|███████████████████████████████
In [3]:
Copied!
# Visualize results: pass both count columns to the pipeline's visualise()
# call. The bare `fig` on the last line makes the figure the cell's output.
count_columns = ["pickup_count", "dropoff_count"]
fig = pipeline.visualise(count_columns)
fig
# Visualize results: pass both count columns to the pipeline's visualise()
# call. The bare `fig` on the last line makes the figure the cell's output.
count_columns = ["pickup_count", "dropoff_count"]
fig = pipeline.visualise(count_columns)
fig
Out[3]:
In [4]:
Copied!
# Save the pipeline to a .dill file; the path is relative to the current
# working directory.
pipeline_path = "./taxi_pipeline.dill"
pipeline.save(pipeline_path)
# Save the pipeline to a .dill file; the path is relative to the current
# working directory.
pipeline_path = "./taxi_pipeline.dill"
pipeline.save(pipeline_path)