Skip to content

Instantly share code, notes, and snippets.

@calebrob6
Last active January 29, 2022 13:38
Show Gist options
  • Save calebrob6/59be4be6ae8ff700db9214b3b4c5ae54 to your computer and use it in GitHub Desktop.
Save calebrob6/59be4be6ae8ff700db9214b3b4c5ae54 to your computer and use it in GitHub Desktop.
An example notebook for showing how to download patches of NAIP imagery around a point using the Planetary Computer.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"id": "0aba142e",
"metadata": {},
"outputs": [],
"source": [
"from pystac_client import Client\n",
"import pyproj\n",
"import stac_vrt\n",
"\n",
"import tqdm\n",
"import numpy as np\n",
"import rasterio\n",
"import rasterio.features\n",
"import rioxarray\n",
"from rasterio.enums import Resampling\n",
"import shapely.geometry\n",
"\n",
"import azure.storage.blob\n",
"import io\n",
"\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "97c08a46-ce78-4479-9670-af50ecc05ea0",
"metadata": {},
"outputs": [],
"source": [
"connection_string = \"REPLACE\"\n",
"container_client = azure.storage.blob.ContainerClient.from_connection_string(\n",
" connection_string, container_name=\"datasets\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "bec43b49-b9db-4d2b-9f36-c55ca757ce98",
"metadata": {},
"outputs": [],
"source": [
"catalog = Client.open(\"https://planetarycomputer.microsoft.com/api/stac/v1\")\n",
"\n",
"def get_naip(point, date_range=\"2018-01-01/2020-01-01\", buffer=256):\n",
" lon, lat = point\n",
" \n",
" # 0.01 is degrees and is ~1km -- https://www.usna.edu/Users/oceano/pguth/md_help/html/approx_equivalents.htm\n",
" # we could be more precise here but I don't think it matters\n",
" geom = shapely.geometry.mapping(shapely.geometry.Point(lon, lat).buffer(0.01))\n",
" \n",
" search = catalog.search(\n",
" collections=[\"naip\"],\n",
" intersects=geom,\n",
" datetime=date_range,\n",
" )\n",
"\n",
" items = [item.to_dict() for item in search.get_items()]\n",
" \n",
" # TODO: Filter out items to only keep the most recent imagery per location. With this we can set a large date range and be fine.\n",
" \n",
" # This happens when we query over a state that doesn't have imagery in the given date range or the point is outside of NAIP bounds\n",
" if len(items) == 0:\n",
" return None\n",
" \n",
" # `stac_vrt.build_vrt` fails when we have a point that is at the border of two UTM zones because the CRSs of the items are different\n",
" # This is an example exception that is thrown, \"ValueError: STAC item wa_m_4711909_nw_11_060_20190731_20191217 (position 1) does not have the same CRS. 26911 != 26910\"\n",
" # QUESTION: would this be handled by stackstac (after multi-channel tiffs are supported)?\n",
" # TODO: catch and handle this exception correctly -- one potential solution is to just drop items in one of the UTM zones, another solution is to ignore all together\n",
" naip_vrt = stac_vrt.build_vrt(\n",
" items, block_width=256, block_height=256, data_type=\"Byte\"\n",
" )\n",
"\n",
" with rioxarray.open_rasterio(naip_vrt) as ds:\n",
" \n",
" x_utm, y_utm = pyproj.Proj(ds.rio.crs)(lon, lat)\n",
" aoi = ds.loc[..., y_utm+buffer:y_utm-buffer, x_utm-buffer:x_utm+buffer]\n",
"\n",
" aoi = aoi.rio.reproject(\n",
" aoi.rio.crs,\n",
" resolution=1,\n",
" resampling=Resampling.bilinear,\n",
" )\n",
" \n",
" data = aoi.compute()\n",
" \n",
" return data"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "c2570e3f-fa70-4fe5-93c2-e29b3c34008b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/20 [00:00<?, ?it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 512, 512)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 5%|▌ | 1/20 [00:03<00:58, 3.06s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 512, 513)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 10%|█ | 2/20 [00:06<00:55, 3.06s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 512, 512)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 15%|█▌ | 3/20 [00:08<00:48, 2.84s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 512, 513)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 20%|██ | 4/20 [00:11<00:47, 2.98s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 512, 512)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 25%|██▌ | 5/20 [00:14<00:44, 2.99s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 513, 512)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 30%|███ | 6/20 [00:17<00:40, 2.90s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 512, 512)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 40%|████ | 8/20 [00:20<00:25, 2.16s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 512, 512)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 45%|████▌ | 9/20 [00:23<00:23, 2.14s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 513, 512)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 50%|█████ | 10/20 [00:25<00:22, 2.30s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 513, 512)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 55%|█████▌ | 11/20 [00:28<00:22, 2.45s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 513, 513)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 60%|██████ | 12/20 [00:31<00:20, 2.56s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 513, 513)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 65%|██████▌ | 13/20 [00:34<00:19, 2.80s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 513, 512)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 75%|███████▌ | 15/20 [00:39<00:12, 2.41s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 512, 512)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 80%|████████ | 16/20 [00:42<00:10, 2.58s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 513, 513)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 85%|████████▌ | 17/20 [00:45<00:08, 2.72s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 512, 512)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 90%|█████████ | 18/20 [00:48<00:05, 2.98s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 513, 513)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 95%|█████████▌| 19/20 [00:53<00:03, 3.34s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 513, 513)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 20/20 [00:57<00:00, 2.86s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 12.7 s, sys: 209 ms, total: 12.9 s\n",
"Wall time: 57.1 s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"%%time\n",
"# A large box I drew over the western US to sample randomly from within\n",
"large_geom = {\"type\":\"Polygon\",\"coordinates\":[[[-120.34423828125,35.746512259918504],[-97.470703125,35.746512259918504],[-97.470703125,48.268569112964336],[-120.34423828125,48.268569112964336],[-120.34423828125,35.746512259918504]]]}\n",
"left, bottom, right, top = rasterio.features.bounds(large_geom)\n",
"area_width = right - left\n",
"area_height = top - bottom\n",
"\n",
"\n",
"date_range = \"2018-01-01/2020-01-01\" # this will not be valid for all states\n",
"buffer = 256 # this is in meters\n",
"\n",
"for i in tqdm.tqdm(range(20)):\n",
" \n",
" lat = np.random.random() * area_height + bottom\n",
" lon = np.random.random() * area_width + left\n",
" \n",
" ds = get_naip((lon, lat), date_range, buffer=buffer)\n",
" \n",
" if ds is not None:\n",
" print(ds.shape)\n",
" # data = np.rollaxis(ds.data, 0, 3)\n",
" # plt.figure()\n",
" # plt.imshow(data[:,:,:3])\n",
" # plt.axis(\"off\")\n",
" # plt.show()\n",
" \n",
" # write the geotiff to blob storage\n",
" with io.BytesIO() as f:\n",
" ds.rio.to_raster(f, driver=\"GTiff\", windowed=True, compress=\"deflate\", predictor=2)\n",
" f.seek(0)\n",
" blob_client = container_client.get_blob_client(f\"test/sample_{i}.tif\")\n",
" blob_client.upload_blob(f, overwrite=True)\n",
" else:\n",
" # TODO: record missing indices\n",
" pass"
]
},
{
"cell_type": "markdown",
"id": "0f9f3d39-6f96-49f6-851c-e0abe98a45fe",
"metadata": {},
"source": [
"## Example of failing due to query on UTM boundaries"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "1e156212-aca0-4bb4-9d00-8b4b0460d4cf",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "STAC item wa_m_4711909_nw_11_060_20190731_20191217 (position 1) does not have the same CRS. 26911 != 26910",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [9]\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m lon \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m119.99926328659058\u001b[39m\n\u001b[1;32m 2\u001b[0m lat \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m47.83925871057489\u001b[39m\n\u001b[0;32m----> 4\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[43mget_naip\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlon\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlat\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdate_range\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m256\u001b[39;49m\u001b[43m)\u001b[49m\n",
"Input \u001b[0;32mIn [5]\u001b[0m, in \u001b[0;36mget_naip\u001b[0;34m(point, date_range, buffer)\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;66;03m# This fails when we have a point that is at the border of two UTM zones\u001b[39;00m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;66;03m# E.g. \"ValueError: STAC item wa_m_4711909_nw_11_060_20190731_20191217 (position 1) does not have the same CRS. 26911 != 26910\"\u001b[39;00m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;66;03m# TODO: catch and handle this exception correctly -- one potential solution is to just drop items in one of the UTM zones\u001b[39;00m\n\u001b[0;32m---> 27\u001b[0m naip_vrt \u001b[38;5;241m=\u001b[39m \u001b[43mstac_vrt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuild_vrt\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[43m \u001b[49m\u001b[43mitems\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock_width\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m256\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock_height\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m256\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mByte\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 29\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m rasterio\u001b[38;5;241m.\u001b[39mopen(naip_vrt) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m 31\u001b[0m crs \u001b[38;5;241m=\u001b[39m f\u001b[38;5;241m.\u001b[39mcrs\n",
"File \u001b[0;32m/srv/conda/envs/notebook/lib/python3.8/site-packages/stac_vrt.py:221\u001b[0m, in \u001b[0;36mbuild_vrt\u001b[0;34m(stac_items, crs, res_x, res_y, shapes, bboxes, data_type, block_width, block_height, add_prefix)\u001b[0m\n\u001b[1;32m 219\u001b[0m image_crs \u001b[38;5;241m=\u001b[39m stac_item\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mproperties\u001b[39m\u001b[38;5;124m\"\u001b[39m, {})\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mproj:epsg\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 220\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m image_crs \u001b[38;5;129;01mand\u001b[39;00m image_crs \u001b[38;5;241m!=\u001b[39m crs_code:\n\u001b[0;32m--> 221\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 222\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSTAC item \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m (position \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m) does not have the \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 223\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msame CRS. \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m != \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(stac_item[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mid\u001b[39m\u001b[38;5;124m\"\u001b[39m], i, image_crs, crs_code)\n\u001b[1;32m 224\u001b[0m )\n\u001b[1;32m 225\u001b[0m image \u001b[38;5;241m=\u001b[39m stac_item[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124massets\u001b[39m\u001b[38;5;124m\"\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mimage\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 226\u001b[0m x_off, y_off \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m~\u001b[39mout_transform \u001b[38;5;241m*\u001b[39m (bbox\u001b[38;5;241m.\u001b[39mleft, bbox\u001b[38;5;241m.\u001b[39mtop)\n",
"\u001b[0;31mValueError\u001b[0m: STAC item wa_m_4711909_nw_11_060_20190731_20191217 (position 1) does not have the same CRS. 26911 != 26910"
]
}
],
"source": [
"lon = -119.99926328659058\n",
"lat = 47.83925871057489\n",
"\n",
"data = get_naip((lon, lat), date_range, buffer=256)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment