Last active
January 29, 2022 13:38
-
-
Save calebrob6/59be4be6ae8ff700db9214b3b4c5ae54 to your computer and use it in GitHub Desktop.
An example notebook for showing how to download patches of NAIP imagery around a point using the Planetary Computer.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "0aba142e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from pystac_client import Client\n", | |
"import pyproj\n", | |
"import stac_vrt\n", | |
"\n", | |
"import tqdm\n", | |
"import numpy as np\n", | |
"import rasterio\n", | |
"import rasterio.features\n", | |
"import rioxarray\n", | |
"from rasterio.enums import Resampling\n", | |
"import shapely.geometry\n", | |
"\n", | |
"import azure.storage.blob\n", | |
"import io\n", | |
"\n", | |
"import matplotlib.pyplot as plt" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "97c08a46-ce78-4479-9670-af50ecc05ea0", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"connection_string = \"REPLACE\"\n", | |
"container_client = azure.storage.blob.ContainerClient.from_connection_string(\n", | |
" connection_string, container_name=\"datasets\"\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "bec43b49-b9db-4d2b-9f36-c55ca757ce98", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"catalog = Client.open(\"https://planetarycomputer.microsoft.com/api/stac/v1\")\n", | |
"\n", | |
"def get_naip(point, date_range=\"2018-01-01/2020-01-01\", buffer=256):\n", | |
" lon, lat = point\n", | |
" \n", | |
" # 0.01 is degrees and is ~1km -- https://www.usna.edu/Users/oceano/pguth/md_help/html/approx_equivalents.htm\n", | |
" # we could be more precise here but I don't think it matters\n", | |
" geom = shapely.geometry.mapping(shapely.geometry.Point(lon, lat).buffer(0.01))\n", | |
" \n", | |
" search = catalog.search(\n", | |
" collections=[\"naip\"],\n", | |
" intersects=geom,\n", | |
" datetime=date_range,\n", | |
" )\n", | |
"\n", | |
" items = [item.to_dict() for item in search.get_items()]\n", | |
" \n", | |
" # TODO: Filter out items to only keep the most recent imagery per location. With this we can set a large date range and be fine.\n", | |
" \n", | |
" # This happens when we query over a state that doesn't have imagery in the given date range or the point is outside of NAIP bounds\n", | |
" if len(items) == 0:\n", | |
" return None\n", | |
" \n", | |
" # `stac_vrt.build_vrt` fails when we have a point that is at the border of two UTM zones because the CRSs of the items are different\n", | |
" # This is an example exception that is thrown, \"ValueError: STAC item wa_m_4711909_nw_11_060_20190731_20191217 (position 1) does not have the same CRS. 26911 != 26910\"\n", | |
" # QUESTION: would this be handled by stackstac (after multi-channel tiffs are supported)?\n", | |
" # TODO: catch and handle this exception correctly -- one potential solution is to just drop items in one of the UTM zones, another solution is to ignore all together\n", | |
" naip_vrt = stac_vrt.build_vrt(\n", | |
" items, block_width=256, block_height=256, data_type=\"Byte\"\n", | |
" )\n", | |
"\n", | |
" with rioxarray.open_rasterio(naip_vrt) as ds:\n", | |
" \n", | |
" x_utm, y_utm = pyproj.Proj(ds.rio.crs)(lon, lat)\n", | |
" aoi = ds.loc[..., y_utm+buffer:y_utm-buffer, x_utm-buffer:x_utm+buffer]\n", | |
"\n", | |
" aoi = aoi.rio.reproject(\n", | |
" aoi.rio.crs,\n", | |
" resolution=1,\n", | |
" resampling=Resampling.bilinear,\n", | |
" )\n", | |
" \n", | |
" data = aoi.compute()\n", | |
" \n", | |
" return data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "c2570e3f-fa70-4fe5-93c2-e29b3c34008b", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 0%| | 0/20 [00:00<?, ?it/s]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 512, 512)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 5%|▌ | 1/20 [00:03<00:58, 3.06s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 512, 513)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 10%|█ | 2/20 [00:06<00:55, 3.06s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 512, 512)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 15%|█▌ | 3/20 [00:08<00:48, 2.84s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 512, 513)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 20%|██ | 4/20 [00:11<00:47, 2.98s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 512, 512)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 25%|██▌ | 5/20 [00:14<00:44, 2.99s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 513, 512)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 30%|███ | 6/20 [00:17<00:40, 2.90s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 512, 512)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 40%|████ | 8/20 [00:20<00:25, 2.16s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 512, 512)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 45%|████▌ | 9/20 [00:23<00:23, 2.14s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 513, 512)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 50%|█████ | 10/20 [00:25<00:22, 2.30s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 513, 512)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 55%|█████▌ | 11/20 [00:28<00:22, 2.45s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 513, 513)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 60%|██████ | 12/20 [00:31<00:20, 2.56s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 513, 513)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 65%|██████▌ | 13/20 [00:34<00:19, 2.80s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 513, 512)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 75%|███████▌ | 15/20 [00:39<00:12, 2.41s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 512, 512)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 80%|████████ | 16/20 [00:42<00:10, 2.58s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 513, 513)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 85%|████████▌ | 17/20 [00:45<00:08, 2.72s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 512, 512)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 90%|█████████ | 18/20 [00:48<00:05, 2.98s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 513, 513)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
" 95%|█████████▌| 19/20 [00:53<00:03, 3.34s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(4, 513, 513)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"100%|██████████| 20/20 [00:57<00:00, 2.86s/it]" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 12.7 s, sys: 209 ms, total: 12.9 s\n", | |
"Wall time: 57.1 s\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"# A large box I drew over the western US to sample randomly from within\n", | |
"large_geom = {\"type\":\"Polygon\",\"coordinates\":[[[-120.34423828125,35.746512259918504],[-97.470703125,35.746512259918504],[-97.470703125,48.268569112964336],[-120.34423828125,48.268569112964336],[-120.34423828125,35.746512259918504]]]}\n", | |
"left, bottom, right, top = rasterio.features.bounds(large_geom)\n", | |
"area_width = right - left\n", | |
"area_height = top - bottom\n", | |
"\n", | |
"\n", | |
"date_range = \"2018-01-01/2020-01-01\" # this will not be valid for all states\n", | |
"buffer = 256 # this is in meters\n", | |
"\n", | |
"for i in tqdm.tqdm(range(20)):\n", | |
" \n", | |
" lat = np.random.random() * area_height + bottom\n", | |
" lon = np.random.random() * area_width + left\n", | |
" \n", | |
" ds = get_naip((lon, lat), date_range, buffer=buffer)\n", | |
" \n", | |
" if ds is not None:\n", | |
" print(ds.shape)\n", | |
" # data = np.rollaxis(ds.data, 0, 3)\n", | |
" # plt.figure()\n", | |
" # plt.imshow(data[:,:,:3])\n", | |
" # plt.axis(\"off\")\n", | |
" # plt.show()\n", | |
" \n", | |
" # write the geotiff to blob storage\n", | |
" with io.BytesIO() as f:\n", | |
" ds.rio.to_raster(f, driver=\"GTiff\", windowed=True, compress=\"deflate\", predictor=2)\n", | |
" f.seek(0)\n", | |
" blob_client = container_client.get_blob_client(f\"test/sample_{i}.tif\")\n", | |
" blob_client.upload_blob(f, overwrite=True)\n", | |
" else:\n", | |
" # TODO: record missing indices\n", | |
" pass" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "0f9f3d39-6f96-49f6-851c-e0abe98a45fe", | |
"metadata": {}, | |
"source": [ | |
"## Example of failing due to query on UTM boundaries" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "1e156212-aca0-4bb4-9d00-8b4b0460d4cf", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "ValueError", | |
"evalue": "STAC item wa_m_4711909_nw_11_060_20190731_20191217 (position 1) does not have the same CRS. 26911 != 26910", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", | |
"Input \u001b[0;32mIn [9]\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m lon \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m119.99926328659058\u001b[39m\n\u001b[1;32m 2\u001b[0m lat \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m47.83925871057489\u001b[39m\n\u001b[0;32m----> 4\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[43mget_naip\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlon\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlat\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdate_range\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m256\u001b[39;49m\u001b[43m)\u001b[49m\n", | |
"Input \u001b[0;32mIn [5]\u001b[0m, in \u001b[0;36mget_naip\u001b[0;34m(point, date_range, buffer)\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;66;03m# This fails when we have a point that is at the border of two UTM zones\u001b[39;00m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;66;03m# E.g. \"ValueError: STAC item wa_m_4711909_nw_11_060_20190731_20191217 (position 1) does not have the same CRS. 26911 != 26910\"\u001b[39;00m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;66;03m# TODO: catch and handle this exception correctly -- one potential solution is to just drop items in one of the UTM zones\u001b[39;00m\n\u001b[0;32m---> 27\u001b[0m naip_vrt \u001b[38;5;241m=\u001b[39m \u001b[43mstac_vrt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuild_vrt\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[43m \u001b[49m\u001b[43mitems\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock_width\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m256\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock_height\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m256\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mByte\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 29\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m rasterio\u001b[38;5;241m.\u001b[39mopen(naip_vrt) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m 31\u001b[0m crs \u001b[38;5;241m=\u001b[39m f\u001b[38;5;241m.\u001b[39mcrs\n", | |
"File \u001b[0;32m/srv/conda/envs/notebook/lib/python3.8/site-packages/stac_vrt.py:221\u001b[0m, in \u001b[0;36mbuild_vrt\u001b[0;34m(stac_items, crs, res_x, res_y, shapes, bboxes, data_type, block_width, block_height, add_prefix)\u001b[0m\n\u001b[1;32m 219\u001b[0m image_crs \u001b[38;5;241m=\u001b[39m stac_item\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mproperties\u001b[39m\u001b[38;5;124m\"\u001b[39m, {})\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mproj:epsg\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 220\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m image_crs \u001b[38;5;129;01mand\u001b[39;00m image_crs \u001b[38;5;241m!=\u001b[39m crs_code:\n\u001b[0;32m--> 221\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 222\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSTAC item \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m (position \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m) does not have the \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 223\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msame CRS. \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m != \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(stac_item[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mid\u001b[39m\u001b[38;5;124m\"\u001b[39m], i, image_crs, crs_code)\n\u001b[1;32m 224\u001b[0m )\n\u001b[1;32m 225\u001b[0m image \u001b[38;5;241m=\u001b[39m stac_item[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124massets\u001b[39m\u001b[38;5;124m\"\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mimage\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 226\u001b[0m x_off, y_off \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m~\u001b[39mout_transform \u001b[38;5;241m*\u001b[39m (bbox\u001b[38;5;241m.\u001b[39mleft, bbox\u001b[38;5;241m.\u001b[39mtop)\n", | |
"\u001b[0;31mValueError\u001b[0m: STAC item wa_m_4711909_nw_11_060_20190731_20191217 (position 1) does not have the same CRS. 26911 != 26910" | |
] | |
} | |
], | |
"source": [ | |
"lon = -119.99926328659058\n", | |
"lat = 47.83925871057489\n", | |
"\n", | |
"data = get_naip((lon, lat), date_range, buffer=256)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment