Skip to content

Instantly share code, notes, and snippets.

@psychemedia
Last active August 1, 2024 12:06
Show Gist options
  • Save psychemedia/cd2425975ffc1d87bf8cb3c4df264fca to your computer and use it in GitHub Desktop.
Save psychemedia/cd2425975ffc1d87bf8cb3c4df264fca to your computer and use it in GitHub Desktop.
First attempt at a yaml validator for ou-container-builder v3 yaml config scripts
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "d388bfdc-c605-42e0-8d09-e3a371c3d6b8",
"metadata": {},
"source": [
"## OU Container Builder YAML Configuration File Validator\n",
"\n",
"A simple first attempt at a validator for `ou-container-builder` (v3) YAML configuration files, using the Python `schema` package.\n",
"\n",
"Handy docs: https://www.andrewvillazon.com/validate-yaml-python-schema/\n",
"\n",
"`schema` repo: https://github.com/keleshev/schema"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "20cde7b6-8f11-43a0-be60-2699274bc49b",
"metadata": {},
"outputs": [],
"source": [
"#%pip install schema"
]
},
{
"cell_type": "code",
"execution_count": 134,
"id": "40efa6bc-8968-4a54-9401-c1ca5bb6f8cf",
"metadata": {},
"outputs": [],
"source": [
"from schema import Schema, SchemaError, Optional, Regex, Or\n",
"import yaml"
]
},
{
"cell_type": "code",
"execution_count": 184,
"id": "53075a86-70b5-4e3f-9725-0199292194ee",
"metadata": {},
"outputs": [],
"source": [
"full_test_yaml=\"\"\"version: 3\n",
"module:\n",
" code: TM129\n",
" presentation: 23B\n",
"image:\n",
" base: as\n",
" user: as\n",
"packs:\n",
" jupyterlab: {}\n",
" notebook: {}\n",
" ipykernel: {}\n",
"sources:\n",
" apt:\n",
" - name: mongodb\n",
" key_url: https://www.mongodb.org/static/pgp/server-7.0.asc\n",
" dearmor: true\n",
" deb:\n",
" url: https://repo.mongodb.org/apt/ubuntu\n",
" distribution: jammy/mongodb-org/7.0\n",
" component: multiverse\n",
"server:\n",
" access_token: as\n",
" default_path: as\n",
"packages:\n",
" apt:\n",
" build:\n",
" - gdal-bin\n",
" - libgdal-dev\n",
" deploy:\n",
" - mongodb-org\n",
" - postgresql\n",
" - openjdk-17-jre\n",
" - openjdk-17-jre-headless\n",
" - gdal-bin\n",
" - libgdal-dev\n",
" pip:\n",
" system:\n",
" - ou-tm351-jl-extensions>=0.2.8\n",
" user:\n",
" - pymongo\n",
" - jupysql\n",
" - psycopg2-binary\n",
" - pgspecial\n",
" - SQLAlchemy\n",
" - schemadisplay-magic>=0.0.7\n",
" - geopandas\n",
" - fiona\n",
" - Shapely\n",
" - geopy\n",
" - folium\n",
" - descartes\n",
" - pandas\n",
" - scipy\n",
" - seaborn\n",
" - xlrd\n",
" - openpyxl\n",
"content:\n",
" - source: as\n",
" target: as\n",
" overwrite: always\n",
" - source: ./db_setup/mongodb-org/mongod\n",
" target: /etc/init.d/mongod\n",
" overwrite: always\n",
" - source: ./db_setup/mongodb-org/mongod.conf\n",
" target: /etc/mongod.conf\n",
" overwrite: always\n",
" - source: asa\n",
" target: as\n",
" overwrite: always\n",
" - source: ./icons/openrefine.svg\n",
" target: /var/ou/icons/openrefine.svg\n",
" overwrite: always\n",
"environment:\n",
" - name: MONGO_DB_PATH\n",
" value: /var/db/data/mongo\n",
" - name: PG_VERSION\n",
" value: '15'\n",
" - name: PGDATA\n",
" value: /var/lib/postgresql/$PG_VERSION/main\n",
" - name: POSTGRES_USER\n",
" value: postgres\n",
" - name: POSTGRES_PASSWORD\n",
" value: postgres\n",
" - name: POSTGRES_DB\n",
" value: oudb\n",
" - name: PLOOMBER_STATS_ENABLED\n",
" value: 'false'\n",
" - name: PLOOMBER_VERSION_CHECK_DISABLED\n",
" value: 'false'\n",
" - name: OPENREFINE_VERSION\n",
" value: 3.8.0\n",
" - name: OPENREFINE_PATH\n",
" value: /var/openrefine\n",
"scripts:\n",
" - stage: deploy\n",
" commands: >-\n",
" chmod ugo+rx /etc/init.d/mongod,cp -p /etc/mongod.conf\n",
" /etc/ouseful/mongod.conf,chmod u-w /etc/ouseful/mongod.conf\n",
" - stage: deploy\n",
" commands: >-\n",
" sed -e \"s/[#]?listen_addresses = .*/listen_addresses = '*'/g\" -i\n",
" \"/etc/postgresql/$PG_VERSION/main/postgresql.conf\",usermod -aG users\n",
" postgres\n",
" - stage: build\n",
" commands: >-\n",
" wget -q -O openrefine-${OPENREFINE_VERSION}.tar.gz\n",
" https://github.com/OpenRefine/OpenRefine/releases/download/${OPENREFINE_VERSION}/openrefine-linux-${OPENREFINE_VERSION}.tar.gz,tar\n",
" xzf openrefine-${OPENREFINE_VERSION}.tar.gz,mv\n",
" openrefine-${OPENREFINE_VERSION} $OPENREFINE_PATH\n",
"output_blocks:\n",
" deploy:\n",
" - block: COPY --from=base /var/openrefine /var/openrefine\n",
" weight: 2333\n",
"web_apps:\n",
" - path: openrefine\n",
" options:\n",
" command:\n",
" - /var/openrefine/refine\n",
" - '-i'\n",
" - 127.0.0.1\n",
" - '-p'\n",
" - '{port}'\n",
" - '-d'\n",
" - /home/ou/TM351-24J/openrefine\n",
" - '-H'\n",
" - '*'\n",
" - '-x'\n",
" - refine.display.new.version.notice=false\n",
" timeout: '120'\n",
" launcher:\n",
" enabled: false\n",
"services:\n",
" - mongod\n",
" - postgresql\n",
" \"\"\""
]
},
{
"cell_type": "code",
"execution_count": 185,
"id": "ec87b344-c3ee-463f-a242-f4dd8c53b122",
"metadata": {},
"outputs": [],
"source": [
"# Two schemas are needed: one for full configuration files (defined here)\n",
"# and another for partial configuration files.\n",
"# TODO: in the partial schema validation, should all elements be optional?\n",
"#\n",
"# `Regex` checks values with `re.search`, so both patterns below are anchored\n",
"# with `^` and `$` to test the whole value rather than any substring of it.\n",
"oucb_schema_ = {\n",
"    \"version\": int,\n",
"    \"module\": {\n",
"        # Module code: 1-5 capital letters then 3 digits, e.g. TM129.\n",
"        \"code\": Regex(r'^[A-Z]{1,5}\\d{3}$'),\n",
"        # Presentation: 2 digits then one letter from A-M, e.g. 23B.\n",
"        \"presentation\": Regex(r'^\\d{2}[A-M]$'),\n",
"    },\n",
"    \"image\": {\"base\": str, \"user\": str},\n",
"    # The `packs` key itself is required; each individual pack is optional\n",
"    # and, when present, must map to a dict (possibly empty).\n",
"    \"packs\": {\n",
"        Optional(\"jupyterlab\"): dict,\n",
"        Optional(\"notebook\"): dict,\n",
"        Optional(\"ipykernel\"): dict,\n",
"        Optional(\"irkernel\"): dict,\n",
"        Optional(\"code_server\"): dict,\n",
"        Optional(\"xfce4\"): dict,\n",
"    },\n",
"    Optional(\"sources\"): {\n",
"        Optional(\"apt\"): [{\n",
"            \"name\": str,\n",
"            \"key_url\": str,\n",
"            \"dearmor\": bool,\n",
"            \"deb\": {\"url\": str, \"distribution\": str, \"component\": str},\n",
"        }],\n",
"    },\n",
"    Optional(\"server\"): {\"access_token\": str, \"default_path\": str},\n",
"    Optional(\"packages\"): {\n",
"        Optional(\"apt\"): {Optional(\"build\"): list, Optional(\"deploy\"): list},\n",
"        Optional(\"pip\"): {Optional(\"system\"): list, Optional(\"user\"): list},\n",
"    },\n",
"    Optional(\"content\"): [\n",
"        {\"source\": str, \"target\": str, \"overwrite\": Or(\"always\", \"never\")}\n",
"    ],\n",
"    Optional(\"environment\"): [{\"name\": str, \"value\": str}],\n",
"    Optional(\"scripts\"): [{\"stage\": Or(\"build\", \"deploy\"), \"commands\": str}],\n",
"    Optional(\"output_blocks\"): {\n",
"        Optional(\"build\"): [{\"block\": str, \"weight\": int}],\n",
"        Optional(\"deploy\"): [{\"block\": str, \"weight\": int}],\n",
"    },\n",
"    Optional(\"web_apps\"): [{\n",
"        \"path\": str,\n",
"        \"options\": {\"command\": list, \"timeout\": Or(str, int)},\n",
"        Optional(\"launcher\"): {\n",
"            Optional(\"title\"): str,\n",
"            Optional(\"icon_path\"): str,\n",
"            Optional(\"enabled\"): bool,\n",
"        },\n",
"    }],\n",
"    Optional(\"services\"): list,\n",
"}\n",
"\n",
"oucb_schema = Schema(oucb_schema_)"
]
},
{
"cell_type": "code",
"execution_count": 186,
"id": "5cf4c8f1-1522-410d-bc92-b20ff48cd88d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Configuration is valid.\n"
]
}
],
"source": [
"# Parse the YAML text, then check the resulting object against the schema.\n",
"configuration = yaml.safe_load(full_test_yaml)\n",
"\n",
"try:\n",
"    oucb_schema.validate(configuration)\n",
"except SchemaError as validation_error:\n",
"    # Report the validation failure rather than re-raising it.\n",
"    print(validation_error)\n",
"else:\n",
"    print(\"Configuration is valid.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "420f4dd5-272c-4d53-81c0-b847ad31e42b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "5ed6a253-6000-486e-ac69-50fe16cd0dd1",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment