Skip to content

Instantly share code, notes, and snippets.

@mrbungie
Created June 13, 2025 03:36
Show Gist options
  • Save mrbungie/ab8e2a2104e56c7d6dc8872d01d44f9a to your computer and use it in GitHub Desktop.
Save mrbungie/ab8e2a2104e56c7d6dc8872d01d44f9a to your computer and use it in GitHub Desktop.
Dockerfile with EntraID and some other requirements
FROM apache/airflow:3.0.0-python3.12

# Tool versions used by the install steps below.
# NOTE(review): OPENTOFU_VERSION and AZURE_CLI_VERSION are declared but no
# instruction in this file consumes them (the Azure CLI install script is run
# without a version, and no OpenTofu download is performed) - confirm whether
# those installs were dropped on purpose.
ENV OPENTOFU_VERSION=1.9.1 \
    AWS_CLI_VERSION=2.27.28 \
    AZURE_CLI_VERSION=2.51.0 \
    DOTNET_VERSION=9.0

# Build arg for multi-arch builds (used with buildx)
ARG TARGETARCH

# Package installation needs root; the airflow user is restored further down
USER root

# Install unzip, the Azure CLI (via Microsoft's Debian install script) and the
# .NET SDK.
# Workaround: apt-get update fails in this image unless insecure repositories
# are allowed. The override is written and removed inside this single layer so
# it never ships in the final image; drop it entirely once the base image no
# longer needs it.
RUN echo 'Acquire::AllowInsecureRepositories "true";' > /etc/apt/apt.conf.d/99allow-insecure && \
    apt-get update && \
    apt-get install -y --no-install-recommends unzip && \
    curl -fsSL https://aka.ms/InstallAzureCLIDeb | bash && \
    apt-get install -y --no-install-recommends dotnet-sdk-${DOTNET_VERSION} && \
    rm -rf /var/lib/apt/lists/* /etc/apt/apt.conf.d/99allow-insecure

# Determine the architecture mapping and install the AWS CLI.
# TARGETARCH is taken from buildx when available, otherwise derived from uname.
# NOTE(review): TOFU_ARCH is computed for an OpenTofu download that is not
# present in this step.
RUN if [ -z "${TARGETARCH:-}" ]; then \
        case "$(uname -m)" in \
            x86_64) TARGETARCH="amd64" ;; \
            aarch64|arm64) TARGETARCH="arm64" ;; \
            *) echo "Unsupported architecture: $(uname -m)" >&2 && exit 1 ;; \
        esac; \
    fi && \
    echo "Using architecture: $TARGETARCH" && \
    case "$TARGETARCH" in \
        amd64) TOFU_ARCH="amd64"; AWS_ARCH="x86_64" ;; \
        arm64) TOFU_ARCH="arm64"; AWS_ARCH="aarch64" ;; \
        *) echo "Unsupported architecture: $TARGETARCH" >&2 && exit 1 ;; \
    esac && \
    curl -fsSLo /tmp/awscliv2.zip "https://awscli.amazonaws.com/awscli-exe-linux-${AWS_ARCH}-${AWS_CLI_VERSION}.zip" && \
    unzip -q /tmp/awscliv2.zip -d /tmp && \
    /tmp/aws/install && \
    rm -rf /tmp/awscliv2.zip /tmp/aws

USER airflow

# .NET tools are installed with an explicit --tool-path inside the airflow
# user's home; per the original author this was the only way to get the
# dotnet tools to work.
ENV DOTNET_TOOLS_INSTALL_DIR=/home/airflow/.local/bin
RUN dotnet tool install azure-cost-cli --tool-path ${DOTNET_TOOLS_INSTALL_DIR}

# Install rest of dependencies (i.e. extra providers)
COPY requirements.txt /opt/airflow/requirements.txt
RUN pip install --no-cache-dir -r /opt/airflow/requirements.txt

# FAB / Entra ID webserver configuration
COPY webserver_config.py /opt/airflow/webserver_config.py
# Airflow 3.0.0 dependencies
a2wsgi==1.10.8
adal==1.2.7
adlfs==2024.12.0
aiobotocore==2.21.1
aiofiles==24.1.0
aiohappyeyeballs==2.6.1
aiohttp==3.11.18
aioitertools==0.12.0
aiologic==0.14.0
aiomysql==0.2.0
aiosignal==1.3.2
aiosqlite==0.21.0
alembic==1.15.2
amqp==5.3.1
annotated-types==0.7.0
anyio==4.9.0
apache-airflow==3.0.0
apache-airflow-core==3.0.0
apache-airflow-providers-amazon==9.6.1
apache-airflow-providers-celery==3.10.5
apache-airflow-providers-cncf-kubernetes==10.4.3
apache-airflow-providers-common-compat==1.6.0
apache-airflow-providers-common-io==1.5.4
apache-airflow-providers-common-messaging==1.0.0
apache-airflow-providers-common-sql==1.26.0
apache-airflow-providers-docker==4.3.1
apache-airflow-providers-elasticsearch==6.2.2
apache-airflow-providers-fab==2.0.1
apache-airflow-providers-ftp==3.12.3
apache-airflow-providers-git==0.0.2
apache-airflow-providers-google==15.1.0
apache-airflow-providers-grpc==3.7.3
apache-airflow-providers-hashicorp==4.1.1
apache-airflow-providers-http==5.2.2
apache-airflow-providers-microsoft-azure==12.3.1
apache-airflow-providers-mysql==6.2.2
apache-airflow-providers-odbc==4.9.2
apache-airflow-providers-openlineage==2.2.0
apache-airflow-providers-postgres==6.1.3
apache-airflow-providers-redis==4.0.2
apache-airflow-providers-sendgrid==4.0.1
apache-airflow-providers-sftp==5.2.1
apache-airflow-providers-slack==9.0.5
apache-airflow-providers-smtp==2.0.3
apache-airflow-providers-snowflake==6.2.2
apache-airflow-providers-ssh==4.0.1
apache-airflow-providers-standard==1.0.0
apache-airflow-task-sdk==1.0.0
apispec==6.8.1
argcomplete==3.6.2
asgiref==3.8.1
asn1crypto==1.5.1
asyncpg==0.30.0
asyncssh==2.20.0
attrs==25.3.0
Authlib==1.3.1
azure-batch==14.2.0
azure-common==1.1.28
azure-core==1.33.0
azure-cosmos==4.9.0
azure-datalake-store==0.0.53
azure-identity==1.21.0
azure-keyvault-secrets==4.9.0
azure-kusto-data==5.0.2
azure-mgmt-containerinstance==10.1.0
azure-mgmt-containerregistry==13.0.0
azure-mgmt-core==1.5.0
azure-mgmt-cosmosdb==9.7.0
azure-mgmt-datafactory==9.2.0
azure-mgmt-datalake-nspkg==3.0.1
azure-mgmt-datalake-store==0.5.0
azure-mgmt-nspkg==3.0.2
azure-mgmt-resource==23.3.0
azure-mgmt-storage==22.2.0
azure-nspkg==3.0.2
azure-servicebus==7.14.2
azure-storage-blob==12.25.1
azure-storage-file-datalake==12.20.0
azure-storage-file-share==12.21.0
azure-synapse-artifacts==0.20.0
azure-synapse-spark==0.7.0
babel==2.17.0
backoff==2.2.1
bcrypt==4.3.0
beautifulsoup4==4.13.4
billiard==4.2.1
blinker==1.9.0
boto3==1.37.1
botocore==1.37.1
cachelib==0.13.0
cachetools==5.5.2
cadwyn==5.3.3
cattrs==24.1.3
celery==5.5.1
certifi==2025.1.31
cffi==1.17.1
chardet==5.2.0
charset-normalizer==3.4.1
click==8.1.8
click-didyoumean==0.3.1
click-plugins==1.1.1
click-repl==0.3.0
clickclick==20.10.2
colorama==0.4.6
colorlog==6.9.0
connexion==2.14.2
cron-descriptor==1.4.5
croniter==6.0.0
cryptography==42.0.8
db-dtypes==1.4.2
decorator==5.2.1
Deprecated==1.2.18
dill==0.3.1.1
dnspython==2.7.0
docker==7.1.0
docstring_parser==0.16
durationpy==0.9
elastic-transport==8.17.1
elasticsearch==8.18.0
email_validator==2.2.0
eventlet==0.39.1
fastapi==0.115.12
fastapi-cli==0.0.7
filelock==3.18.0
Flask==2.2.5
Flask-AppBuilder==4.5.3
Flask-Babel==2.0.0
Flask-JWT-Extended==4.7.1
Flask-Limiter==3.12
Flask-Login==0.6.3
Flask-Session==0.5.0
Flask-SQLAlchemy==2.5.1
Flask-WTF==1.2.2
flower==2.0.1
frozenlist==1.6.0
fsspec==2025.3.2
gcloud-aio-auth==5.4.1
gcloud-aio-bigquery==7.1.0
gcloud-aio-storage==9.4.0
gcsfs==2025.3.2
gevent==25.4.1
gitdb==4.0.12
GitPython==3.1.44
google-ads==26.1.0
google-analytics-admin==0.24.0
google-api-core==2.24.2
google-api-python-client==2.167.0
google-auth==2.39.0
google-auth-httplib2==0.2.0
google-auth-oauthlib==1.2.1
google-cloud-aiplatform==1.89.0
google-cloud-alloydb==0.4.5
google-cloud-appengine-logging==1.6.1
google-cloud-audit-log==0.3.2
google-cloud-automl==2.16.3
google-cloud-batch==0.17.35
google-cloud-bigquery==3.31.0
google-cloud-bigquery-datatransfer==3.19.1
google-cloud-bigtable==2.30.1
google-cloud-build==3.31.1
google-cloud-compute==1.30.0
google-cloud-container==2.56.1
google-cloud-core==2.4.3
google-cloud-datacatalog==3.26.1
google-cloud-dataflow-client==0.8.17
google-cloud-dataform==0.6.1
google-cloud-dataplex==2.10.1
google-cloud-dataproc==5.18.1
google-cloud-dataproc-metastore==1.18.2
google-cloud-dlp==3.29.0
google-cloud-kms==3.4.1
google-cloud-language==2.17.1
google-cloud-logging==3.12.0
google-cloud-managedkafka==0.1.9
google-cloud-memcache==1.12.1
google-cloud-monitoring==2.27.1
google-cloud-orchestration-airflow==1.17.5
google-cloud-os-login==2.17.1
google-cloud-pubsub==2.29.0
google-cloud-redis==2.18.1
google-cloud-resource-manager==1.14.2
google-cloud-run==0.10.17
google-cloud-secret-manager==2.23.3
google-cloud-spanner==3.53.0
google-cloud-speech==2.32.0
google-cloud-storage==2.19.0
google-cloud-storage-transfer==1.16.1
google-cloud-tasks==2.19.2
google-cloud-texttospeech==2.26.0
google-cloud-translate==3.20.2
google-cloud-videointelligence==2.16.1
google-cloud-vision==3.10.1
google-cloud-workflows==1.18.1
google-crc32c==1.7.1
google-resumable-media==2.7.2
googleapis-common-protos==1.70.0
graphviz==0.20.3
greenlet==3.2.0
grpc-google-iam-v1==0.14.2
grpc-interceptor==0.15.4
grpcio==1.71.0
grpcio-gcp==0.2.2
grpcio-status==1.62.3
gunicorn==23.0.0
h11==0.14.0
h2==4.2.0
hpack==4.1.0
httpcore==1.0.8
httplib2==0.22.0
httptools==0.6.4
httpx==0.27.0
humanize==4.12.2
hvac==2.3.0
hyperframe==6.1.0
idna==3.10
ijson==3.3.0
immutabledict==4.2.1
importlib_metadata==8.4.0
inflection==0.5.1
isodate==0.7.2
itsdangerous==2.2.0
Jinja2==3.1.6
jmespath==0.10.0
joblib==1.4.2
jsonpath-ng==1.7.0
jsonschema==4.23.0
jsonschema-specifications==2024.10.1
kombu==5.5.3
kubernetes==31.0.0
kubernetes_asyncio==30.3.1
lazy-object-proxy==1.11.0
libcst==1.7.0
limits==5.0.0
linkify-it-py==2.0.3
lockfile==0.12.2
looker-sdk==25.4.0
lxml==5.3.2
Mako==1.3.10
markdown-it-py==3.0.0
MarkupSafe==3.0.2
marshmallow==3.26.1
marshmallow-sqlalchemy==0.28.2
mdurl==0.1.2
methodtools==0.4.7
microsoft-kiota-abstractions==1.9.3
microsoft-kiota-authentication-azure==1.9.3
microsoft-kiota-http==1.9.3
microsoft-kiota-serialization-json==1.9.3
microsoft-kiota-serialization-text==1.9.3
more-itertools==10.6.0
msal==1.32.0
msal-extensions==1.3.1
msgraph-core==1.3.3
msgspec==0.19.0
msrest==0.7.1
msrestazure==0.6.4.post1
multidict==6.4.3
mysql-connector-python==9.3.0
mysqlclient==2.2.7
numpy==1.26.4
oauthlib==3.2.2
openlineage-integration-common==1.31.0
openlineage-python==1.31.0
openlineage_sql==1.31.0
opentelemetry-api==1.27.0
opentelemetry-exporter-otlp==1.27.0
opentelemetry-exporter-otlp-proto-common==1.27.0
opentelemetry-exporter-otlp-proto-grpc==1.27.0
opentelemetry-exporter-otlp-proto-http==1.27.0
opentelemetry-proto==1.27.0
opentelemetry-sdk==1.27.0
opentelemetry-semantic-conventions==0.48b0
ordered-set==4.1.0
packaging==24.2
pandas==2.1.4
pandas-gbq==0.28.0
paramiko==3.5.1
pathspec==0.12.1
pendulum==3.1.0
platformdirs==4.3.7
pluggy==1.5.0
ply==3.11
prison==0.2.1
prometheus_client==0.21.1
prompt_toolkit==3.0.51
propcache==0.3.1
proto-plus==1.26.1
protobuf==4.25.6
psutil==7.0.0
psycopg2-binary==2.9.10
pyarrow==16.1.0
pyasn1==0.6.1
pyasn1_modules==0.4.1
PyAthena==3.12.2
pycparser==2.22
pydantic==2.11.3
pydantic_core==2.33.1
pydata-google-auth==1.9.1
Pygments==2.19.1
PyJWT==2.10.1
PyMySQL==1.1.1
PyNaCl==1.5.0
pyodbc==5.2.0
pyOpenSSL==25.0.0
pyparsing==3.2.3
python-daemon==3.1.2
python-dateutil==2.9.0.post0
python-dotenv==1.1.0
python-http-client==3.3.7
python-ldap==3.4.4
python-multipart==0.0.20
python-slugify==8.0.4
python3-saml==1.16.0
pytz==2025.2
PyYAML==6.0.2
redis==5.2.1
redshift-connector==2.1.5
referencing==0.36.2
requests==2.32.3
requests-oauthlib==2.0.0
requests-toolbelt==1.0.0
retryhttp==1.3.2
rich==13.9.4
rich-argparse==1.7.0
rich-toolkit==0.14.1
rpds-py==0.24.0
rsa==4.9.1
ruamel.yaml==0.18.10
ruamel.yaml.clib==0.2.12
s3transfer==0.11.3
sagemaker_studio==1.0.13
scikit-learn==1.6.1
scipy==1.15.2
scramp==1.4.5
sendgrid==6.11.0
setproctitle==1.3.5
setuptools==78.1.0
shapely==2.1.0
shellingham==1.5.4
six==1.17.0
slack_sdk==3.35.0
smmap==5.0.2
sniffio==1.3.1
snowflake-connector-python==3.14.1
snowflake-sqlalchemy==1.7.3
sortedcontainers==2.4.0
soupsieve==2.7
SQLAlchemy==1.4.54
sqlalchemy-bigquery==1.13.0
SQLAlchemy-JSONField==1.0.2
sqlalchemy-spanner==1.10.0
SQLAlchemy-Utils==0.41.2
sqlparse==0.5.3
sshtunnel==0.4.0
starkbank-ecdsa==2.2.0
starlette==0.46.2
statsd==4.0.1
std-uritemplate==2.0.3
structlog==25.2.0
svcs==25.1.0
tabulate==0.9.0
tenacity==9.1.2
termcolor==3.0.1
text-unidecode==1.3
threadpoolctl==3.6.0
tomlkit==0.13.2
tornado==6.4.2
tqdm==4.67.1
typer==0.15.2
types-protobuf==5.29.1.20250403
types-requests==2.32.0.20250328
typing-inspection==0.4.0
typing_extensions==4.13.2
tzdata==2025.2
uc-micro-py==1.0.3
universal_pathlib==0.2.6
uritemplate==4.1.1
urllib3==2.4.0
uuid6==2024.7.10
uv==0.6.13
uvicorn==0.34.2
uvloop==0.21.0
vine==5.1.0
watchfiles==1.0.5
watchtower==3.4.0
wcwidth==0.2.13
websocket-client==1.8.0
websockets==15.0.1
Werkzeug==2.2.3
wirerope==1.0.0
wrapt==1.17.2
WTForms==3.2.1
xmlsec==1.3.14
yarl==1.20.0
zipp==3.21.0
zope.event==5.0
zope.interface==7.2
# Customized requirements
# Extra packages added on top of the Airflow 3.0.0 dependency pins listed above.
# NOTE(review): apache-airflow-providers-amazon is also pinned earlier in this
# file without the s3fs extra; both pins must stay on the same version.
apache-airflow-providers-mongo==5.1.0
apache-airflow-providers-amazon[s3fs]==9.6.1
airflow-provider-duckdb==0.2.0
duckdb==1.3.0
import os
import logging
import jwt
import requests
import json
from airflow.configuration import conf
from flask_appbuilder.security.manager import AUTH_OAUTH
from airflow.providers.fab.auth_manager.security_manager.override import FabAirflowSecurityManagerOverride
from flask import current_app
from werkzeug.middleware.proxy_fix import ProxyFix
# Module-level logger used by the proxy-fix setup and the security manager below
log = logging.getLogger(__name__)
# Core FAB Auth config
# Authenticate through OAuth (providers are declared in OAUTH_PROVIDERS)
AUTH_TYPE = AUTH_OAUTH
# Flask-AppBuilder setting: allow users unknown to Airflow to be registered on login
AUTH_USER_REGISTRATION = True
# Flask-AppBuilder setting: re-evaluate the user's roles on every login
AUTH_ROLES_SYNC_AT_LOGIN = True
AUTH_USER_REGISTRATION_ROLE = "Viewer" # Default role if no mapping matches
# Proxy fix, https://github.com/apache/airflow/issues/49781
# Wrap the WSGI app in werkzeug's ProxyFix when either the [webserver] or the
# [fab] ENABLE_PROXY_FIX option is true. Both lookups pass fallback=False so an
# option that is absent from the running configuration counts as "disabled"
# instead of raising while this file is being loaded.
if conf.getboolean("webserver", "ENABLE_PROXY_FIX", fallback=False) or conf.getboolean(
    "fab", "ENABLE_PROXY_FIX", fallback=False
):
    try:
        # Number of trusted proxy hops per forwarded header; each defaults to 1
        current_app.wsgi_app = ProxyFix(  # type: ignore
            current_app.wsgi_app,
            x_for=conf.getint("webserver", "PROXY_FIX_X_FOR", fallback=1),
            x_proto=conf.getint("webserver", "PROXY_FIX_X_PROTO", fallback=1),
            x_host=conf.getint("webserver", "PROXY_FIX_X_HOST", fallback=1),
            x_port=conf.getint("webserver", "PROXY_FIX_X_PORT", fallback=1),
            x_prefix=conf.getint("webserver", "PROXY_FIX_X_PREFIX", fallback=1),
        )
    except Exception as e:
        # Best effort: a failure here must not prevent the config from loading,
        # but it is a real problem, so report it above INFO level.
        log.warning("Error setting proxy fix: %s", e)
# Entra ID / Azure AD OAuth configuration
# A single provider named "azure". The client id, client secret and tenant id
# are read from AZURE_ENTRAID_APP_* environment variables; the tenant id is
# interpolated into the OpenID Connect discovery URL.
OAUTH_PROVIDERS = [
    dict(
        name="azure",
        icon="fa-windows",
        token_key="access_token",
        remote_app=dict(
            client_id=os.getenv("AZURE_ENTRAID_APP_CLIENT_ID"),
            client_secret=os.getenv("AZURE_ENTRAID_APP_CLIENT_SECRET_VALUE"),
            server_metadata_url="https://login.microsoftonline.com/{}/v2.0/.well-known/openid-configuration".format(
                os.getenv("AZURE_ENTRAID_APP_TENANT_ID")
            ),
            client_kwargs=dict(scope="openid email profile offline_access"),
        ),
    ),
]
# Mapping of role keys to FAB roles. The keys are the values the security
# manager puts into "role_keys"; each maps to the list of FAB roles to grant.
# You must create these roles in Airflow UI beforehand
AUTH_ROLES_MAPPING = {
    "Viewer": ["Viewer"],
    "Admin": ["Admin"],
    "User": ["User"],
    "Op": ["Op"],
    "Public": ["Public"],
}
# Example: Azure Group Object IDs → FAB roles
# TODO: Make this dynamic from a configmap or something else
DEFAULT_AZURE_GROUP_MAPPING = {
    # "xxxxx-xxxxx-xxx-xxx-xxxx": "Admin",  # Must exist in Azure AD
}


def parse_azure_group_mapping(raw, default):
    """Turn the raw AZURE_ENTRAID_APP_GROUP_MAPPING value into a dict.

    Args:
        raw: The environment variable's text, or None when it is unset.
        default: Mapping to use when ``raw`` is None, empty or only whitespace.

    Returns:
        A dict of Azure group object ID -> role key. A copy of ``default`` is
        returned when ``raw`` carries no value.

    Raises:
        ValueError: ``raw`` is not valid JSON (json.JSONDecodeError is a
            ValueError subclass) or does not decode to a JSON object.
    """
    # An empty variable (e.g. an unset template value) means "no override",
    # not "invalid JSON".
    if raw is None or not raw.strip():
        return dict(default)
    mapping = json.loads(raw)
    # Fail at load time rather than at the first login, where .get() would break.
    if not isinstance(mapping, dict):
        raise ValueError(
            "AZURE_ENTRAID_APP_GROUP_MAPPING must be a JSON object mapping group IDs to role names"
        )
    return mapping


AZURE_GROUP_MAPPING = parse_azure_group_mapping(
    os.environ.get("AZURE_ENTRAID_APP_GROUP_MAPPING"), DEFAULT_AZURE_GROUP_MAPPING
)
# Get public key automatically
# The tenant's OpenID Connect discovery document names the JWKS endpoint that
# publishes the signing keys; both are fetched once when this file is loaded.
OPENID_CONFIG_URL = f'https://login.microsoftonline.com/{os.environ.get("AZURE_ENTRAID_APP_TENANT_ID")}/v2.0/.well-known/openid-configuration'
# Upper bound, in seconds, for each metadata request so a stalled endpoint
# cannot hang the process.
_METADATA_TIMEOUT_SECONDS = 10


def _fetch_json(url):
    """GET ``url`` and return its decoded JSON body.

    Raises:
        requests.RequestException: on timeout, connection failure or a
            non-success HTTP status (via raise_for_status).
    """
    reply = requests.get(url, timeout=_METADATA_TIMEOUT_SECONDS)
    reply.raise_for_status()
    return reply.json()


openid_config = _fetch_json(OPENID_CONFIG_URL)
jwks_uri = openid_config['jwks_uri']
jwks_keys = _fetch_json(jwks_uri)


def _find_jwk(kid):
    """Return the cached JWK whose ``kid`` equals ``kid``, or None."""
    for key in jwks_keys['keys']:
        if key.get('kid') == kid:
            return key
    return None


def get_public_key(kid):
    """Return the RSA public key for the signing key id ``kid``.

    The key set cached in ``jwks_keys`` is searched first. When ``kid`` is not
    in it, the key set is downloaded again once from ``jwks_uri`` before giving
    up, so keys published after this file was loaded are still found.

    Args:
        kid: The ``kid`` value from a token's header.

    Returns:
        The key object built by ``jwt.algorithms.RSAAlgorithm.from_jwk``.

    Raises:
        Exception: no key with that ``kid`` exists, even after the refresh.
        requests.RequestException: the refresh request failed.
    """
    global jwks_keys
    jwk = _find_jwk(kid)
    if jwk is None:
        # Unknown key id: refresh the cached key set and look again.
        jwks_keys = _fetch_json(jwks_uri)
        jwk = _find_jwk(kid)
    if jwk is None:
        raise Exception("Public key not found.")
    return jwt.algorithms.RSAAlgorithm.from_jwk(jwk)
class CustomAzureSecurityManager(FabAirflowSecurityManagerOverride):
    """Security manager that builds FAB user info from an Entra ID ID token."""

    def get_oauth_user_info(self, provider, response):
        """Validate the Azure ID token and translate its claims into user info.

        Args:
            provider: Name of the OAuth provider that handled the login.
            response: Token response of the provider; must contain "id_token".

        Returns:
            For provider "azure": a dict with "username", "email",
            "first_name", "last_name" and "role_keys". For any other provider:
            an empty dict.

        Raises:
            KeyError: the response has no "id_token" or the token header has
                no "kid".
            Exception: whatever get_public_key or jwt.decode raise when the
                key is unknown or the token fails validation.
        """
        if provider != "azure":
            return {}

        token = response["id_token"]
        # The header is read unverified only to pick the signing key; the
        # signature, audience and expiry are checked by jwt.decode below.
        unverified_header = jwt.get_unverified_header(token)
        public_key = get_public_key(unverified_header['kid'])
        decoded_token = jwt.decode(
            token,
            key=public_key,
            algorithms=["RS256"],
            audience=os.environ.get("AZURE_ENTRAID_APP_CLIENT_ID"),
            options={"verify_exp": True},
        )
        # The claims contain personal data, so the full dump stays at DEBUG.
        log.debug("Decoded Azure token: %s", decoded_token)

        # Extract user basic info
        preferred_username = decoded_token.get("preferred_username")
        email_claim = decoded_token.get("email")
        username = preferred_username or email_claim
        # Mirror the username fallback so the email is not None when the token
        # carries no "email" claim.
        email = email_claim or preferred_username
        first_name = decoded_token.get("given_name", "")
        last_name = decoded_token.get("family_name", "")

        # Extract group memberships and map them to role keys, keeping the
        # token's order and listing each role only once.
        # NOTE(review): this relies on the token carrying a "groups" claim;
        # users without it end up with the fallback role - confirm the app
        # registration emits group claims.
        roles = []
        for group_id in decoded_token.get("groups") or []:
            mapped_role = AZURE_GROUP_MAPPING.get(group_id)
            if mapped_role and mapped_role not in roles:
                roles.append(mapped_role)
        if not roles:
            roles = [AUTH_USER_REGISTRATION_ROLE]  # fallback role

        userinfo = {
            "username": username,
            "email": email,
            "first_name": first_name,
            "last_name": last_name,
            "role_keys": roles,
        }
        log.info("Parsed user info: %s", userinfo)
        return userinfo


# Finally, register security manager
SECURITY_MANAGER_CLASS = CustomAzureSecurityManager
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment