Last active
March 22, 2025 01:34
-
-
Save onefoursix/b15a836f2f3e362a086dd6d868f20d1b to your computer and use it in GitHub Desktop.
This script provides an example of how to create a STREAMSETS_ADLS_GEN2 Connection using the StreamSets SDK
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
FILE: create_connection.py

DESCRIPTION: This script provides an example of how to create a
             STREAMSETS_ADLS_GEN2 Connection using the StreamSets SDK

USAGE: $ python3 create_connection.py

PREREQUISITES:

 - Set your connection properties and engine_url in the variables at the top of the script

 - Set the group_name_list to a list of groups you want to share the Connection with. Members
   of the groups will be able to use the Connection but will not be able to view the
   Connection details.

 - Python 3.9+

 - StreamSets Platform SDK for Python v6.5+
   See: https://docs.streamsets.com/platform-sdk/latest/welcome/installation.html

 - StreamSets Platform API Credentials for a user with Organization Administrator role

 - Before running the script, export the environment variables CRED_ID and CRED_TOKEN
   with the StreamSets Platform API Credentials, like this:

     $ export CRED_ID="40af8..."
     $ export CRED_TOKEN="eyJ0..."
"""
from streamsets.sdk import ControlHub | |
import sys | |
import os | |
# ---------------------------------------------------------------------------
# User-editable settings: adjust these values before running the script.
# ---------------------------------------------------------------------------

# Generic Connection properties
connection_name = 'ADLS Connection (SDK)'
connection_type = 'STREAMSETS_ADLS_GEN2'
connection_tags = ['tag1', 'tag2']

# ADLS_GEN2 specific Connection properties
# NOTE(review): account_key is a storage secret; avoid committing a real value
# to version control.
account_fqdn = '<redacted>'
auth_method = 'SHARED_KEY'
account_key = '<redacted>'
storage_container = '<redacted>'

# List of Groups to share the Connection with
group_name_list = ['developers', 'ops']

# URL of the Data Collector engine used as the Connection's authoring engine
engine_url = 'http://localhost:18630'
# Get Control Hub Credentials from the environment
cred_id = os.getenv('CRED_ID')
cred_token = os.getenv('CRED_TOKEN')

# Fail fast with a specific message when a credential is unset or empty,
# rather than attempting a connection with unusable values.
missing_vars = [
    var_name
    for var_name, var_value in (('CRED_ID', cred_id), ('CRED_TOKEN', cred_token))
    if not var_value
]
if missing_vars:
    print('Error: missing environment variable(s): ' + ', '.join(missing_vars))
    sys.exit(1)

# Connect to Control Hub. Any failure here is reported and ends the script with
# exit status 1, so `sch` is always bound for the code that follows.
try:
    sch = ControlHub(credential_id=cred_id, token=cred_token)
except Exception as e:
    print('Error connecting to Control Hub; check your CRED_ID and CRED_TOKEN environment variables')
    print(str(e))
    sys.exit(1)
# Retrieve the Data Collector engine to be used as the authoring engine.
# NOTE(review): the SDK is expected to raise ValueError when no engine matches
# engine_url -- confirm against the installed SDK version. Any other exception
# type propagates unchanged.
try:
    engine = sch.engines.get(engine_url=engine_url)
except ValueError as e:
    print(f'Error: could not find an engine with URL {engine_url}')
    print(str(e))
    sys.exit(1)

# Get a Connection Builder
connection_builder = sch.get_connection_builder()

# Build the Connection instance
connection = connection_builder.build(
    title=connection_name,
    connection_type=connection_type,
    authoring_data_collector=engine,
    tags=connection_tags,
)

# Create a dictionary of the specific Connection type's properties:
props = {
    'accountFQDN': account_fqdn,
    'authMethod': auth_method,
    'accountKey': account_key,
    'storageContainer': storage_container,
}

# Apply the props to the Connection's config
connection.connection_definition.configuration.update(props)

# Add the Connection to SCH
sch.add_connection(connection)

# Verify the Connection and print the verification result returned by the SDK
result = sch.verify_connection(connection)
print(result)
# Share the Connection with every group named in group_name_list
acl = connection.acl
actions = ['READ']  # Read permissions allow users to use the connection but not see its details
for group_name in group_name_list:
    # NOTE(review): the SDK is expected to raise ValueError when no group has
    # this display name -- confirm against the installed SDK version.
    try:
        group = sch.groups.get(display_name=group_name)
    except ValueError as e:
        print(f"Error looking up group '{group_name}'; "
              "stopping before sharing with it or any later group")
        print(str(e))
        sys.exit(1)
    group_permission = acl.permission_builder.build(
        subject_id=group.group_id, subject_type='GROUP', actions=actions)
    acl.add_permission(group_permission)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment