platform-api/app/services/provisioner.py
"""
Tenant Provisioner — manages vCluster + tool lifecycle via GitOps.
Flow:
1. Tenant signs up → platform-api calls provision_tenant()
2. provisioner writes two files to the gitops repo:
data-plane/tenants/<slug>/vcluster.yaml ← ArgoCD Application (vCluster)
data-plane/tenants/<slug>/appset.yaml ← ApplicationSet (tool apps)
3. For each enabled tool it writes:
data-plane/tenants/<slug>/apps/<tool>.yaml ← ArgoCD Application (Helm chart)
4. Git push → ArgoCD on data-plane picks it all up automatically.
5. deprovision_tenant() removes the directory → ArgoCD prunes everything.
"""
import logging
import re
import secrets
import shutil
import subprocess
from pathlib import Path
from ..core.config import get_settings
log = logging.getLogger(__name__)
# ── Tool Helm chart templates ────────────────────────────────────────────────
def _superset_app(tenant: str, domain: str, kc_base: str, kc_secret: str) -> str:
secret_key = secrets.token_urlsafe(42)
return f"""\
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: {tenant}-superset
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "3"
spec:
project: default
source:
repoURL: https://apache.github.io/superset
chart: superset
targetRevision: "0.15.2"
helm:
valuesObject:
replicaCount: 1
supersetNode:
replicaCount: 1
supersetCeleryBeat:
enabled: false
supersetWorker:
replicaCount: 0
init:
adminUser:
username: admin
firstname: Admin
lastname: {tenant}
email: admin@{domain}
password: "Architecture@9988#"
loadExamples: false
postgresql:
enabled: false
redis:
enabled: false
service:
type: ClusterIP
port: 8088
ingress:
enabled: true
ingressClassName: nginx
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
nginx.ingress.kubernetes.io/proxy-buffer-size: "16k"
hosts:
- host: {tenant}-superset.{domain}
paths:
- path: /
pathType: Prefix
tls:
- hosts:
- {tenant}-superset.{domain}
secretName: {tenant}-superset-tls
# initContainer installs psycopg2 (missing from apache/superset:5.x image)
initContainers:
- name: install-drivers
image: apachesuperset.docker.scarf.sh/apache/superset:5.0.0
command:
- sh
- -c
- pip install psycopg2-binary Authlib --user --no-cache-dir --quiet
volumeMounts:
- name: pip-packages
mountPath: /app/superset_home/.local
extraVolumes:
- name: pip-packages
emptyDir: {{}}
extraVolumeMounts:
- name: pip-packages
mountPath: /app/superset_home/.local
extraEnv:
PYTHONPATH: "/app/superset_home/.local/lib/python3.10/site-packages"
DB_HOST: "{tenant}-superset-pg"
DB_PORT: "5432"
DB_USER: "superset"
DB_PASS: "superset-pass"
DB_NAME: "superset"
REDIS_HOST: "{tenant}-superset-redis"
REDIS_PORT: "6379"
bootstrapScript: |
#!/bin/bash
echo "Drivers pre-installed via initContainer"
configOverrides:
keycloak_sso: |
from flask_appbuilder.security.manager import AUTH_OAUTH
from superset.security import SupersetSecurityManager
import logging
log = logging.getLogger(__name__)
AUTH_TYPE = AUTH_OAUTH
AUTH_USER_REGISTRATION = True
AUTH_USER_REGISTRATION_ROLE = "Gamma"
AUTH_ROLES_SYNC_AT_LOGIN = True
AUTH_ROLES_MAPPING = {{"admin": ["Admin"], "platform_admin": ["Admin"]}}
OAUTH_PROVIDERS = [{{
"name": "keycloak",
"icon": "fa-key",
"token_key": "access_token",
"remote_app": {{
"client_id": "superset",
"client_secret": "{kc_secret}",
"server_metadata_url": "{kc_base}/realms/{tenant}/.well-known/openid-configuration",
"api_base_url": "{kc_base}/realms/{tenant}/protocol/openid-connect",
"client_kwargs": {{"scope": "openid email profile roles"}},
}},
}}]
class KeycloakSM(SupersetSecurityManager):
def oauth_user_info(self, provider, response=None):
if provider == "keycloak":
me = self.appbuilder.sm.oauth_remoteapp.get("userinfo").json()
roles = me.get("realm_access", {{}}).get("roles", [])
return {{
"username": me.get("preferred_username", me.get("email")),
"first_name": me.get("given_name", ""),
"last_name": me.get("family_name", ""),
"email": me.get("email", ""),
"role_keys": roles,
}}
return {{}}
CUSTOM_SECURITY_MANAGER = KeycloakSM
secret: |
SECRET_KEY = '{secret_key}'
SQLALCHEMY_DATABASE_URI = 'postgresql+psycopg2://superset:superset-pass@{tenant}-superset-pg:5432/superset'
REDIS_HOST = '{tenant}-superset-redis'
REDIS_PORT = 6379
DATA_CACHE_CONFIG = {{'CACHE_TYPE': 'SimpleCache'}}
CACHE_CONFIG = {{'CACHE_TYPE': 'SimpleCache'}}
WTF_CSRF_ENABLED = False
TALISMAN_ENABLED = False
SESSION_COOKIE_SAMESITE = None
SESSION_COOKIE_SECURE = True
ENABLE_PROXY_FIX = True
PREFERRED_URL_SCHEME = 'https'
destination:
server: https://kubernetes.default.svc
namespace: {tenant}-superset
syncPolicy:
automated: {{prune: true, selfHeal: true}}
syncOptions: [CreateNamespace=true]
"""
def _superset_infra(tenant: str) -> str:
"""Standalone PostgreSQL + Redis for Superset (using registry-accessible images)."""
return f"""\
apiVersion: v1
kind: Secret
metadata:
name: {tenant}-superset-pg-secret
namespace: {tenant}-superset
stringData:
POSTGRES_DB: superset
POSTGRES_USER: superset
POSTGRES_PASSWORD: "superset-pass"
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: {tenant}-superset-pg
namespace: {tenant}-superset
spec:
serviceName: {tenant}-superset-pg
replicas: 1
selector:
matchLabels:
app: {tenant}-superset-pg
template:
metadata:
labels:
app: {tenant}-superset-pg
spec:
containers:
- name: postgres
image: postgres:16-alpine
envFrom:
- secretRef:
name: {tenant}-superset-pg-secret
ports:
- containerPort: 5432
volumeMounts:
- name: data
mountPath: /var/lib/postgresql/data
subPath: pgdata
volumeClaimTemplates:
- metadata:
name: data
spec:
accessModes: [ReadWriteOnce]
resources:
requests:
storage: 5Gi
---
apiVersion: v1
kind: Service
metadata:
name: {tenant}-superset-pg
namespace: {tenant}-superset
spec:
selector:
app: {tenant}-superset-pg
ports:
- port: 5432
targetPort: 5432
---
# Alias so the Superset Helm chart's default 'superset-postgresql' hostname resolves
apiVersion: v1
kind: Service
metadata:
name: superset-postgresql
namespace: {tenant}-superset
spec:
selector:
app: {tenant}-superset-pg
ports:
- port: 5432
targetPort: 5432
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: {tenant}-superset-redis
namespace: {tenant}-superset
spec:
replicas: 1
selector:
matchLabels:
app: {tenant}-superset-redis
template:
metadata:
labels:
app: {tenant}-superset-redis
spec:
containers:
- name: redis
image: redis:7-alpine
ports:
- containerPort: 6379
---
apiVersion: v1
kind: Service
metadata:
name: {tenant}-superset-redis
namespace: {tenant}-superset
spec:
selector:
app: {tenant}-superset-redis
ports:
- port: 6379
targetPort: 6379
---
# Alias for Helm chart default hostname
apiVersion: v1
kind: Service
metadata:
name: superset-redis-headless
namespace: {tenant}-superset
spec:
selector:
app: {tenant}-superset-redis
ports:
- port: 6379
targetPort: 6379
"""
def _airflow_app(tenant: str, domain: str, kc_base: str) -> str:
return f"""\
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: {tenant}-airflow
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "3"
spec:
project: default
source:
repoURL: https://airflow.apache.org
chart: airflow
targetRevision: "1.14.0"
helm:
valuesObject:
images:
airflow:
repository: quay.io/dlytica_dev/airflow
tag: v2-airflow-nifi-pipeline-utils-2.0.1
pullPolicy: IfNotPresent
executor: KubernetesExecutor
config:
core:
load_examples: "False"
postgresql:
enabled: true
image:
registry: quay.io
repository: dlytica_dev/postgresql
tag: "16.1"
redis:
enabled: true
ingress:
web:
enabled: true
ingressClassName: nginx
hosts:
- name: {tenant}-airflow.{domain}
tls:
enabled: true
secretName: {tenant}-airflow-tls
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
destination:
server: https://kubernetes.default.svc
namespace: {tenant}-airflow
syncPolicy:
automated: {{prune: true, selfHeal: true}}
syncOptions: [CreateNamespace=true]
"""
def _trino_app(tenant: str, domain: str) -> str:
return f"""\
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: {tenant}-trino
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "3"
spec:
project: default
source:
repoURL: https://trinodb.github.io/charts
chart: trino
targetRevision: "0.13.0"
helm:
valuesObject:
image:
repository: quay.io/dlytica_dev/trino
tag: "476"
server:
workers: 1
ingress:
enabled: true
className: nginx
hosts:
- host: {tenant}-trino.{domain}
paths:
- path: /
pathType: Prefix
tls:
- hosts: [{tenant}-trino.{domain}]
secretName: {tenant}-trino-tls
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
destination:
server: https://kubernetes.default.svc
namespace: {tenant}-trino
syncPolicy:
automated: {{prune: true, selfHeal: true}}
syncOptions: [CreateNamespace=true]
"""
def _gitea_app(tenant: str, domain: str) -> str:
return f"""\
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: {tenant}-gitea
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "2"
spec:
project: default
source:
repoURL: https://dl.gitea.com/charts
chart: gitea
targetRevision: "10.6.0"
helm:
valuesObject:
gitea:
admin:
username: admin
password: "Architecture@9988#"
email: admin@{domain}
ingress:
enabled: true
className: nginx
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
hosts:
- host: {tenant}-git.{domain}
paths:
- path: /
pathType: Prefix
tls:
- hosts: [{tenant}-git.{domain}]
secretName: {tenant}-gitea-tls
postgresql-ha:
enabled: false
postgresql:
enabled: true
image:
registry: quay.io
repository: dlytica_dev/postgresql
tag: "16.1"
destination:
server: https://kubernetes.default.svc
namespace: {tenant}-gitea
syncPolicy:
automated: {{prune: true, selfHeal: true}}
syncOptions: [CreateNamespace=true]
"""
# Map tool names to their generator functions
TOOL_GENERATORS = {
"superset": lambda tenant, domain, kc_base, kc_secret: _superset_app(tenant, domain, kc_base, kc_secret),
"airflow": lambda tenant, domain, kc_base, kc_secret: _airflow_app(tenant, domain, kc_base),
"trino": lambda tenant, domain, kc_base, kc_secret: _trino_app(tenant, domain),
"gitea": lambda tenant, domain, kc_base, kc_secret: _gitea_app(tenant, domain),
}
# Tools that need companion infra manifests
INFRA_GENERATORS = {
"superset": _superset_infra,
}
# Default tools for every new tenant (always on)
DEFAULT_TOOLS = {"superset": True}
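# Hedged sanity-check sketch: render each tool template for a throwaway tenant and run
# the output through a YAML parser. PyYAML is an assumption here (it is not otherwise a
# dependency of this module), and the helper is illustrative only — nothing in the
# provisioning flow calls it.
def _render_and_check(tenant: str = "example", domain: str = "data.example.com") -> dict:
    import yaml  # assumed available; not imported elsewhere in this module

    rendered: dict[str, str] = {}
    for tool, gen in TOOL_GENERATORS.items():
        text = gen(tenant, domain, "https://kc.example.com", "dummy-secret")
        list(yaml.safe_load_all(text))  # force parsing; raises yaml.YAMLError on bad output
        rendered[tool] = text
        if tool in INFRA_GENERATORS:
            infra = INFRA_GENERATORS[tool](tenant)
            list(yaml.safe_load_all(infra))  # infra manifests are multi-document YAML
            rendered[f"{tool}-infra"] = infra
    return rendered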
# ── vCluster template ────────────────────────────────────────────────────────
def _vcluster_app(tenant: str, domain: str, namespace: str, gitops_repo: str, gitops_branch: str) -> str:
return f"""\
# vCluster for tenant: {tenant}
# ArgoCD on the data-plane deploys this. Once the vCluster is up, the
# ApplicationSet below drives all tool deployments inside it.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: vcluster-{tenant}
namespace: argocd
labels:
tenant: {tenant}
managed-by: platform
annotations:
argocd.argoproj.io/sync-wave: "1"
spec:
project: default
source:
repoURL: https://charts.loft.sh
chart: vcluster
targetRevision: "0.20.0"
helm:
valuesObject:
sync:
ingresses:
enabled: true
exportKubeConfig:
secret:
name: vcluster-{tenant}-kubeconfig
controlPlane:
ingress:
enabled: true
host: vc-{tenant}.{domain}
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
init:
manifests: |
apiVersion: v1
kind: Namespace
metadata:
name: argocd
destination:
server: https://kubernetes.default.svc
namespace: {namespace}
syncPolicy:
automated: {{prune: true, selfHeal: true}}
syncOptions: [CreateNamespace=true]
"""
def _appset(tenant: str, gitops_repo: str, gitops_branch: str) -> str:
"""ApplicationSet that watches data-plane/tenants/<slug>/apps/ and deploys each app."""
return f"""\
# ApplicationSet for tenant: {tenant}
# Watches gitops/data-plane/tenants/{tenant}/apps/* and creates one ArgoCD Application per tool.
# Add/remove a file → ArgoCD adds/removes the tool automatically.
apiVersion: argoproj.io/v1alpha1
kind: ApplicationSet
metadata:
name: {tenant}-tools
namespace: argocd
labels:
tenant: {tenant}
managed-by: platform
spec:
generators:
- git:
repoURL: {gitops_repo}
revision: {gitops_branch}
directories:
- path: data-plane/tenants/{tenant}/apps/*
template:
metadata:
name: "{tenant}-{{{{path.basename}}}}"
namespace: argocd
labels:
tenant: {tenant}
spec:
project: default
source:
repoURL: {gitops_repo}
targetRevision: {gitops_branch}
path: "{{{{path}}}}"
destination:
server: https://kubernetes.default.svc
namespace: "{tenant}-{{{{path.basename}}}}"
syncPolicy:
automated: {{prune: true, selfHeal: true}}
syncOptions: [CreateNamespace=true]
retry:
limit: 5
backoff: {{duration: 30s, factor: 2, maxDuration: 5m}}
"""
# ── Git helpers ──────────────────────────────────────────────────────────────
def _git_clone_or_pull(repo_url: str, local_path: str, branch: str) -> None:
p = Path(local_path)
if p.exists():
subprocess.run(
["git", "-C", local_path, "pull", "--rebase", "origin", branch],
check=True, capture_output=True,
)
else:
subprocess.run(
["git", "clone", "--depth=1", "-b", branch, repo_url, local_path],
check=True, capture_output=True,
)
def _git_push(local_path: str, message: str, branch: str) -> None:
cmds = [
["git", "-C", local_path, "config", "user.email", "platform-api@dlytica.com"],
["git", "-C", local_path, "config", "user.name", "Platform API"],
["git", "-C", local_path, "add", "-A"],
["git", "-C", local_path, "commit", "-m", message, "--allow-empty"],
["git", "-C", local_path, "push", "origin", branch],
]
for cmd in cmds:
subprocess.run(cmd, check=True, capture_output=True)
def _slug_safe(s: str) -> str:
return re.sub(r"[^a-z0-9-]", "-", s.lower()).strip("-")[:40]
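# e.g. _slug_safe("Acme Corp!") -> "acme-corp", _slug_safe("Müller GmbH") -> "m-ller-gmbh";
# each disallowed character becomes a dash (runs are not collapsed) and output is capped at 40 chars.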
# ── Public API ───────────────────────────────────────────────────────────────
async def provision_tenant(
tenant_slug: str,
company_name: str,
plan_name: str,
tools_enabled: dict,
domain: str | None = None,
kc_client_secret: str = "",
) -> dict:
"""
Write vCluster + ApplicationSet + tool Applications to gitops repo.
ArgoCD on the data-plane picks them up automatically.
Returns dict with vcluster_name, vcluster_url, tool_urls.
"""
s = get_settings()
base_domain = domain or s.DATA_PLANE_DOMAIN
kc_base = s.KEYCLOAK_URL
tenant = _slug_safe(tenant_slug)
namespace = s.VCLUSTER_NAMESPACE
local = s.GITOPS_LOCAL_PATH
_git_clone_or_pull(s.GITOPS_REPO_URL, local, s.GITOPS_BRANCH)
tenant_dir = Path(local) / "data-plane" / "tenants" / tenant
tenant_dir.mkdir(parents=True, exist_ok=True)
apps_dir = tenant_dir / "apps"
apps_dir.mkdir(exist_ok=True)
# 1. vCluster Application
(tenant_dir / "vcluster.yaml").write_text(
_vcluster_app(tenant, base_domain, namespace, s.GITOPS_REPO_URL, s.GITOPS_BRANCH)
)
# 2. ApplicationSet (auto-discovers apps/)
(tenant_dir / "appset.yaml").write_text(
_appset(tenant, s.GITOPS_REPO_URL, s.GITOPS_BRANCH)
)
# 3. Tool Applications — always include superset; add others per plan
merged = {**DEFAULT_TOOLS, **tools_enabled}
tool_urls = {}
for tool, gen in TOOL_GENERATORS.items():
tool_dir = apps_dir / tool
infra_path = apps_dir / f"{tool}-infra.yaml"
if merged.get(tool):
tool_dir.mkdir(exist_ok=True)
(tool_dir / "application.yaml").write_text(
gen(tenant, base_domain, kc_base, kc_client_secret)
)
# Write companion infra manifests if needed (e.g. postgres + redis for superset)
if tool in INFRA_GENERATORS:
(tool_dir / "infra.yaml").write_text(INFRA_GENERATORS[tool](tenant))
tool_urls[tool] = f"https://{tenant}-{tool}.{base_domain}"
else:
# Tool disabled: remove its directory and any leftover <tool>-infra.yaml at the apps/ root
if tool_dir.exists():
shutil.rmtree(tool_dir)
infra_path.unlink(missing_ok=True)
_git_push(local, f"feat: provision tenant {tenant} ({company_name})", s.GITOPS_BRANCH)
return {
"vcluster_name": f"vcluster-{tenant}",
"vcluster_url": f"https://vc-{tenant}.{base_domain}",
"tool_urls": tool_urls,
"data_plane_url": f"https://data.{base_domain}?tenant={tenant}",
}
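# Hedged usage sketch — hypothetical values; the real call site is the platform-api
# signup flow described in the module docstring. Superset is always included via
# DEFAULT_TOOLS, so only extra tools need to be passed.
async def _example_provision() -> dict:
    return await provision_tenant(
        tenant_slug="acme",
        company_name="Acme Corp",
        plan_name="starter",
        tools_enabled={"airflow": True},
    )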
async def deprovision_tenant(tenant_slug: str) -> None:
"""
Remove tenant directory from gitops.
ArgoCD prunes the vCluster and all tool Applications automatically.
"""
s = get_settings()
tenant = _slug_safe(tenant_slug)
local = s.GITOPS_LOCAL_PATH
_git_clone_or_pull(s.GITOPS_REPO_URL, local, s.GITOPS_BRANCH)
tenant_dir = Path(local) / "data-plane" / "tenants" / tenant
if tenant_dir.exists():
shutil.rmtree(tenant_dir)
_git_push(local, f"feat: deprovision tenant {tenant}", s.GITOPS_BRANCH)
log.info("Deprovisioned tenant %s", tenant)
async def update_tenant_tools(
tenant_slug: str,
tools_enabled: dict,
domain: str | None = None,
kc_client_secret: str = "",
) -> dict:
"""
Enable/disable tools for a tenant by adding/removing Application files.
ArgoCD prunes disabled tools automatically.
"""
s = get_settings()
base_domain = domain or s.DATA_PLANE_DOMAIN
kc_base = s.KEYCLOAK_URL
tenant = _slug_safe(tenant_slug)
local = s.GITOPS_LOCAL_PATH
_git_clone_or_pull(s.GITOPS_REPO_URL, local, s.GITOPS_BRANCH)
apps_dir = Path(local) / "data-plane" / "tenants" / tenant / "apps"
apps_dir.mkdir(parents=True, exist_ok=True)
merged = {**DEFAULT_TOOLS, **tools_enabled}
tool_urls = {}
for tool, gen in TOOL_GENERATORS.items():
tool_dir = apps_dir / tool
if merged.get(tool):
tool_dir.mkdir(exist_ok=True)
(tool_dir / "application.yaml").write_text(
gen(tenant, base_domain, kc_base, kc_client_secret)
)
if tool in INFRA_GENERATORS:
(tool_dir / "infra.yaml").write_text(INFRA_GENERATORS[tool](tenant))
tool_urls[tool] = f"https://{tenant}-{tool}.{base_domain}"
else:
if tool_dir.exists():
shutil.rmtree(tool_dir)
_git_push(local, f"chore: update tools for tenant {tenant}", s.GITOPS_BRANCH)
return tool_urls
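# Hedged sketch of the tool-toggle path (hypothetical values): disabling a tool deletes its
# apps/<tool>/ directory and ArgoCD prunes the deployment on the next sync, e.g.
#
#   await update_tenant_tools("acme", {"airflow": False, "trino": True})
#   # -> {"superset": "https://acme-superset.<domain>", "trino": "https://acme-trino.<domain>"}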