From 9dedc7e8a366dd96ab02c7624988db5442d48ba8 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Fri, 13 Feb 2026 16:49:04 +0100 Subject: [PATCH 1/8] feat(infra): add optional external-secrets integration via rag-setup wrapper --- infrastructure/README.md | 10 + .../server-setup/rag-setup/Chart.lock | 9 + .../server-setup/rag-setup/Chart.yaml | 16 ++ .../rag-setup/templates/external-secrets.yaml | 238 ++++++++++++++++++ .../server-setup/rag-setup/values.yaml | 41 +++ 5 files changed, 314 insertions(+) create mode 100644 infrastructure/server-setup/rag-setup/Chart.lock create mode 100644 infrastructure/server-setup/rag-setup/Chart.yaml create mode 100644 infrastructure/server-setup/rag-setup/templates/external-secrets.yaml create mode 100644 infrastructure/server-setup/rag-setup/values.yaml diff --git a/infrastructure/README.md b/infrastructure/README.md index 27d2e3d9..a5322008 100644 --- a/infrastructure/README.md +++ b/infrastructure/README.md @@ -573,6 +573,16 @@ For deployment of the *NGINX Ingress Controller* and a cert-manager, the followi The email in the [cert-issuer template](server-setup/base-setup/templates/cert-issuer.yaml) should be changed from `` to a real email address. +For deploying RAG together with optional External Secrets Operator integration, use the wrapper chart: + +[rag-setup](server-setup/rag-setup/Chart.yaml) + +`rag-setup` keeps External Secrets optional behind `features.externalSecrets.enabled`. + +Notes: +- Local development with Tilt is unchanged: Tilt deploys `infrastructure/rag` directly, so External Secrets Operator from `rag-setup` is not deployed by default. +- For production with External Secrets, enable `features.externalSecrets.enabled=true` in `rag-setup` values and configure the `externalSecrets.secretStore` section. + ## 3. Contributing Please see the [CONTRIBUTING.md](CONTRIBUTING.md) file for more information on how to contribute to the RAG Infrastructure. 
diff --git a/infrastructure/server-setup/rag-setup/Chart.lock b/infrastructure/server-setup/rag-setup/Chart.lock new file mode 100644 index 00000000..44f8f5e8 --- /dev/null +++ b/infrastructure/server-setup/rag-setup/Chart.lock @@ -0,0 +1,9 @@ +dependencies: +- name: rag + repository: file://../../rag + version: 4.1.0 +- name: external-secrets + repository: https://charts.external-secrets.io + version: 1.2.1 +digest: sha256:ecb7f0bfd9021d264ff45286a9b1e99c9e3a34ed1d6b3fe481bc24fe40d57d0c +generated: "2026-02-13T16:40:56.676854+01:00" diff --git a/infrastructure/server-setup/rag-setup/Chart.yaml b/infrastructure/server-setup/rag-setup/Chart.yaml new file mode 100644 index 00000000..658010b3 --- /dev/null +++ b/infrastructure/server-setup/rag-setup/Chart.yaml @@ -0,0 +1,16 @@ +apiVersion: v2 +name: rag-setup +description: | + Wrapper chart that deploys rag and optional external-secrets resources. +type: application +version: 0.1.0 +appVersion: "0.1.0" +dependencies: +- name: rag + repository: "file://../../rag" + version: "4.1.0" + condition: features.rag.enabled +- name: external-secrets + repository: https://charts.external-secrets.io + version: "1.2.1" + condition: features.externalSecrets.enabled diff --git a/infrastructure/server-setup/rag-setup/templates/external-secrets.yaml b/infrastructure/server-setup/rag-setup/templates/external-secrets.yaml new file mode 100644 index 00000000..f6ecad84 --- /dev/null +++ b/infrastructure/server-setup/rag-setup/templates/external-secrets.yaml @@ -0,0 +1,238 @@ +{{- if and .Values.features.externalSecrets.enabled .Values.externalSecrets.resources.enabled }} +{{- $store := .Values.externalSecrets.secretStore }} +{{- $storeKind := default "SecretStore" $store.kind }} +{{- $storeName := default "stackit-vault" $store.name }} +{{- $remoteKey := default "rag-secrets" .Values.externalSecrets.remoteRefKey }} +{{- $refreshInterval := default "1h" .Values.externalSecrets.refreshInterval }} +{{- $userPassSecretRef := 
$store.auth.userPass.secretRef }} +{{- $clusterSecretNamespace := default .Release.Namespace $userPassSecretRef.namespace }} +{{- if $store.create }} +apiVersion: external-secrets.io/v1 +kind: {{ $storeKind }} +metadata: + name: {{ $storeName }} + {{- if ne $storeKind "ClusterSecretStore" }} + namespace: {{ .Release.Namespace }} + {{- end }} +spec: + provider: + vault: + server: {{ $store.server | quote }} + path: {{ $store.path | quote }} + version: {{ $store.version | quote }} + auth: + userPass: + path: {{ $store.auth.userPass.path | quote }} + username: {{ $store.auth.userPass.username | quote }} + secretRef: + name: {{ $store.auth.userPass.secretRef.name | quote }} + key: {{ $store.auth.userPass.secretRef.key | quote }} + {{- if eq $storeKind "ClusterSecretStore" }} + namespace: {{ $clusterSecretNamespace | quote }} + {{- end }} +--- +{{- end }} +{{- if and .Values.externalSecrets.userPassSecret.create .Values.externalSecrets.userPassSecret.password }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Values.externalSecrets.userPassSecret.name | quote }} + namespace: {{ default .Release.Namespace .Values.externalSecrets.userPassSecret.namespace | quote }} +type: Opaque +stringData: + password: {{ .Values.externalSecrets.userPassSecret.password | quote }} +{{- end }} +--- +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: rag-app-secrets + namespace: {{ .Release.Namespace }} +spec: + refreshInterval: {{ $refreshInterval }} + secretStoreRef: + name: {{ $storeName }} + kind: {{ $storeKind }} + target: + name: rag-app-secrets + creationPolicy: Owner + data: + - secretKey: LANGFUSE_PUBLIC_KEY + remoteRef: + key: {{ $remoteKey }} + property: LANGFUSE_PUBLIC_KEY + - secretKey: LANGFUSE_SECRET_KEY + remoteRef: + key: {{ $remoteKey }} + property: LANGFUSE_SECRET_KEY + - secretKey: STACKIT_EMBEDDER_API_KEY + remoteRef: + key: {{ $remoteKey }} + property: STACKIT_EMBEDDER_API_KEY + - secretKey: STACKIT_VLLM_API_KEY + remoteRef: + key: {{ 
$remoteKey }} + property: STACKIT_VLLM_API_KEY + - secretKey: RAGAS_OPENAI_API_KEY + remoteRef: + key: {{ $remoteKey }} + property: RAGAS_OPENAI_API_KEY + - secretKey: S3_ACCESS_KEY_ID + remoteRef: + key: {{ $remoteKey }} + property: S3_ACCESS_KEY_ID + - secretKey: S3_SECRET_ACCESS_KEY + remoteRef: + key: {{ $remoteKey }} + property: S3_SECRET_ACCESS_KEY + - secretKey: LANGFUSE_SALT + remoteRef: + key: {{ $remoteKey }} + property: LANGFUSE_SALT + - secretKey: LANGFUSE_NEXTAUTH + remoteRef: + key: {{ $remoteKey }} + property: LANGFUSE_NEXTAUTH +--- +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: vite-auth + namespace: {{ .Release.Namespace }} +spec: + refreshInterval: {{ $refreshInterval }} + secretStoreRef: + name: {{ $storeName }} + kind: {{ $storeKind }} + target: + name: vite-auth + creationPolicy: Owner + data: + - secretKey: VITE_AUTH_USERNAME + remoteRef: + key: {{ $remoteKey }} + property: BASIC_AUTH_USER + - secretKey: VITE_AUTH_PASSWORD + remoteRef: + key: {{ $remoteKey }} + property: BASIC_AUTH_PASSWORD +--- +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: basic-auth + namespace: {{ .Release.Namespace }} +spec: + refreshInterval: {{ $refreshInterval }} + secretStoreRef: + name: {{ $storeName }} + kind: {{ $storeKind }} + target: + name: basic-auth + creationPolicy: Owner + data: + - secretKey: auth + remoteRef: + key: {{ $remoteKey }} + property: BASIC_AUTH +--- +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: langfuse-init-secrets + namespace: {{ .Release.Namespace }} +spec: + refreshInterval: {{ $refreshInterval }} + secretStoreRef: + name: {{ $storeName }} + kind: {{ $storeKind }} + target: + name: langfuse-init-secrets + creationPolicy: Owner + data: + - secretKey: LANGFUSE_INIT_ORG_ID + remoteRef: + key: {{ $remoteKey }} + property: LANGFUSE_INIT_ORG_ID + - secretKey: LANGFUSE_INIT_PROJECT_ID + remoteRef: + key: {{ $remoteKey }} + property: LANGFUSE_INIT_PROJECT_ID + 
- secretKey: LANGFUSE_INIT_PROJECT_PUBLIC_KEY + remoteRef: + key: {{ $remoteKey }} + property: LANGFUSE_PUBLIC_KEY + - secretKey: LANGFUSE_INIT_PROJECT_SECRET_KEY + remoteRef: + key: {{ $remoteKey }} + property: LANGFUSE_SECRET_KEY + - secretKey: LANGFUSE_INIT_USER_EMAIL + remoteRef: + key: {{ $remoteKey }} + property: LANGFUSE_INIT_USER_EMAIL + - secretKey: LANGFUSE_INIT_USER_NAME + remoteRef: + key: {{ $remoteKey }} + property: LANGFUSE_INIT_USER_NAME + - secretKey: LANGFUSE_INIT_USER_PASSWORD + remoteRef: + key: {{ $remoteKey }} + property: LANGFUSE_INIT_USER_PASSWORD +--- +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: rag-postgres + namespace: {{ .Release.Namespace }} +spec: + refreshInterval: {{ $refreshInterval }} + secretStoreRef: + name: {{ $storeName }} + kind: {{ $storeKind }} + target: + name: rag-postgres + creationPolicy: Owner + data: + - secretKey: POSTGRES_PASSWORD + remoteRef: + key: {{ $remoteKey }} + property: POSTGRES_PASSWORD +--- +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: rag-redis + namespace: {{ .Release.Namespace }} +spec: + refreshInterval: {{ $refreshInterval }} + secretStoreRef: + name: {{ $storeName }} + kind: {{ $storeKind }} + target: + name: rag-redis + creationPolicy: Owner + data: + - secretKey: REDIS_PASSWORD + remoteRef: + key: {{ $remoteKey }} + property: REDIS_PASSWORD +--- +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: rag-clickhouse + namespace: {{ .Release.Namespace }} +spec: + refreshInterval: {{ $refreshInterval }} + secretStoreRef: + name: {{ $storeName }} + kind: {{ $storeKind }} + target: + name: rag-clickhouse + creationPolicy: Owner + data: + - secretKey: CLICKHOUSE_PASSWORD + remoteRef: + key: {{ $remoteKey }} + property: CLICKHOUSE_PASSWORD +{{- end }} diff --git a/infrastructure/server-setup/rag-setup/values.yaml b/infrastructure/server-setup/rag-setup/values.yaml new file mode 100644 index 00000000..1d1f916b --- /dev/null 
+++ b/infrastructure/server-setup/rag-setup/values.yaml @@ -0,0 +1,41 @@ +features: + rag: + enabled: true + externalSecrets: + # Keep disabled by default so dev/Tilt workflows can use the rag chart directly. + enabled: false + +rag: {} + +# Values for the external-secrets Helm dependency. +external-secrets: + installCRDs: true + +externalSecrets: + resources: + # ExternalSecret/SecretStore resources managed by this wrapper chart. + enabled: true + refreshInterval: "1h" + remoteRefKey: "rag-secrets" + secretStore: + # Set create=false if you want to reference an already existing (Cluster)SecretStore. + create: true + name: "stackit-vault" + kind: "ClusterSecretStore" + server: "https://prod.sm.eu01.stackit.cloud" + path: "<>" + version: "v2" + auth: + userPass: + path: "userpass" + username: "<>" + secretRef: + name: "stackit-vault-userpass" + key: "password" + # Required by ClusterSecretStore references in ESO. + namespace: "cert-manager" + userPassSecret: + create: false + name: "stackit-vault-userpass" + namespace: "cert-manager" + password: "" From ff870a12748a7e685191c676dba981abdb284918 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Tue, 17 Feb 2026 12:02:34 +0100 Subject: [PATCH 2/8] feat: update external-secrets version to 2.0.0 in Chart.yaml and Chart.lock --- infrastructure/server-setup/rag-setup/Chart.lock | 6 +++--- infrastructure/server-setup/rag-setup/Chart.yaml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/infrastructure/server-setup/rag-setup/Chart.lock b/infrastructure/server-setup/rag-setup/Chart.lock index 44f8f5e8..6c293195 100644 --- a/infrastructure/server-setup/rag-setup/Chart.lock +++ b/infrastructure/server-setup/rag-setup/Chart.lock @@ -4,6 +4,6 @@ dependencies: version: 4.1.0 - name: external-secrets repository: https://charts.external-secrets.io - version: 1.2.1 -digest: sha256:ecb7f0bfd9021d264ff45286a9b1e99c9e3a34ed1d6b3fe481bc24fe40d57d0c -generated: "2026-02-13T16:40:56.676854+01:00" + version: 2.0.0 
+digest: sha256:048c060fb05bd668c2c16b21f8c6f82438accee4ed2a9a1a84b6a976e1261611 +generated: "2026-02-17T11:58:06.494129+01:00" diff --git a/infrastructure/server-setup/rag-setup/Chart.yaml b/infrastructure/server-setup/rag-setup/Chart.yaml index 658010b3..48b80f28 100644 --- a/infrastructure/server-setup/rag-setup/Chart.yaml +++ b/infrastructure/server-setup/rag-setup/Chart.yaml @@ -12,5 +12,5 @@ dependencies: condition: features.rag.enabled - name: external-secrets repository: https://charts.external-secrets.io - version: "1.2.1" + version: "2.0.0" condition: features.externalSecrets.enabled From d9e3f4c037dd91a89c7207e85e0a9280c9e332b9 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Tue, 17 Feb 2026 13:12:47 +0100 Subject: [PATCH 3/8] feat(terraform): expose redis connection outputs (#278) ## Summary - add Terraform outputs for managed Redis connection details - expose host, load-balanced host, port, username, password, and URI - mark secret-bearing outputs as sensitive --- infrastructure/terraform/redis.tf | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/infrastructure/terraform/redis.tf b/infrastructure/terraform/redis.tf index b70d9ce0..5380a694 100644 --- a/infrastructure/terraform/redis.tf +++ b/infrastructure/terraform/redis.tf @@ -16,3 +16,29 @@ resource "stackit_redis_credential" "rag_redis_cred" { project_id = var.project_id instance_id = stackit_redis_instance.rag_redis.instance_id } + +output "redis_host" { + value = stackit_redis_credential.rag_redis_cred.host +} + +output "redis_load_balanced_host" { + value = stackit_redis_credential.rag_redis_cred.load_balanced_host +} + +output "redis_port" { + value = stackit_redis_credential.rag_redis_cred.port +} + +output "redis_username" { + value = stackit_redis_credential.rag_redis_cred.username +} + +output "redis_password" { + value = stackit_redis_credential.rag_redis_cred.password + sensitive = true +} + +output "redis_uri" { + value = 
stackit_redis_credential.rag_redis_cred.uri + sensitive = true +} From 98cf188b9b1e19d722330d22339d7dccc9858222 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Tue, 17 Feb 2026 16:04:20 +0100 Subject: [PATCH 4/8] feat: add one-command production deployment script and Helm value generation - Introduced `deploy-rag-prod.sh` script for streamlined production deployment, encompassing Terraform apply, secrets seeding, and Helm chart deployment. - Added `generate-rag-setup-prod-values.sh` script to generate production override values for the rag-setup Helm chart from Terraform outputs. - Updated `README.md` to document the new deployment process and Helm value generation. - Enhanced `object_storage.tf` to output additional object storage bucket details and endpoint. - Modified `seed-secrets` module to support optional overrides for sensitive values. --- infrastructure/README.md | 36 +- infrastructure/scripts/deploy-rag-prod.sh | 230 +++++ .../base-setup/templates/cert-issuer.yaml | 29 +- .../server-setup/base-setup/values.yaml | 8 + .../rag-setup/templates/external-secrets.yaml | 12 + .../server-setup/rag-setup/values.yaml | 840 +++++++++++++++++- infrastructure/terraform/README.md | 16 + infrastructure/terraform/object_storage.tf | 12 + .../scripts/generate-rag-setup-prod-values.sh | 236 +++++ .../terraform/seed-secrets/README.md | 8 + infrastructure/terraform/seed-secrets/main.tf | 2 +- .../seed-secrets/terraform.tfvars.example | 3 + .../terraform/seed-secrets/variables.tf | 7 + 13 files changed, 1417 insertions(+), 22 deletions(-) create mode 100755 infrastructure/scripts/deploy-rag-prod.sh create mode 100755 infrastructure/terraform/scripts/generate-rag-setup-prod-values.sh diff --git a/infrastructure/README.md b/infrastructure/README.md index 3becea6e..2d4d7f3c 100644 --- a/infrastructure/README.md +++ b/infrastructure/README.md @@ -598,17 +598,45 @@ For deployment of the *NGINX Ingress Controller* and a cert-manager, the followi 
[base-setup](server-setup/base-setup/Chart.yaml) -The email in the [cert-issuer template](server-setup/base-setup/templates/cert-issuer.yaml) should be changed from `` to a real email address. +Set a real ACME email for cert-manager via the deploy script (`--issuer-email`) or via `base-setup` value `certIssuer.email`. -For deploying RAG together with optional External Secrets Operator integration, use the wrapper chart: +For deploying RAG together with External Secrets Operator integration, use the wrapper chart: [rag-setup](server-setup/rag-setup/Chart.yaml) -`rag-setup` keeps External Secrets optional behind `features.externalSecrets.enabled`. +`rag-setup` defaults are production-oriented and reference ESO-managed secrets. Notes: - Local development with Tilt is unchanged: Tilt deploys `infrastructure/rag` directly, so External Secrets Operator from `rag-setup` is not deployed by default. -- For production with External Secrets, enable `features.externalSecrets.enabled=true` in `rag-setup` values and configure the `externalSecrets.secretStore` section. 
+- Fastest production path (one command after you prepared `seed-secrets/terraform.tfvars`): + +```bash +./infrastructure/scripts/deploy-rag-prod.sh \ + --issuer-email you@example.com \ + --auto-approve +``` + +- The script performs all steps: + - Terraform backend bootstrap (if needed) and infra apply + - seed-secrets apply (with infra-derived overrides for PostgreSQL/Redis/S3 keys) + - generation of `server-setup/rag-setup/values.prod.auto.yaml` + - deployment of `base-setup` and `rag-setup` + +- Manual fallback: generate the rag values override directly from Terraform outputs: + +```bash +cd infrastructure/terraform +./scripts/generate-rag-setup-prod-values.sh \ + --output ../server-setup/rag-setup/values.prod.auto.yaml +``` + +- Manual fallback: deploy rag-setup with: + +```bash +helm upgrade --install rag-setup infrastructure/server-setup/rag-setup \ + -f infrastructure/server-setup/rag-setup/values.yaml \ + -f infrastructure/server-setup/rag-setup/values.prod.auto.yaml +``` ## 3. Contributing diff --git a/infrastructure/scripts/deploy-rag-prod.sh b/infrastructure/scripts/deploy-rag-prod.sh new file mode 100755 index 00000000..67a1a5d6 --- /dev/null +++ b/infrastructure/scripts/deploy-rag-prod.sh @@ -0,0 +1,230 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +INFRA_DIR="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" +TF_DIR="${INFRA_DIR}/terraform" +SEED_DIR="${TF_DIR}/seed-secrets" +BASE_CHART_DIR="${INFRA_DIR}/server-setup/base-setup" +RAG_CHART_DIR="${INFRA_DIR}/server-setup/rag-setup" + +AUTO_APPROVE=0 +SKIP_BACKEND_BOOTSTRAP=0 +HELM_TIMEOUT="20m" +BASE_NAMESPACE="cert-manager" +RAG_NAMESPACE="rag" +VAULT_USERPASS_NAMESPACE="cert-manager" +VALUES_OUTPUT_FILE="${RAG_CHART_DIR}/values.prod.auto.yaml" +SEED_TFVARS_FILE="${SEED_DIR}/terraform.tfvars" +ISSUER_EMAIL="" +# Print the help text; options that consume a value are shown with a <placeholder>. +usage() { + cat <<'EOF' +Deploy full RAG production setup in one run: +- Terraform infra apply +- Seed Secrets Manager (rag-secrets) +- Generate rag-setup prod Helm values from Terraform outputs +- Deploy base-setup and rag-setup charts + +Usage: + ./infrastructure/scripts/deploy-rag-prod.sh --issuer-email <email> [options] + +Options: + --issuer-email <email> Required ACME email for cert-manager ClusterIssuer. + --auto-approve Pass -auto-approve to Terraform apply commands. + --skip-backend-bootstrap Skip terraform backend bootstrap helper. + --seed-tfvars-file <path> Seed secrets tfvars file (default: infrastructure/terraform/seed-secrets/terraform.tfvars). + --values-output-file <path> Generated rag values file path (default: infrastructure/server-setup/rag-setup/values.prod.auto.yaml). + --base-namespace <namespace> Namespace for base-setup release (default: cert-manager). + --rag-namespace <namespace> Namespace for rag-setup release (default: rag). + --vault-userpass-namespace <namespace> + Namespace for stackit-vault-userpass secret (default: cert-manager). + --helm-timeout <duration> Helm wait timeout (default: 20m). + -h, --help Show this help. + +Required precondition: + - seed-secrets tfvars exists and includes app/API secrets in rag_secrets. + Infra-derived keys (POSTGRES_PASSWORD, REDIS_USERNAME, REDIS_PASSWORD, + S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY) are auto-overridden from Terraform outputs. 
+EOF +} +# Parse CLI flags. ${2:?...} aborts with a message when a value-taking flag has no (or an empty) value, instead of 'shift 2' failing silently under 'set -e'. +while [[ $# -gt 0 ]]; do + case "$1" in + --issuer-email) + ISSUER_EMAIL="${2:?--issuer-email requires a value}" + shift 2 + ;; + --auto-approve) + AUTO_APPROVE=1 + shift + ;; + --skip-backend-bootstrap) + SKIP_BACKEND_BOOTSTRAP=1 + shift + ;; + --seed-tfvars-file) + SEED_TFVARS_FILE="${2:?--seed-tfvars-file requires a value}" + shift 2 + ;; + --values-output-file) + VALUES_OUTPUT_FILE="${2:?--values-output-file requires a value}" + shift 2 + ;; + --base-namespace) + BASE_NAMESPACE="${2:?--base-namespace requires a value}" + shift 2 + ;; + --rag-namespace) + RAG_NAMESPACE="${2:?--rag-namespace requires a value}" + shift 2 + ;; + --vault-userpass-namespace) + VAULT_USERPASS_NAMESPACE="${2:?--vault-userpass-namespace requires a value}" + shift 2 + ;; + --helm-timeout) + HELM_TIMEOUT="${2:?--helm-timeout requires a value}" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage >&2 + exit 1 + ;; + esac +done + +if [[ -z "${ISSUER_EMAIL}" ]]; then + echo "--issuer-email is required." >&2 + usage >&2 + exit 1 +fi + +if [[ ! -f "${SEED_TFVARS_FILE}" ]]; then + echo "Seed tfvars file not found: ${SEED_TFVARS_FILE}" >&2 + echo "Copy ${SEED_DIR}/terraform.tfvars.example and fill app/API secrets first." >&2 + exit 1 +fi + +for cmd in terraform helm kubectl jq grep; do + if ! command -v "${cmd}" >/dev/null 2>&1; then + echo "Missing required command: ${cmd}" >&2 + exit 1 + fi +done + +if grep -Eq '<<[^>]+>>' "${SEED_TFVARS_FILE}"; then + echo "Warning: ${SEED_TFVARS_FILE} still contains placeholder markers (<<...>>)." >&2 + echo "Deployment will continue, but seed-secrets apply may fail or write placeholder values." 
>&2 +fi +# Optional Terraform flags. Expanded below as ${arr[@]+"${arr[@]}"} so the empty array is safe under 'set -u' on bash < 4.4. +approve_args=() +if [[ "${AUTO_APPROVE}" -eq 1 ]]; then + approve_args+=("-auto-approve") +fi + +log() { + echo "[deploy-rag-prod] $*" +} + +if [[ "${SKIP_BACKEND_BOOTSTRAP}" -eq 0 ]]; then + log "Bootstrapping Terraform backend if needed" + BOOTSTRAP_AUTO_APPROVE="${AUTO_APPROVE}" "${TF_DIR}/scripts/init-backend.sh" +else + log "Skipping backend bootstrap by request" +fi + +log "Applying main Terraform stack" +terraform -chdir="${TF_DIR}" init +terraform -chdir="${TF_DIR}" apply ${approve_args[@]+"${approve_args[@]}"} + +log "Reading Terraform outputs" +secretsmanager_instance_id="$(terraform -chdir="${TF_DIR}" output -raw secretsmanager_instance_id)" +secretsmanager_username="$(terraform -chdir="${TF_DIR}" output -raw secretsmanager_username)" +secretsmanager_password="$(terraform -chdir="${TF_DIR}" output -raw secretsmanager_password)" +postgres_password="$(terraform -chdir="${TF_DIR}" output -raw postgres_password)" +redis_username="$(terraform -chdir="${TF_DIR}" output -raw redis_username)" +redis_password="$(terraform -chdir="${TF_DIR}" output -raw redis_password)" +s3_access_key_id="$(terraform -chdir="${TF_DIR}" output -raw object_storage_access_key)" +s3_secret_access_key="$(terraform -chdir="${TF_DIR}" output -raw object_storage_secret_key)" +dns_name="$(terraform -chdir="${TF_DIR}" output -raw dns_name | sed 's/\.$//')" + +# Terraform parses a -var-file as JSON only when its name ends in ".json"; keep the jq output in a private temp dir under such a name. +seed_override_dir="$(mktemp -d)" +seed_override_file="${seed_override_dir}/seed-overrides.tfvars.json" +cleanup() { rm -rf "${seed_override_dir}"; } +trap cleanup EXIT + +jq -n \ + --arg vault_mount_path "${secretsmanager_instance_id}" \ + --arg vault_username "${secretsmanager_username}" \ + --arg vault_password "${secretsmanager_password}" \ + --arg postgres_password "${postgres_password}" \ + --arg redis_username "${redis_username}" \ + --arg redis_password "${redis_password}" \ + --arg s3_access_key_id "${s3_access_key_id}" \ + --arg s3_secret_access_key "${s3_secret_access_key}" \ + '{ + vault_mount_path: $vault_mount_path, + vault_username: $vault_username, + vault_password: $vault_password, + 
rag_secrets_overrides: { + POSTGRES_PASSWORD: $postgres_password, + REDIS_USERNAME: $redis_username, + REDIS_PASSWORD: $redis_password, + S3_ACCESS_KEY_ID: $s3_access_key_id, + S3_SECRET_ACCESS_KEY: $s3_secret_access_key + } + }' > "${seed_override_file}" + +log "Applying seed-secrets stack" +terraform -chdir="${SEED_DIR}" init +terraform -chdir="${SEED_DIR}" apply \ + ${approve_args[@]+"${approve_args[@]}"} \ + -var-file="${SEED_TFVARS_FILE}" \ + -var-file="${seed_override_file}" + +log "Generating rag-setup production values" +"${TF_DIR}/scripts/generate-rag-setup-prod-values.sh" \ + --terraform-dir "${TF_DIR}" \ + --output "${VALUES_OUTPUT_FILE}" + +log "Ensuring namespaces exist" +kubectl get namespace "${BASE_NAMESPACE}" >/dev/null 2>&1 || kubectl create namespace "${BASE_NAMESPACE}" +kubectl get namespace "${RAG_NAMESPACE}" >/dev/null 2>&1 || kubectl create namespace "${RAG_NAMESPACE}" +kubectl get namespace "${VAULT_USERPASS_NAMESPACE}" >/dev/null 2>&1 || kubectl create namespace "${VAULT_USERPASS_NAMESPACE}" + +log "Syncing stackit-vault-userpass secret in namespace ${VAULT_USERPASS_NAMESPACE}" +kubectl -n "${VAULT_USERPASS_NAMESPACE}" create secret generic stackit-vault-userpass \ + --from-literal=password="${secretsmanager_password}" \ + --dry-run=client -o yaml | kubectl apply -f - + +log "Updating Helm dependencies" +helm dependency update "${BASE_CHART_DIR}" +helm dependency update "${RAG_CHART_DIR}" + +log "Deploying base-setup chart" +helm upgrade --install base-setup "${BASE_CHART_DIR}" \ + -n "${BASE_NAMESPACE}" \ + --create-namespace \ + --set certIssuer.email="${ISSUER_EMAIL}" \ + --wait \ + --timeout "${HELM_TIMEOUT}" + +log "Deploying rag-setup chart" +helm upgrade --install rag-setup "${RAG_CHART_DIR}" \ + -n "${RAG_NAMESPACE}" \ + --create-namespace \ + -f "${RAG_CHART_DIR}/values.yaml" \ + -f "${VALUES_OUTPUT_FILE}" \ + --wait \ + --timeout "${HELM_TIMEOUT}" + +log "Done" +echo "RAG URL: https://rag.${dns_name}" +echo "Admin URL: https://admin.${dns_name}" 
diff --git a/infrastructure/server-setup/base-setup/templates/cert-issuer.yaml b/infrastructure/server-setup/base-setup/templates/cert-issuer.yaml index c71c44fb..45bced89 100644 --- a/infrastructure/server-setup/base-setup/templates/cert-issuer.yaml +++ b/infrastructure/server-setup/base-setup/templates/cert-issuer.yaml @@ -1,19 +1,18 @@ apiVersion: cert-manager.io/v1 kind: ClusterIssuer metadata: - name: letsencrypt-cluster-issuer + name: {{ .Values.certIssuer.name }} spec: - acme: - # The ACME server URL - server: https://acme-v02.api.letsencrypt.org/directory - #server: https://acme-staging-v02.api.letsencrypt.org/directory - # Email address used for ACME registration - email: - # Name of a secret used to store the ACME account private key - privateKeySecretRef: - name: letsencrypt-cluster-issuer-key - # Enable the HTTP-01 challenge provider - solvers: - - http01: - ingress: - class: nginx + acme: + # The ACME server URL + server: {{ .Values.certIssuer.server | quote }} + # Email address used for ACME registration + email: {{ .Values.certIssuer.email | quote }} + # Name of a secret used to store the ACME account private key + privateKeySecretRef: + name: {{ .Values.certIssuer.privateKeySecretName }} + # Enable the HTTP-01 challenge provider + solvers: + - http01: + ingress: + class: {{ .Values.certIssuer.ingressClass | quote }} diff --git a/infrastructure/server-setup/base-setup/values.yaml b/infrastructure/server-setup/base-setup/values.yaml index 2f6f1764..4c1620d7 100644 --- a/infrastructure/server-setup/base-setup/values.yaml +++ b/infrastructure/server-setup/base-setup/values.yaml @@ -1,6 +1,14 @@ cert-manager: installCRDs: false +certIssuer: + name: letsencrypt-cluster-issuer + server: https://acme-v02.api.letsencrypt.org/directory + # Use a real email address for ACME account registration. + email: "" + privateKeySecretName: letsencrypt-cluster-issuer-key + ingressClass: nginx + # Configuration overrides for the official ingress-nginx chart dependency. 
# Pin the controller image tag to v1.13.3 (matching the script installation). ingress-nginx: diff --git a/infrastructure/server-setup/rag-setup/templates/external-secrets.yaml b/infrastructure/server-setup/rag-setup/templates/external-secrets.yaml index f6ecad84..c78a40f5 100644 --- a/infrastructure/server-setup/rag-setup/templates/external-secrets.yaml +++ b/infrastructure/server-setup/rag-setup/templates/external-secrets.yaml @@ -212,10 +212,22 @@ spec: name: rag-redis creationPolicy: Owner data: + - secretKey: REDIS_USERNAME + remoteRef: + key: {{ $remoteKey }} + property: REDIS_USERNAME - secretKey: REDIS_PASSWORD remoteRef: key: {{ $remoteKey }} property: REDIS_PASSWORD + - secretKey: USECASE_KEYVALUE_USERNAME + remoteRef: + key: {{ $remoteKey }} + property: REDIS_USERNAME + - secretKey: USECASE_KEYVALUE_PASSWORD + remoteRef: + key: {{ $remoteKey }} + property: REDIS_PASSWORD --- apiVersion: external-secrets.io/v1 kind: ExternalSecret diff --git a/infrastructure/server-setup/rag-setup/values.yaml b/infrastructure/server-setup/rag-setup/values.yaml index 1d1f916b..c011bd52 100644 --- a/infrastructure/server-setup/rag-setup/values.yaml +++ b/infrastructure/server-setup/rag-setup/values.yaml @@ -2,10 +2,844 @@ features: rag: enabled: true externalSecrets: - # Keep disabled by default so dev/Tilt workflows can use the rag chart directly. + # Enabled by default because rag values in this file reference ESO-managed secrets. + # Disable only if you provide those secrets by another mechanism. + enabled: true + +rag: + # Production-focused defaults for the wrapped rag chart. + # Replace placeholder domains/endpoints with your environment values. + global: + security: + # -- Allow insecure images to use bitnami legacy repository. Can be set to false if secure images are being used (Paid). 
+ allowInsecureImages: false + + features: + ollama: + enabled: false + minio: + enabled: false + langfuse: + enabled: true + qdrant: + enabled: true + frontend: + enabled: true + keydb: + enabled: false + mcp: + enabled: true + + backend: + + mcp: + name: "mcp" + port: "8000" + host: "0.0.0.0" + + # Chat simple tool configuration + # The following configuration for the chat_simple tool will render as follows: + # """Send a message to the RAG system and get a simple text response. + + # This is the simplest way to interact with the RAG system - just provide a message and get back the answer as plain text. + # + # Parameters + # ---------- + # session_id : str + # Unique identifier for the chat session. + # message : str + # The message/question to ask the RAG system. + # + # Returns + # ------- + # str + # The answer from the RAG system as plain text. + # """ + chatSimpleDescription: "Send a message to the RAG system and get a simple text response.\n\nThis is the simplest way to interact with the RAG system - just provide a message and get back the answer as plain text." + chatSimpleParameterDescriptions: + session_id: "Unique identifier for the chat session." + message: "The message/question to ask the RAG system." + chatSimpleReturns: "The answer from the RAG system as plain text." + chatSimpleNotes: "" + # If you add a Value to chatSimpleNotes e.g. "This tool is best for simple questions that don't require conversation context." + # it will render to: + # Notes + # ----- + # This tool is best for simple questions that don't require conversation context. + chatSimpleExamples: "" + # If you add a Value to chatSimpleExamples e.g. 
"chat_simple(session_id='my-session', message='What is the main topic of the document?')" + # it will render to: + # Examples + # -------- + # chat_simple(session_id='my-session', message='What is the main topic of the document?') + + # Chat with history tool configuration + chatWithHistoryDescription: "Send a message with conversation history and get structured response.\n\nProvide conversation history as a simple list of dictionaries.\nEach history item should have 'role' (either 'user' or 'assistant') and 'message' keys." + chatWithHistoryParameterDescriptions: + session_id: "Unique identifier for the chat session." + message: "The current message/question to ask." + history: "Previous conversation history. Each item should be:\n {\"role\": \"user\" or \"assistant\", \"message\": \"the message text\"}" + chatWithHistoryReturns: "Response containing:\n - answer: The response text\n - finish_reason: Why the response ended\n - citations: List of source documents used (simplified)" + chatWithHistoryNotes: "" + chatWithHistoryExamples: "" + + image: + repository: ghcr.io/stackitcloud/rag-template/mcp-server + pullPolicy: Always + tag: "" + + name: backend + replicaCount: 1 + + image: + repository: ghcr.io/stackitcloud/rag-template/rag-backend + pullPolicy: Always + tag: "" + + command: + - "poetry" + - "run" + args: + - "python" + - "-m" + - "uvicorn" + - "main:perfect_rag_app" + - "--host" + - "0.0.0.0" + - "--port" + - "8080" + - "--loop" + - "asyncio" + + # Note: Each uvicorn worker is a separate Python process and can significantly + # increase memory usage. 
+ workers: 3 + wsMaxQueue: 6 + + debugArgs: + - "python" + - "-Xfrozen_modules=off" + - "-m" + - "debugpy" + - "--wait-for-client" + - "--listen" + - "0.0.0.0:31415" + - "-m" + - "uvicorn" + - "main:perfect_rag_app" + - "--host" + - "0.0.0.0" + - "--port" + - "8080" + - "--reload" + - "--reload-dir" + - "/app/services/rag-backend" + - "--reload-dir" + - "/app/libs/rag-core-api" + - "--reload-dir" + - "/app/libs/rag-core-lib" + - "--loop" + - "asyncio" + + service: + type: ClusterIP + port: 8080 + annotations: {} + + pythonPathEnv: + PYTHONPATH: src + + ingress: + enabled: true + host: + name: rag.example.com + path: /api/chat(/|$)(.*) + pathType: ImplementationSpecific + port: 8080 + + secrets: + langfuse: + publicKey: + value: "" + secretKeyRef: + name: "rag-app-secrets" + key: "LANGFUSE_PUBLIC_KEY" + secretKey: + value: "" + secretKeyRef: + name: "rag-app-secrets" + key: "LANGFUSE_SECRET_KEY" + stackitEmbedder: + apiKey: + value: "" + secretKeyRef: + name: "rag-app-secrets" + key: "STACKIT_EMBEDDER_API_KEY" + stackitVllm: + apiKey: + value: "" + secretKeyRef: + name: "rag-app-secrets" + key: "STACKIT_VLLM_API_KEY" + ragas: + openaiApikey: + value: "" + secretKeyRef: + name: "rag-app-secrets" + key: "RAGAS_OPENAI_API_KEY" + + envs: + stackitVllm: + STACKIT_VLLM_MODEL: openai/gpt-oss-120b + STACKIT_VLLM_BASE_URL: https://api.openai-compat.model-serving.eu01.onstackit.cloud/v1 + database: + VECTOR_DB_COLLECTION_NAME: rag-db + VECTOR_DB_LOCATION: http://rag-qdrant:6333 + VECTOR_DB_VALIDATE_COLLECTION_CONFIG: false + retriever: + RETRIEVER_THRESHOLD: 0.3 + RETRIEVER_K_DOCUMENTS: 10 + # Canonical global cap across all retrievers. 
Replaces legacy RETRIEVER_TOTAL_K / RETRIEVER_OVERALL_K_DOCUMENTS + RETRIEVER_TOTAL_K_DOCUMENTS: 7 + RETRIEVER_SUMMARY_THRESHOLD: 0.3 + RETRIEVER_SUMMARY_K_DOCUMENTS: 10 + RETRIEVER_TABLE_THRESHOLD: 0.3 + RETRIEVER_TABLE_K_DOCUMENTS: 10 + RETRIEVER_IMAGE_THRESHOLD: 0.7 + RETRIEVER_IMAGE_K_DOCUMENTS: 10 + errorMessages: + ERROR_MESSAGES_NO_DOCUMENTS_MESSAGE: "I'm sorry, my responses are limited. You must ask the right questions." + ERROR_MESSAGES_NO_OR_EMPTY_COLLECTION: "No documents were provided for searching." + ERROR_MESSAGES_HARMFUL_QUESTION: "I'm sorry, but harmful requests cannot be processed." + ERROR_MESSAGES_NO_ANSWER_FOUND: "I'm sorry, I couldn't find an answer with the context provided." + ERROR_MESSAGE_EMPTY_MESSAGE: "I'm sorry, but I can't answer an empty question." + langfuse: + LANGFUSE_DATASET_NAME: "rag_test_ds" + LANGFUSE_DATASET_FILENAME: "/app/test_data.json" + LANGFUSE_HOST: "http://rag-langfuse-web:3000" #NOTE: http protocol needs to be defined! + ragClassTypes: + RAG_CLASS_TYPE_LLM_TYPE: "stackit" + ragas: + RAGAS_IS_DEBUG: false + RAGAS_MODEL: "gpt-4o-mini" + RAGAS_USE_OPENAI: true + RAGAS_TIMEOUT: 60 + RAGAS_EVALUATION_DATASET_NAME: "eval-data" + RAGAS_MAX_CONCURRENCY: "5" + embedderClassTypes: + EMBEDDER_CLASS_TYPE_EMBEDDER_TYPE: "stackit" + stackitEmbedder: + STACKIT_EMBEDDER_MODEL: "Qwen/Qwen3-VL-Embedding-8B" + STACKIT_EMBEDDER_BASE_URL: https://api.openai-compat.model-serving.eu01.onstackit.cloud/v1 + # Retry settings (optional). If omitted, fall back to shared RETRY_DECORATOR_* values. 
+ STACKIT_EMBEDDER_MAX_RETRIES: "5" + STACKIT_EMBEDDER_RETRY_BASE_DELAY: "0.5" + STACKIT_EMBEDDER_RETRY_MAX_DELAY: "600" + STACKIT_EMBEDDER_BACKOFF_FACTOR: "2" + STACKIT_EMBEDDER_ATTEMPT_CAP: "6" + STACKIT_EMBEDDER_JITTER_MIN: "0.05" + STACKIT_EMBEDDER_JITTER_MAX: "0.25" + ollama: + OLLAMA_MODEL: "llama3.2:3b-instruct-fp16" + OLLAMA_BASE_URL: "http://rag-ollama:11434" + OLLAMA_TOP_K: 0 + OLLAMA_TOP_P: 0 + OLLAMA_TEMPERATURE: 0 + ollamaEmbedder: + OLLAMA_EMBEDDER_MODEL: "bge-m3" + OLLAMA_EMBEDDER_BASE_URL: "http://rag-ollama:11434" + fakeEmbedder: + FAKE_EMBEDDER_SIZE: 386 + reranker: + RERANKER_K_DOCUMENTS: 5 + RERANKER_MIN_RELEVANCE_SCORE: 0.001 + RERANKER_ENABLED: true + RERANKER_MODEL: "ms-marco-MultiBERT-L-12" + chatHistory: + CHAT_HISTORY_LIMIT: 4 + CHAT_HISTORY_REVERSE: true + + frontend: + name: frontend + replicaCount: 1 + image: + repository: ghcr.io/stackitcloud/rag-template/frontend + pullPolicy: Always + tag: "" + + service: + type: ClusterIP + port: 8080 + + ingress: + enabled: true + host: + name: rag.example.com + path: / + pathType: ImplementationSpecific + port: 8080 + + envs: + vite: + VITE_CHAT_AUTH_ENABLED: true + VITE_API_URL: "https://rag.example.com/api" + VITE_CHAT_URL: "https://rag.example.com" + VITE_ADMIN_URL: "https://admin.rag.example.com" + VITE_ADMIN_API_URL: "https://admin.rag.example.com/api" + + adminBackend: + replicaCount: 1 + + name: admin-backend + + image: + repository: ghcr.io/stackitcloud/rag-template/admin-backend + pullPolicy: Always + tag: "" + + command: + - "poetry" + - "run" + args: + - "python" + - "-m" + - "uvicorn" + - "main:perfect_admin_app" + - "--host" + - "0.0.0.0" + - "--port" + - "8080" + - "--root-path" + - "/api" + debugArgs: + - "python" + - "-Xfrozen_modules=off" + - "-m" + - "debugpy" + - "--wait-for-client" + - "--listen" + - "0.0.0.0:31415" + - "-m" + - "uvicorn" + - "main:perfect_admin_app" + - "--host" + - "0.0.0.0" + - "--port" + - "8080" + - "--reload" + - "--reload-dir" + - 
"/app/services/admin-backend" + - "--reload-dir" + - "/app/libs/rag-core-lib" + - "--reload-dir" + - "/app/libs/admin-api-lib" + - "--root-path" + - "/api" + + service: + type: ClusterIP + port: 8080 + + pythonPathEnv: + PYTHONPATH: src + + ingress: + enabled: true + host: + name: admin.rag.example.com + path: /api(/|$)(.*) + pathType: ImplementationSpecific + port: 8080 + + secrets: + keyValueStore: + username: + value: "" + secretKeyRef: + name: "rag-redis" + key: "USECASE_KEYVALUE_USERNAME" + password: + value: "" + secretKeyRef: + name: "rag-redis" + key: "USECASE_KEYVALUE_PASSWORD" + + envs: + summarizer: + SUMMARIZER_MAXIMUM_INPUT_SIZE: "8000" + SUMMARIZER_MAXIMUM_CONCURRENCY: "10" + # Retry settings (optional). If omitted, fall back to shared RETRY_DECORATOR_* values. + SUMMARIZER_MAX_RETRIES: "5" + SUMMARIZER_RETRY_BASE_DELAY: "0.5" + SUMMARIZER_RETRY_MAX_DELAY: "600" + SUMMARIZER_BACKOFF_FACTOR: "2" + SUMMARIZER_ATTEMPT_CAP: "6" + SUMMARIZER_JITTER_MIN: "0.05" + SUMMARIZER_JITTER_MAX: "0.25" + ragapi: + RAG_API_HOST: "http://backend:8080" + chunker: + # Select which chunker implementation to use. Supported values: "semantic", "recursive" + # Defaults to "semantic" which leverages sentence-aware rebalancing. + CHUNKER_CLASS_TYPE_CHUNKER_TYPE: "recursive" + CHUNKER_MAX_SIZE: 1000 + CHUNKER_OVERLAP: 100 + # The following settings for the Chunker are only used when CHUNKER_CLASS_TYPE_CHUNKER_TYPE is set to "semantic". + CHUNKER_BREAKPOINT_THRESHOLD_TYPE: "percentile" + CHUNKER_BREAKPOINT_THRESHOLD_AMOUNT: 95 + CHUNKER_BUFFER_SIZE: 1 + CHUNKER_MIN_SIZE: 200 + keyValueStore: + USECASE_KEYVALUE_PORT: 6379 + USECASE_KEYVALUE_HOST: "redis.example.com" + USECASE_KEYVALUE_USE_SSL: true + USECASE_KEYVALUE_SSL_CERT_REQS: "" + USECASE_KEYVALUE_SSL_CA_CERTS: "" + USECASE_KEYVALUE_SSL_CERTFILE: "" + USECASE_KEYVALUE_SSL_KEYFILE: "" + USECASE_KEYVALUE_SSL_CHECK_HOSTNAME: true + sourceUploader: + # Large sitemap ingestions (per-page summaries) can take > 1 hour. 
+ SOURCE_UPLOADER_TIMEOUT: 3600 + + extractor: + replicaCount: 1 + name: extractor + image: + repository: ghcr.io/stackitcloud/rag-template/document-extractor + pullPolicy: Always + tag: "" + + command: + - "poetry" + - "run" + args: + - "python" + - "-m" + - "uvicorn" + - "main:perfect_extractor_app" + - "--host" + - "0.0.0.0" + - "--port" + - "8080" + debugArgs: + - "python" + - "-Xfrozen_modules=off" + - "-m" + - "debugpy" + - "--wait-for-client" + - "--listen" + - "0.0.0.0:31415" + - "-m" + - "uvicorn" + - "main:perfect_extractor_app" + - "--host" + - "0.0.0.0" + - "--port" + - "8080" + - "--reload" + - "--reload-dir" + - "/app/services/document-extractor" + - "--reload-dir" + - "/app/libs/extractor-api-lib" + + service: + type: ClusterIP + port: 8080 + + pythonPathEnv: + PYTHONPATH: src + huggingfaceCacheDir: /tmp/hf-cache + # Directory inside the container to use as writable cache for ModelScope / OCR models + modelscopeCacheDir: /var/modelscope + + envs: + sitemap: + # Controls how HTML pages are parsed when loading from an XML sitemap. + # Options: "docusaurus" (default), "astro", "generic" + # Note: https://docs.stackit.cloud is built with Astro/Starlight -> use "astro". 
+ SITEMAP_PARSER: docusaurus + + adminFrontend: + name: admin-frontend + replicaCount: 1 + image: + repository: ghcr.io/stackitcloud/rag-template/admin-frontend + pullPolicy: Always + tag: "" + + service: + type: ClusterIP + port: 8080 + + exports: + chart_name: + adminFrontendChartName: admin-frontend + + ingress: + enabled: true + host: + name: admin.rag.example.com + path: / + pathType: ImplementationSpecific + port: 8080 + + shared: + # These values are used across all templates + ssl: true + + debug: + backend: + enabled: false + + imagePullSecret: + # create: false + # name: cr-credentials + # auths: + # username: github-username # replace with your github username + # pat: github-pat # replace with your github personal access token + # email: email-address@domain.de # replace with your email address + # registry: ghcr.io + + config: + dns: + - rag.example.com + - admin.rag.example.com + basicAuth: + enabled: true + tls: + enabled: true + host: rag.example.com + secretName: tls-certificate + issuerName: letsencrypt-cluster-issuer + issuerKind: ClusterIssuer + + secrets: + basicAuth: + auth: + value: "" # Optional: precomputed htpasswd line (e.g., "user:$apr1$..."); overrides inline user/pass hashing. + secretKeyRef: + name: "basic-auth" + key: "auth" + user: + value: "" + secretKeyRef: + name: "" + key: "BASIC_AUTH_USER" + password: + value: "" + secretKeyRef: + name: "" + key: "BASIC_AUTH_PASSWORD" + s3: + accessKey: + value: "" + secretKeyRef: + name: "rag-app-secrets" + key: "S3_ACCESS_KEY_ID" + secretKey: + value: "" + secretKeyRef: + name: "rag-app-secrets" + key: "S3_SECRET_ACCESS_KEY" + usecaseExistingSecretName: "rag-app-secrets" # Optional: existing Secret name (e.g., from ESO). If set, chart will not create usecase secret. 
+ usecase: {} + + + envs: + s3: + S3_ENDPOINT: https://s3.example.com + S3_BUCKET: documents + retryDecorator: + RETRY_DECORATOR_MAX_RETRIES: "5" + RETRY_DECORATOR_RETRY_BASE_DELAY: "0.5" + RETRY_DECORATOR_RETRY_MAX_DELAY: "600" + RETRY_DECORATOR_BACKOFF_FACTOR: "2" + RETRY_DECORATOR_ATTEMPT_CAP: "6" + RETRY_DECORATOR_JITTER_MIN: "0.05" + RETRY_DECORATOR_JITTER_MAX: "0.25" + usecase: + + + langfuse: + # Core Langfuse Configuration + langfuse: + # Used to hash API keys + salt: + value: "" + secretKeyRef: + name: "rag-app-secrets" + key: "LANGFUSE_SALT" + + # Authentication settings + features: + telemetryEnabled: true + signUpDisabled: false + + # Web deployment configuration + web: + image: + repository: langfuse/langfuse + tag: "3.152.0" + pullPolicy: Always + + # Worker deployment configuration + worker: + image: + repository: langfuse/langfuse-worker + tag: "3.152.0" + pullPolicy: Always + port: 3030 + + # NextAuth configuration + nextauth: + # Set to your public Langfuse URL when exposing Langfuse. 
+ url: https://langfuse.example.com + secret: + value: "" + secretKeyRef: + name: "rag-app-secrets" + key: "LANGFUSE_NEXTAUTH" + + # Additional environment variables (only for init values) + additionalEnv: + - name: LANGFUSE_INIT_ORG_ID + valueFrom: + secretKeyRef: + name: "langfuse-init-secrets" + key: "LANGFUSE_INIT_ORG_ID" + - name: LANGFUSE_INIT_PROJECT_ID + valueFrom: + secretKeyRef: + name: "langfuse-init-secrets" + key: "LANGFUSE_INIT_PROJECT_ID" + - name: LANGFUSE_INIT_PROJECT_PUBLIC_KEY + valueFrom: + secretKeyRef: + name: "langfuse-init-secrets" + key: "LANGFUSE_INIT_PROJECT_PUBLIC_KEY" + - name: LANGFUSE_INIT_PROJECT_SECRET_KEY + valueFrom: + secretKeyRef: + name: "langfuse-init-secrets" + key: "LANGFUSE_INIT_PROJECT_SECRET_KEY" + - name: LANGFUSE_INIT_USER_EMAIL + valueFrom: + secretKeyRef: + name: "langfuse-init-secrets" + key: "LANGFUSE_INIT_USER_EMAIL" + - name: LANGFUSE_INIT_USER_NAME + valueFrom: + secretKeyRef: + name: "langfuse-init-secrets" + key: "LANGFUSE_INIT_USER_NAME" + - name: LANGFUSE_INIT_USER_PASSWORD + valueFrom: + secretKeyRef: + name: "langfuse-init-secrets" + key: "LANGFUSE_INIT_USER_PASSWORD" + + # Additional init containers + # Generated prod overrides add a wait-for-postgres init container with the real host/port. 
+ extraInitContainers: [] + + # PostgreSQL Configuration (use external PostgreSQL) + postgresql: + deploy: true + host: "rag-postgresql" + port: 5432 + image: + repository: bitnami/postgresql + auth: + username: postgres + password: "" + database: langfuse + existingSecret: "rag-postgres" # NOTE: for production use existing secret to fetch password + secretKeys: + userPasswordKey: "POSTGRES_PASSWORD" + + # Redis Configuration (external KeyDB) + redis: + deploy: false + host: "redis.example.com" + port: 6379 + auth: + # Generated automatically by infrastructure/terraform/scripts/generate-rag-setup-prod-values.sh + username: "<>" + password: "" + existingSecret: "rag-redis" # NOTE: for production use existing secret to fetch password + existingSecretPasswordKey: "REDIS_PASSWORD" + + # ClickHouse Configuration (external ClickHouse) + clickhouse: + deploy: true + host: "rag-clickhouse" + httpPort: 8123 + nativePort: 9000 + image: + repository: bitnami/clickhouse + auth: + username: "default" + password: "" + existingSecret: "rag-clickhouse" # NOTE: for production use existing secret to fetch password + existingSecretKey: "CLICKHOUSE_PASSWORD" + migration: + url: "clickhouse://rag-clickhouse:9000" + ssl: false + autoMigrate: true + resources: + limits: + cpu: "2" + memory: "8Gi" + requests: + cpu: "2" + memory: "4Gi" + + zookeeper: + image: + repository: bitnami/zookeeper + resources: + limits: + cpu: "2" + memory: "2Gi" + requests: + cpu: "1" + memory: "1Gi" + + valkey: + image: + repository: bitnami/valkey + + # S3/MinIO Configuration (external MinIO) + s3: + deploy: false + bucket: "langfuse" + region: "auto" + endpoint: "https://s3.example.com" + forcePathStyle: true + accessKeyId: + value: "" + secretKeyRef: + name: "rag-app-secrets" + key: "S3_ACCESS_KEY_ID" + secretAccessKey: + value: "" + secretKeyRef: + name: "rag-app-secrets" + key: "S3_SECRET_ACCESS_KEY" + eventUpload: + enabled: true + bucket: "langfuse" + region: "auto" + endpoint: "https://s3.example.com" 
+ forcePathStyle: true + accessKeyId: + value: "" + secretKeyRef: + name: "rag-app-secrets" + key: "S3_ACCESS_KEY_ID" + secretAccessKey: + value: "" + secretKeyRef: + name: "rag-app-secrets" + key: "S3_SECRET_ACCESS_KEY" + + # Optional: enforce a ClickHouse TTL for Langfuse traces without Enterprise data retention management. + # This runs as a CronJob and applies idempotent ALTER TABLE ... MODIFY TTL commands. + langfuseRetention: enabled: false + retentionDays: 365 + schedule: "15 */6 * * *" + podSecurityContext: + runAsUser: 1001 + runAsNonRoot: true + securityContext: + allowPrivilegeEscalation: false + # Optional resources for both retention CronJobs. + # Example: + # resources: + # requests: + # cpu: 100m + # memory: 128Mi + # limits: + # cpu: 500m + # memory: 512Mi + resources: {} + # Optional deterministic deletion in addition to TTL. + # Uses ALTER TABLE ... DELETE WHERE ... and can run nightly. + hardDelete: + enabled: false + schedule: "30 3 * * *" + # ClickHouse mutations_sync setting: + # 0 = async (default), 1 = wait for local completion, 2 = wait for all replicas. + mutationSync: 0 + image: + repository: "bitnami/clickhouse" + tag: "25.7.5-debian-12-r0" + pullPolicy: IfNotPresent + clickhouse: + # Connection/auth are taken from langfuse.clickhouse.*. + # Align this with the database Langfuse actually uses in ClickHouse. + database: "default" + # Set to true only for clustered ClickHouse deployments where clusterName exists. + # Keep false for single-node/non-clustered deployments. + onCluster: false + clusterName: "default" + tables: + # timestampColumn should be a Date/DateTime/DateTime64 column in the target table. 
+ - name: "traces" + timestampColumn: "timestamp" + - name: "observations" + timestampColumn: "event_ts" + - name: "scores" + timestampColumn: "timestamp" + + minio: + image: + repository: bitnami/minio + auth: + ## @param auth.rootUser MinIO® root username + ## + rootUser: admin + ## @param auth.rootPassword Password for MinIO® root user + ## + rootPassword: "adminpassword" + ## @param defaultBuckets Comma, semi-colon or space separated list of buckets to create at initialization (only in standalone mode) + ## e.g: + ## defaultBuckets: "my-bucket, my-second-bucket" + ## + defaultBuckets: "documents,langfuse" + networkPolicy: + enabled: false + mode: standalone + + ollama: + image: + tag: 0.12.3 + ollama: + models: + pull: + - llama3.2:3b-instruct-fp16 + - bge-m3 + runs: + - llama3.2:3b-instruct-fp16 + - bge-m3 + + qdrant: + image: + tag: v1.15.4 + + keydb: + multiMaster: "no" + activeReplicas: "no" + nodes: 1 + # Authentication for the bundled KeyDB chart and the admin backend client. + password: "" # Inline password for development. Leave empty to disable auth or prefer existingSecret for production. + existingSecret: "" # Optional existing secret containing the KeyDB password. + existingSecretPasswordKey: "password" + auth: + username: "default" -rag: {} # Values for the external-secrets Helm dependency. 
external-secrets: @@ -23,11 +857,13 @@ externalSecrets: name: "stackit-vault" kind: "ClusterSecretStore" server: "https://prod.sm.eu01.stackit.cloud" + # Generated automatically by infrastructure/terraform/scripts/generate-rag-setup-prod-values.sh path: "<>" version: "v2" auth: userPass: path: "userpass" + # Generated automatically by infrastructure/terraform/scripts/generate-rag-setup-prod-values.sh username: "<>" secretRef: name: "stackit-vault-userpass" diff --git a/infrastructure/terraform/README.md b/infrastructure/terraform/README.md index a172191a..18f29797 100644 --- a/infrastructure/terraform/README.md +++ b/infrastructure/terraform/README.md @@ -71,6 +71,22 @@ Important: The sa_key.json file contains sensitive credentials. Never commit it terraform apply rag.tfplan ``` +6. **Run one-command production deployment (recommended)** + ```bash + cd .. + ./scripts/deploy-rag-prod.sh --issuer-email you@example.com --auto-approve + ``` + This wraps Terraform + Helm orchestration end-to-end. + +7. **Or only generate Helm overrides for rag-setup** + ```bash + ./scripts/generate-rag-setup-prod-values.sh \ + --output ../server-setup/rag-setup/values.prod.auto.yaml + ``` + This command reads Terraform outputs and writes a production override for the wrapper chart. + It pre-fills DNS-based hostnames, Secrets Manager path/user, Postgres connection values, + Redis host/port/username, and object storage endpoint/buckets. 
+ ## Single-Node SKE Configuration The SKE cluster is configured with a single node setup which is suitable for development and testing: diff --git a/infrastructure/terraform/object_storage.tf b/infrastructure/terraform/object_storage.tf index 58735569..f5bb11ea 100644 --- a/infrastructure/terraform/object_storage.tf +++ b/infrastructure/terraform/object_storage.tf @@ -43,3 +43,15 @@ output "object_storage_secret_key" { output "object_storage_bucket" { value = stackit_objectstorage_bucket.tfstate.name } + +output "object_storage_documents_bucket" { + value = stackit_objectstorage_bucket.documents.name +} + +output "object_storage_langfuse_bucket" { + value = stackit_objectstorage_bucket.langfuse.name +} + +output "object_storage_endpoint" { + value = "https://object.storage.${var.region}.onstackit.cloud" +} diff --git a/infrastructure/terraform/scripts/generate-rag-setup-prod-values.sh b/infrastructure/terraform/scripts/generate-rag-setup-prod-values.sh new file mode 100755 index 00000000..b8e0d3f0 --- /dev/null +++ b/infrastructure/terraform/scripts/generate-rag-setup-prod-values.sh @@ -0,0 +1,236 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TF_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" +OUTPUT_FILE="-" +STATE_FILE="" + +usage() { + cat <<'EOF' +Generate a production override values file for the rag-setup Helm chart from Terraform outputs. + +Usage: + ./scripts/generate-rag-setup-prod-values.sh [--output <file>] [--state-file <file>] [--terraform-dir <dir>] + +Options: + --output <file> Write YAML to this file. Use "-" (default) for stdout. + --state-file <file> Read outputs from a local Terraform state file (JSON) with jq. + --terraform-dir <dir> Terraform root directory (default: infrastructure/terraform). + +Environment overrides: + RAG_HOST Default: rag.<dns_name> + ADMIN_HOST Default: admin.<dns_name> + LANGFUSE_HOST Default: langfuse.<dns_name>
+ S3_ENDPOINT Default: object_storage_endpoint output + DOCUMENTS_BUCKET Default: object_storage_documents_bucket output + LANGFUSE_BUCKET Default: object_storage_langfuse_bucket output +EOF +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --output) + OUTPUT_FILE="${2:-}" + shift 2 + ;; + --state-file) + STATE_FILE="${2:-}" + shift 2 + ;; + --terraform-dir) + TF_DIR="${2:-}" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage >&2 + exit 1 + ;; + esac +done + +if [[ -n "${STATE_FILE}" ]]; then + if ! command -v jq >/dev/null 2>&1; then + echo "jq is required when --state-file is used." >&2 + exit 1 + fi + if [[ ! -f "${STATE_FILE}" ]]; then + echo "State file not found: ${STATE_FILE}" >&2 + exit 1 + fi +else + if ! command -v terraform >/dev/null 2>&1; then + echo "terraform is required when --state-file is not used." >&2 + exit 1 + fi +fi + +get_output() { + local key="$1" + if [[ -n "${STATE_FILE}" ]]; then + local value + value="$(jq -er --arg key "${key}" '.outputs[$key].value // empty' "${STATE_FILE}" 2>/dev/null || true)" + if [[ -n "${value}" && "${value}" != "null" ]]; then + printf '%s\n' "${value}" + return 0 + fi + + case "${key}" in + redis_host) + jq -er '.resources[] | select(.type=="stackit_redis_credential" and .name=="rag_redis_cred") | .instances[0].attributes.host' "${STATE_FILE}" + ;; + redis_port) + jq -er '.resources[] | select(.type=="stackit_redis_credential" and .name=="rag_redis_cred") | .instances[0].attributes.port' "${STATE_FILE}" + ;; + redis_username) + jq -er '.resources[] | select(.type=="stackit_redis_credential" and .name=="rag_redis_cred") | .instances[0].attributes.username' "${STATE_FILE}" + ;; + object_storage_documents_bucket) + jq -er '.resources[] | select(.type=="stackit_objectstorage_bucket" and .name=="documents") | .instances[0].attributes.name' "${STATE_FILE}" + ;; + object_storage_langfuse_bucket) + jq -er '.resources[] | select(.type=="stackit_objectstorage_bucket" and 
.name=="langfuse") | .instances[0].attributes.name' "${STATE_FILE}" + ;; + object_storage_endpoint) + local region + region="$(jq -er '.resources[] | select(.type=="stackit_objectstorage_bucket" and .name=="documents") | .instances[0].attributes.region' "${STATE_FILE}" 2>/dev/null || true)" + if [[ -z "${region}" || "${region}" == "null" ]]; then + region="${STACKIT_REGION:-eu01}" + fi + printf 'https://object.storage.%s.onstackit.cloud\n' "${region}" + ;; + *) + echo "Missing output '${key}' in ${STATE_FILE}. Run terraform apply to refresh outputs." >&2 + return 1 + ;; + esac + else + terraform -chdir="${TF_DIR}" output -raw "${key}" + fi +} + +dns_name="$(get_output dns_name)" +dns_name="${dns_name%.}" +secretsmanager_instance_id="$(get_output secretsmanager_instance_id)" +secretsmanager_username="$(get_output secretsmanager_username)" +postgres_host="$(get_output postgres_host)" +postgres_port="$(get_output postgres_port)" +postgres_username="$(get_output postgres_username)" +postgres_database="$(get_output postgres_database)" +redis_host="$(get_output redis_host)" +redis_port="$(get_output redis_port)" +redis_username="$(get_output redis_username)" +object_storage_endpoint="$(get_output object_storage_endpoint)" +documents_bucket="$(get_output object_storage_documents_bucket)" +langfuse_bucket="$(get_output object_storage_langfuse_bucket)" + +rag_host="${RAG_HOST:-rag.${dns_name}}" +admin_host="${ADMIN_HOST:-admin.${dns_name}}" +langfuse_host="${LANGFUSE_HOST:-langfuse.${dns_name}}" +s3_endpoint="${S3_ENDPOINT:-${object_storage_endpoint}}" +documents_bucket="${DOCUMENTS_BUCKET:-${documents_bucket}}" +langfuse_bucket="${LANGFUSE_BUCKET:-${langfuse_bucket}}" + +render_yaml() { + cat < "${OUTPUT_FILE}" + echo "Wrote ${OUTPUT_FILE}" >&2 +fi diff --git a/infrastructure/terraform/seed-secrets/README.md b/infrastructure/terraform/seed-secrets/README.md index 502d5d7c..8e373c0f 100644 --- a/infrastructure/terraform/seed-secrets/README.md +++ 
b/infrastructure/terraform/seed-secrets/README.md @@ -14,6 +14,7 @@ Why this is a separate step: - `vault_username`/`vault_password` are from the `secretsmanager_*` outputs. - `rag_secrets` should include all keys referenced by your ExternalSecret resources. For the cert-manager webhook, store the service account key JSON under `STACKIT_CERT_MANAGER_SA_JSON` (use a heredoc to avoid escaping). + - Optional: `rag_secrets_overrides` can be used to force specific keys (for example infra-derived PostgreSQL/Redis/S3 values). 3. Run: ```bash terraform init @@ -21,6 +22,13 @@ Why this is a separate step: terraform apply ``` +When using `infrastructure/scripts/deploy-rag-prod.sh`, the script passes `rag_secrets_overrides` automatically for: +- `POSTGRES_PASSWORD` +- `REDIS_USERNAME` +- `REDIS_PASSWORD` +- `S3_ACCESS_KEY_ID` +- `S3_SECRET_ACCESS_KEY` + ## Security note All values written by `vault_kv_secret_v2` are stored in Terraform state. Use a secure backend and restrict access. diff --git a/infrastructure/terraform/seed-secrets/main.tf b/infrastructure/terraform/seed-secrets/main.tf index 463c7421..7ea286c1 100644 --- a/infrastructure/terraform/seed-secrets/main.tf +++ b/infrastructure/terraform/seed-secrets/main.tf @@ -22,5 +22,5 @@ provider "vault" { resource "vault_kv_secret_v2" "rag_docs" { mount = var.vault_mount_path name = var.vault_secret_name - data_json = jsonencode(var.rag_secrets) + data_json = jsonencode(merge(var.rag_secrets, var.rag_secrets_overrides)) } diff --git a/infrastructure/terraform/seed-secrets/terraform.tfvars.example b/infrastructure/terraform/seed-secrets/terraform.tfvars.example index 0ae4ec2b..32f95c42 100644 --- a/infrastructure/terraform/seed-secrets/terraform.tfvars.example +++ b/infrastructure/terraform/seed-secrets/terraform.tfvars.example @@ -20,7 +20,10 @@ rag_secrets = { BASIC_AUTH_USER = "<>" BASIC_AUTH_PASSWORD = "<>" BASIC_AUTH = "<>" + # The production deploy script auto-populates the following from Terraform outputs: + # 
POSTGRES_PASSWORD, REDIS_USERNAME, REDIS_PASSWORD, S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY POSTGRES_PASSWORD = "<>" + REDIS_USERNAME = "<>" REDIS_PASSWORD = "<>" CLICKHOUSE_PASSWORD = "<>" STACKIT_CERT_MANAGER_SA_JSON = < Date: Wed, 18 Feb 2026 11:52:35 +0100 Subject: [PATCH 5/8] feat: enhance deployment scripts and documentation for external secrets integration --- infrastructure/README.md | 8 +- infrastructure/scripts/deploy-rag-prod.sh | 235 ++++++++++++++++-- .../server-setup/rag-setup/Chart.lock | 8 +- .../server-setup/rag-setup/Chart.yaml | 4 +- .../server-setup/rag-setup/values.yaml | 4 +- infrastructure/terraform/README.md | 18 +- .../terraform/scripts/init-backend.sh | 96 ++++++- .../terraform/seed-secrets/README.md | 32 ++- .../seed-secrets/terraform.tfvars.example | 42 ++-- .../terraform/terraform.tfvars.example | 10 + rag-core-library | 1 + rag-infrastructure | 1 + 12 files changed, 400 insertions(+), 59 deletions(-) create mode 160000 rag-core-library create mode 160000 rag-infrastructure diff --git a/infrastructure/README.md b/infrastructure/README.md index 2d4d7f3c..d356499d 100644 --- a/infrastructure/README.md +++ b/infrastructure/README.md @@ -605,10 +605,12 @@ For deploying RAG together with External Secrets Operator integration, use the w [rag-setup](server-setup/rag-setup/Chart.yaml) `rag-setup` defaults are production-oriented and reference ESO-managed secrets. +Its `rag` dependency is resolved from the published GitHub Pages chart repo (`https://stackitcloud.github.io/rag-template`) with a pinned chart version. Notes: - Local development with Tilt is unchanged: Tilt deploys `infrastructure/rag` directly, so External Secrets Operator from `rag-setup` is not deployed by default. 
-- Fastest production path (one command after you prepared `seed-secrets/terraform.tfvars`): +- Fastest production path (one command after you prepared `seed-secrets/terraform.tfvars` with app/API values in `rag_secrets`): +- Before running, ensure `infrastructure/terraform/terraform.tfvars` includes at least `project_id`, `dns_name`, and `rag_cluster_name`. ```bash ./infrastructure/scripts/deploy-rag-prod.sh \ @@ -618,10 +620,12 @@ Notes: - The script performs all steps: - Terraform backend bootstrap (if needed) and infra apply - - seed-secrets apply (with infra-derived overrides for PostgreSQL/Redis/S3 keys) + - seed-secrets apply (with Terraform-derived overrides for PostgreSQL/Redis/S3 and STACKIT model-serving keys, plus auto-generated app secrets when placeholders are present) - generation of `server-setup/rag-setup/values.prod.auto.yaml` - deployment of `base-setup` and `rag-setup` +- `STACKIT_CERT_MANAGER_SA_JSON` is not generated by Terraform in this flow. Provide it in `seed-secrets/terraform.tfvars`. + - Manual fallback: generate the rag values override directly from Terraform outputs: ```bash diff --git a/infrastructure/scripts/deploy-rag-prod.sh b/infrastructure/scripts/deploy-rag-prod.sh index 67a1a5d6..1725a78b 100755 --- a/infrastructure/scripts/deploy-rag-prod.sh +++ b/infrastructure/scripts/deploy-rag-prod.sh @@ -5,6 +5,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" INFRA_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" TF_DIR="${INFRA_DIR}/terraform" SEED_DIR="${TF_DIR}/seed-secrets" +BACKEND_CONFIG_FILE="${TF_DIR}/.backend.hcl" BASE_CHART_DIR="${INFRA_DIR}/server-setup/base-setup" RAG_CHART_DIR="${INFRA_DIR}/server-setup/rag-setup" @@ -43,9 +44,15 @@ Options: -h, --help Show this help. Required precondition: - - seed-secrets tfvars exists and includes app/API secrets in rag_secrets. - Infra-derived keys (POSTGRES_PASSWORD, REDIS_USERNAME, REDIS_PASSWORD, - S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY) are auto-overridden from Terraform outputs. 
+ - seed-secrets tfvars exists and includes user-provided app/API values in rag_secrets. + This script auto-injects vault_mount_path, vault_username, vault_password and + overrides Terraform-derived keys (POSTGRES_PASSWORD, REDIS_USERNAME, REDIS_PASSWORD, + S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, STACKIT_EMBEDDER_API_KEY, + STACKIT_VLLM_API_KEY, RAGAS_OPENAI_API_KEY). + STACKIT_CERT_MANAGER_SA_JSON must be provided in tfvars (rag_secrets or rag_secrets_overrides). + It also auto-generates selected secrets with openssl when missing/placeholder: + LANGFUSE_INIT_USER_PASSWORD, LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY, + LANGFUSE_SALT, LANGFUSE_NEXTAUTH, BASIC_AUTH_PASSWORD, CLICKHOUSE_PASSWORD. EOF } @@ -107,20 +114,46 @@ fi if [[ ! -f "${SEED_TFVARS_FILE}" ]]; then echo "Seed tfvars file not found: ${SEED_TFVARS_FILE}" >&2 - echo "Copy ${SEED_DIR}/terraform.tfvars.example and fill app/API secrets first." >&2 + echo "Copy ${SEED_DIR}/terraform.tfvars.example and fill app/API values in rag_secrets first." >&2 + echo "The deploy script injects vault_* and Terraform-derived DB/Redis/S3/model-serving values automatically." >&2 exit 1 fi -for cmd in terraform helm kubectl jq grep; do +for cmd in terraform helm kubectl jq grep awk openssl; do if ! command -v "${cmd}" >/dev/null 2>&1; then echo "Missing required command: ${cmd}" >&2 exit 1 fi done +rag_dependency_repo="$( + awk ' + BEGIN { in_rag = 0 } + /^[[:space:]]*-[[:space:]]*name:[[:space:]]*rag[[:space:]]*$/ { in_rag = 1; next } + in_rag && /^[[:space:]]*repository:[[:space:]]*/ { + line = $0 + sub(/^[[:space:]]*repository:[[:space:]]*/, "", line) + gsub(/["'\''[:space:]]/, "", line) + print line + exit + } + ' "${RAG_CHART_DIR}/Chart.yaml" +)" + +if [[ "${rag_dependency_repo}" == file://* ]]; then + echo "Production deploy requires rag-setup to use a published rag chart repository, not ${rag_dependency_repo}." 
>&2 + echo "Update ${RAG_CHART_DIR}/Chart.yaml dependency 'rag.repository' to the published GitHub chart repo first." >&2 + exit 1 +fi + if grep -Eq '<<[^>]+>>' "${SEED_TFVARS_FILE}"; then echo "Warning: ${SEED_TFVARS_FILE} still contains placeholder markers (<<...>>)." >&2 - echo "Deployment will continue, but seed-secrets apply may fail or write placeholder values." >&2 + echo "Deployment will continue, but placeholder values may be written for unreplaced keys." >&2 +fi + +if ! grep -Eq '^[[:space:]]*STACKIT_CERT_MANAGER_SA_JSON[[:space:]]*=' "${SEED_TFVARS_FILE}"; then + echo "Seed tfvars must define STACKIT_CERT_MANAGER_SA_JSON (in rag_secrets or rag_secrets_overrides)." >&2 + exit 1 fi approve_args=() @@ -132,6 +165,43 @@ log() { echo "[deploy-rag-prod] $*" } +extract_rag_secret_value() { + local key="$1" + local raw_value + + raw_value="$(awk -v wanted="${key}" ' + BEGIN { in_map = 0 } + /^[[:space:]]*rag_secrets[[:space:]]*=[[:space:]]*{/ { in_map = 1; next } + in_map && /^[[:space:]]*}/ { in_map = 0; next } + in_map { + line = $0 + sub(/#.*/, "", line) + if (match(line, "^[[:space:]]*" wanted "[[:space:]]*=")) { + sub("^[[:space:]]*" wanted "[[:space:]]*=[[:space:]]*", "", line) + gsub(/^[[:space:]]+|[[:space:]]+$/, "", line) + print line + exit + } + } + ' "${SEED_TFVARS_FILE}")" + + raw_value="${raw_value#"${raw_value%%[![:space:]]*}"}" + raw_value="${raw_value%"${raw_value##*[![:space:]]}"}" + + if [[ "${raw_value}" == \"*\" && "${raw_value}" == *\" ]]; then + raw_value="${raw_value:1:${#raw_value}-2}" + elif [[ "${raw_value}" == \'*\' && "${raw_value}" == *\' ]]; then + raw_value="${raw_value:1:${#raw_value}-2}" + fi + + printf '%s' "${raw_value}" +} + +is_missing_or_placeholder() { + local value="$1" + [[ -z "${value}" || "${value}" == *"<<"*">>"* ]] +} + if [[ "${SKIP_BACKEND_BOOTSTRAP}" -eq 0 ]]; then log "Bootstrapping Terraform backend if needed" BOOTSTRAP_AUTO_APPROVE="${AUTO_APPROVE}" "${TF_DIR}/scripts/init-backend.sh" @@ -140,7 +210,11 @@ else 
fi log "Applying main Terraform stack" -terraform -chdir="${TF_DIR}" init +if [[ -f "${BACKEND_CONFIG_FILE}" ]]; then + terraform -chdir="${TF_DIR}" init -reconfigure -backend-config="${BACKEND_CONFIG_FILE}" +else + terraform -chdir="${TF_DIR}" init -reconfigure +fi terraform -chdir="${TF_DIR}" apply "${approve_args[@]}" log "Reading Terraform outputs" @@ -152,8 +226,70 @@ redis_username="$(terraform -chdir="${TF_DIR}" output -raw redis_username)" redis_password="$(terraform -chdir="${TF_DIR}" output -raw redis_password)" s3_access_key_id="$(terraform -chdir="${TF_DIR}" output -raw object_storage_access_key)" s3_secret_access_key="$(terraform -chdir="${TF_DIR}" output -raw object_storage_secret_key)" +model_serving_bearer_token="$(terraform -chdir="${TF_DIR}" output -raw model_serving_bearer_token 2>/dev/null || true)" dns_name="$(terraform -chdir="${TF_DIR}" output -raw dns_name | sed 's/\.$//')" +if [[ -n "${model_serving_bearer_token}" ]]; then + log "Found model serving bearer token output for STACKIT_* and RAGAS_OPENAI_API_KEY overrides" +else + log "No model_serving_bearer_token output found; STACKIT_EMBEDDER_API_KEY/STACKIT_VLLM_API_KEY/RAGAS_OPENAI_API_KEY will not be auto-overridden" +fi + +generated_langfuse_init_user_password="" +generated_langfuse_public_key="" +generated_langfuse_secret_key="" +generated_langfuse_salt="" +generated_langfuse_nextauth="" +generated_basic_auth_password="" +generated_clickhouse_password="" +generated_basic_auth="" + +if is_missing_or_placeholder "$(extract_rag_secret_value "LANGFUSE_INIT_USER_PASSWORD")"; then + generated_langfuse_init_user_password="$(openssl rand -hex 24)" + log "Generated LANGFUSE_INIT_USER_PASSWORD override" +fi + +if is_missing_or_placeholder "$(extract_rag_secret_value "LANGFUSE_PUBLIC_KEY")"; then + generated_langfuse_public_key="pk-lf-$(openssl rand -hex 16)" + log "Generated LANGFUSE_PUBLIC_KEY override" +fi + +if is_missing_or_placeholder "$(extract_rag_secret_value "LANGFUSE_SECRET_KEY")"; then 
+ generated_langfuse_secret_key="sk-lf-$(openssl rand -hex 32)" + log "Generated LANGFUSE_SECRET_KEY override" +fi + +if is_missing_or_placeholder "$(extract_rag_secret_value "LANGFUSE_SALT")"; then + generated_langfuse_salt="$(openssl rand -hex 32)" + log "Generated LANGFUSE_SALT override" +fi + +if is_missing_or_placeholder "$(extract_rag_secret_value "LANGFUSE_NEXTAUTH")"; then + generated_langfuse_nextauth="$(openssl rand -hex 32)" + log "Generated LANGFUSE_NEXTAUTH override" +fi + +if is_missing_or_placeholder "$(extract_rag_secret_value "BASIC_AUTH_PASSWORD")"; then + generated_basic_auth_password="$(openssl rand -hex 24)" + log "Generated BASIC_AUTH_PASSWORD override" +fi + +if is_missing_or_placeholder "$(extract_rag_secret_value "CLICKHOUSE_PASSWORD")"; then + generated_clickhouse_password="$(openssl rand -hex 24)" + log "Generated CLICKHOUSE_PASSWORD override" +fi + +if [[ -n "${generated_basic_auth_password}" ]]; then + basic_auth_user="$(extract_rag_secret_value "BASIC_AUTH_USER")" + if ! 
is_missing_or_placeholder "${basic_auth_user}"; then + generated_basic_auth_hash="$(openssl passwd -apr1 "${generated_basic_auth_password}")" + generated_basic_auth="${basic_auth_user}:${generated_basic_auth_hash}" + log "Generated BASIC_AUTH override from BASIC_AUTH_USER + generated BASIC_AUTH_PASSWORD" + else + log "BASIC_AUTH_USER missing/placeholder; BASIC_AUTH override was not generated" + fi +fi + seed_override_file="$(mktemp)" cleanup() { rm -f "${seed_override_file}" @@ -169,17 +305,88 @@ jq -n \ --arg redis_password "${redis_password}" \ --arg s3_access_key_id "${s3_access_key_id}" \ --arg s3_secret_access_key "${s3_secret_access_key}" \ + --arg model_serving_bearer_token "${model_serving_bearer_token}" \ + --arg generated_langfuse_init_user_password "${generated_langfuse_init_user_password}" \ + --arg generated_langfuse_public_key "${generated_langfuse_public_key}" \ + --arg generated_langfuse_secret_key "${generated_langfuse_secret_key}" \ + --arg generated_langfuse_salt "${generated_langfuse_salt}" \ + --arg generated_langfuse_nextauth "${generated_langfuse_nextauth}" \ + --arg generated_basic_auth_password "${generated_basic_auth_password}" \ + --arg generated_clickhouse_password "${generated_clickhouse_password}" \ + --arg generated_basic_auth "${generated_basic_auth}" \ '{ vault_mount_path: $vault_mount_path, vault_username: $vault_username, vault_password: $vault_password, - rag_secrets_overrides: { - POSTGRES_PASSWORD: $postgres_password, - REDIS_USERNAME: $redis_username, - REDIS_PASSWORD: $redis_password, - S3_ACCESS_KEY_ID: $s3_access_key_id, - S3_SECRET_ACCESS_KEY: $s3_secret_access_key - } + rag_secrets_overrides: ( + { + POSTGRES_PASSWORD: $postgres_password, + REDIS_USERNAME: $redis_username, + REDIS_PASSWORD: $redis_password, + S3_ACCESS_KEY_ID: $s3_access_key_id, + S3_SECRET_ACCESS_KEY: $s3_secret_access_key + } + ( + if $model_serving_bearer_token != "" then + ( + { + STACKIT_EMBEDDER_API_KEY: $model_serving_bearer_token, + 
STACKIT_VLLM_API_KEY: $model_serving_bearer_token, + RAGAS_OPENAI_API_KEY: $model_serving_bearer_token + } + ) + else + {} + end + ) + ( + if $generated_langfuse_init_user_password != "" then + { LANGFUSE_INIT_USER_PASSWORD: $generated_langfuse_init_user_password } + else + {} + end + ) + ( + if $generated_langfuse_public_key != "" then + { LANGFUSE_PUBLIC_KEY: $generated_langfuse_public_key } + else + {} + end + ) + ( + if $generated_langfuse_secret_key != "" then + { LANGFUSE_SECRET_KEY: $generated_langfuse_secret_key } + else + {} + end + ) + ( + if $generated_langfuse_salt != "" then + { LANGFUSE_SALT: $generated_langfuse_salt } + else + {} + end + ) + ( + if $generated_langfuse_nextauth != "" then + { LANGFUSE_NEXTAUTH: $generated_langfuse_nextauth } + else + {} + end + ) + ( + if $generated_basic_auth_password != "" then + { BASIC_AUTH_PASSWORD: $generated_basic_auth_password } + else + {} + end + ) + ( + if $generated_clickhouse_password != "" then + { CLICKHOUSE_PASSWORD: $generated_clickhouse_password } + else + {} + end + ) + ( + if $generated_basic_auth != "" then + { BASIC_AUTH: $generated_basic_auth } + else + {} + end + ) + ) }' > "${seed_override_file}" log "Applying seed-secrets stack" diff --git a/infrastructure/server-setup/rag-setup/Chart.lock b/infrastructure/server-setup/rag-setup/Chart.lock index 6c293195..422c46b7 100644 --- a/infrastructure/server-setup/rag-setup/Chart.lock +++ b/infrastructure/server-setup/rag-setup/Chart.lock @@ -1,9 +1,9 @@ dependencies: - name: rag - repository: file://../../rag - version: 4.1.0 + repository: https://stackitcloud.github.io/rag-template + version: 4.2.0 - name: external-secrets repository: https://charts.external-secrets.io version: 2.0.0 -digest: sha256:048c060fb05bd668c2c16b21f8c6f82438accee4ed2a9a1a84b6a976e1261611 -generated: "2026-02-17T11:58:06.494129+01:00" +digest: sha256:4752701651a586417a192aced414d7c9f465c085179fc3ffa977f809e77a03c7 +generated: "2026-02-18T11:12:32.037377+01:00" diff --git 
a/infrastructure/server-setup/rag-setup/Chart.yaml b/infrastructure/server-setup/rag-setup/Chart.yaml index 48b80f28..57d33813 100644 --- a/infrastructure/server-setup/rag-setup/Chart.yaml +++ b/infrastructure/server-setup/rag-setup/Chart.yaml @@ -7,8 +7,8 @@ version: 0.1.0 appVersion: "0.1.0" dependencies: - name: rag - repository: "file://../../rag" - version: "4.1.0" + repository: "https://stackitcloud.github.io/rag-template" + version: "4.2.0" condition: features.rag.enabled - name: external-secrets repository: https://charts.external-secrets.io diff --git a/infrastructure/server-setup/rag-setup/values.yaml b/infrastructure/server-setup/rag-setup/values.yaml index c011bd52..eb4ad4a0 100644 --- a/infrastructure/server-setup/rag-setup/values.yaml +++ b/infrastructure/server-setup/rag-setup/values.yaml @@ -221,8 +221,8 @@ rag: RAG_CLASS_TYPE_LLM_TYPE: "stackit" ragas: RAGAS_IS_DEBUG: false - RAGAS_MODEL: "gpt-4o-mini" - RAGAS_USE_OPENAI: true + RAGAS_MODEL: "openai/gpt-oss-120b" + RAGAS_USE_OPENAI: false RAGAS_TIMEOUT: 60 RAGAS_EVALUATION_DATASET_NAME: "eval-data" RAGAS_MAX_CONCURRENCY: "5" diff --git a/infrastructure/terraform/README.md b/infrastructure/terraform/README.md index 18f29797..7e11180c 100644 --- a/infrastructure/terraform/README.md +++ b/infrastructure/terraform/README.md @@ -16,6 +16,8 @@ This Terraform configuration deploys: - STACKIT account with service account credentials - Service account JSON key file +Bootstrap note: Terraform needs an existing service-account key file (`sa_key.json`) to authenticate the provider. + ## Setup Instructions 1. **Prepare credentials** @@ -52,9 +54,14 @@ Important: The sa_key.json file contains sensitive credentials. Never commit it 2. 
**Review and customize variables** ``` + cp terraform.tfvars.example terraform.tfvars # Edit terraform.tfvars with your project ID and other preferences vim terraform.tfvars ``` + Required values in `terraform.tfvars`: + - `project_id` + - `dns_name` (must match `^([a-z0-9.-]+)$`, typically `*.runs.onstackit.cloud`) + - `rag_cluster_name` (max 11 chars, lowercase letters/numbers/hyphens) 3. **Initialize Terraform** ``` @@ -77,6 +84,9 @@ Important: The sa_key.json file contains sensitive credentials. Never commit it ./scripts/deploy-rag-prod.sh --issuer-email you@example.com --auto-approve ``` This wraps Terraform + Helm orchestration end-to-end. + `STACKIT_CERT_MANAGER_SA_JSON` must be provided in `seed-secrets/terraform.tfvars` (or via `rag_secrets_overrides`). + It auto-injects `STACKIT_EMBEDDER_API_KEY`, `STACKIT_VLLM_API_KEY`, and `RAGAS_OPENAI_API_KEY` from `model_serving_bearer_token`. + It auto-generates selected app secrets (Langfuse/basic auth/clickhouse) when placeholders are still present. 7. **Or only generate Helm overrides for rag-setup** ```bash @@ -115,7 +125,7 @@ Note: backend blocks cannot reference resources, so you must hardcode or pass th ### Bootstrap script (recommended) -Note: `backend "s3" {}` is already defined in `main.tf`. The bootstrap step still works because it runs `terraform init -backend=false`, which ignores the backend block. +Note: `backend "s3" {}` is already defined in `main.tf`. During bootstrap, the helper script temporarily removes the backend stanza to run a local apply for bucket/credentials creation, restores `main.tf`, then migrates state to S3. Use the helper script to bootstrap the backend in two phases: 1) Run a local-only apply to create the bucket + credentials. 
@@ -135,14 +145,16 @@ If you want non-interactive bootstrap: BOOTSTRAP_AUTO_APPROVE=1 ./scripts/init-backend.sh ``` -Manual phase 1 (if you want to see the exact commands the script runs): +Manual phase 1 (advanced; the script is recommended): ```bash -terraform init -backend=false +# temporarily remove backend "s3" {} from main.tf terraform apply \ -target=stackit_objectstorage_bucket.tfstate \ -target=stackit_objectstorage_credentials_group.rag_creds_group \ -target=stackit_objectstorage_credential.rag_creds +# restore main.tf, write .backend.hcl, then: +# terraform init -reconfigure -backend-config=.backend.hcl -force-copy ``` ### Manual backend block diff --git a/infrastructure/terraform/scripts/init-backend.sh b/infrastructure/terraform/scripts/init-backend.sh index af474be6..0cc6af87 100755 --- a/infrastructure/terraform/scripts/init-backend.sh +++ b/infrastructure/terraform/scripts/init-backend.sh @@ -6,6 +6,8 @@ root_dir="$(cd "${script_dir}/.." && pwd)" backend_config_file="${BACKEND_CONFIG_FILE:-${root_dir}/.backend.hcl}" auto_approve="${BOOTSTRAP_AUTO_APPROVE:-0}" +main_tf="${root_dir}/main.tf" +main_tf_backup="" cd "${root_dir}" @@ -14,15 +16,83 @@ if ! command -v terraform >/dev/null 2>&1; then exit 1 fi +read_output_raw() { + local output_name="$1" + terraform output -raw "${output_name}" 2>/dev/null || true +} + +is_valid_output_value() { + local value="$1" + [[ -n "${value}" ]] || return 1 + [[ "${value}" != *"No outputs found"* ]] || return 1 + [[ "${value}" != *$'\n'* ]] || return 1 + return 0 +} + if [ -f "${backend_config_file}" ]; then - terraform init -backend-config="${backend_config_file}" - exit 0 + # Recover from previously corrupted backend config files. + if grep -q "No outputs found" "${backend_config_file}"; then + echo "Detected invalid ${backend_config_file}; regenerating it." 
+ rm -f "${backend_config_file}" + else + terraform init -reconfigure -backend-config="${backend_config_file}" + exit 0 + fi fi +restore_main_tf() { + if [ -n "${main_tf_backup}" ] && [ -f "${main_tf_backup}" ]; then + cp "${main_tf_backup}" "${main_tf}" + rm -f "${main_tf_backup}" + main_tf_backup="" + fi +} + +prepare_local_bootstrap_config() { + if [ ! -f "${main_tf}" ]; then + echo "Expected Terraform file not found: ${main_tf}" >&2 + exit 1 + fi + + main_tf_backup="$(mktemp)" + cp "${main_tf}" "${main_tf_backup}" + + awk ' + BEGIN { + skip = 0 + depth = 0 + } + { + line = $0 + if (!skip && line ~ /^[[:space:]]*backend[[:space:]]+"s3"[[:space:]]*{/) { + skip = 1 + } + if (skip) { + opens = gsub(/{/, "{", line) + closes = gsub(/}/, "}", line) + depth += opens - closes + if (depth <= 0) { + skip = 0 + depth = 0 + } + next + } + print $0 + } + ' "${main_tf_backup}" > "${main_tf}" +} + +trap restore_main_tf EXIT + echo "Bootstrapping object storage for Terraform state (local backend)." -terraform init -backend=false +prepare_local_bootstrap_config +terraform init -reconfigure -if ! bucket="$(terraform output -raw object_storage_bucket 2>/dev/null)"; then +bucket="$(read_output_raw object_storage_bucket)" +access_key="$(read_output_raw object_storage_access_key)" +secret_key="$(read_output_raw object_storage_secret_key)" + +if ! is_valid_output_value "${bucket}" || ! is_valid_output_value "${access_key}" || ! is_valid_output_value "${secret_key}"; then apply_args=( "-target=stackit_objectstorage_bucket.tfstate" "-target=stackit_objectstorage_credentials_group.rag_creds_group" @@ -37,11 +107,18 @@ if ! 
bucket="$(terraform output -raw object_storage_bucket 2>/dev/null)"; then else terraform apply "${apply_args[@]}" fi - bucket="$(terraform output -raw object_storage_bucket)" + bucket="$(read_output_raw object_storage_bucket)" + access_key="$(read_output_raw object_storage_access_key)" + secret_key="$(read_output_raw object_storage_secret_key)" fi -access_key="$(terraform output -raw object_storage_access_key)" -secret_key="$(terraform output -raw object_storage_secret_key)" +if ! is_valid_output_value "${bucket}" || ! is_valid_output_value "${access_key}" || ! is_valid_output_value "${secret_key}"; then + echo "Failed to read valid backend outputs after bootstrap apply." >&2 + echo "bucket='${bucket}'" >&2 + echo "access_key length=${#access_key}" >&2 + echo "secret_key length=${#secret_key}" >&2 + exit 1 +fi cat > "${backend_config_file}" <> +EOF BASIC_AUTH_USER = "<>" BASIC_AUTH_PASSWORD = "<>" BASIC_AUTH = "<>" - # The production deploy script auto-populates the following from Terraform outputs: - # POSTGRES_PASSWORD, REDIS_USERNAME, REDIS_PASSWORD, S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY - POSTGRES_PASSWORD = "<>" - REDIS_USERNAME = "<>" - REDIS_PASSWORD = "<>" CLICKHOUSE_PASSWORD = "<>" - STACKIT_CERT_MANAGER_SA_JSON = <> -EOF } + +# Optional only when running this module directly. +# deploy-rag-prod.sh injects these from Terraform outputs automatically: +# and auto-generates selected values when missing/placeholders. 
+# rag_secrets_overrides = { +# POSTGRES_PASSWORD = "<>" +# REDIS_USERNAME = "<>" +# REDIS_PASSWORD = "<>" +# S3_ACCESS_KEY_ID = "<>" +# S3_SECRET_ACCESS_KEY = "<>" +# STACKIT_EMBEDDER_API_KEY = "<>" +# STACKIT_VLLM_API_KEY = "<>" +# RAGAS_OPENAI_API_KEY = "<>" +# } diff --git a/infrastructure/terraform/terraform.tfvars.example b/infrastructure/terraform/terraform.tfvars.example index 30b05abc..7facffbf 100644 --- a/infrastructure/terraform/terraform.tfvars.example +++ b/infrastructure/terraform/terraform.tfvars.example @@ -1,3 +1,13 @@ project_id = "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXX" name_prefix = "rag-prod" region = "eu01" + +# Required: +# - lowercase letters, numbers, hyphens, dots +# - should end with .runs.onstackit.cloud +dns_name = "rag-prod-xxxxx.runs.onstackit.cloud" + +# Required: +# - lowercase letters, numbers, hyphens +# - max length 11 +rag_cluster_name = "rag-prod01" diff --git a/rag-core-library b/rag-core-library new file mode 160000 index 00000000..7b7d6b00 --- /dev/null +++ b/rag-core-library @@ -0,0 +1 @@ +Subproject commit 7b7d6b00f908c385d507050d17132ccbd3f741ef diff --git a/rag-infrastructure b/rag-infrastructure new file mode 160000 index 00000000..50be2d3a --- /dev/null +++ b/rag-infrastructure @@ -0,0 +1 @@ +Subproject commit 50be2d3a0ef8f8667766a1d7297b8d98507dfce8 From 87db5e33e192c98e404070ebe1a6d217bc687719 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Wed, 18 Feb 2026 11:53:33 +0100 Subject: [PATCH 6/8] feat: remove rag-core-library and rag-infrastructure submodules --- rag-core-library | 1 - rag-infrastructure | 1 - 2 files changed, 2 deletions(-) delete mode 160000 rag-core-library delete mode 160000 rag-infrastructure diff --git a/rag-core-library b/rag-core-library deleted file mode 160000 index 7b7d6b00..00000000 --- a/rag-core-library +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 7b7d6b00f908c385d507050d17132ccbd3f741ef diff --git a/rag-infrastructure b/rag-infrastructure deleted file mode 160000 index 50be2d3a..00000000 
--- a/rag-infrastructure +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 50be2d3a0ef8f8667766a1d7297b8d98507dfce8 From 186aec3607f9ce73ed7d3bb9779210f8371e16ad Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Wed, 18 Feb 2026 12:26:20 +0100 Subject: [PATCH 7/8] feat(terraform): update DNS name validation and documentation for clarity --- infrastructure/terraform/README.md | 2 +- infrastructure/terraform/terraform.tfvars.example | 1 + infrastructure/terraform/variables.tf | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/infrastructure/terraform/README.md b/infrastructure/terraform/README.md index 7e11180c..3b46510f 100644 --- a/infrastructure/terraform/README.md +++ b/infrastructure/terraform/README.md @@ -60,7 +60,7 @@ Important: The sa_key.json file contains sensitive credentials. Never commit it ``` Required values in `terraform.tfvars`: - `project_id` - - `dns_name` (must match `^([a-z0-9.-]+)$`, typically `*.runs.onstackit.cloud`) + - `dns_name` (valid multi-level domain with at least one dot, typically `*.runs.onstackit.cloud`) - `rag_cluster_name` (max 11 chars, lowercase letters/numbers/hyphens) 3. **Initialize Terraform** diff --git a/infrastructure/terraform/terraform.tfvars.example b/infrastructure/terraform/terraform.tfvars.example index 7facffbf..32dcfb17 100644 --- a/infrastructure/terraform/terraform.tfvars.example +++ b/infrastructure/terraform/terraform.tfvars.example @@ -4,6 +4,7 @@ region = "eu01" # Required: # - lowercase letters, numbers, hyphens, dots +# - at least one dot (multi-level DNS name) # - should end with .runs.onstackit.cloud dns_name = "rag-prod-xxxxx.runs.onstackit.cloud" diff --git a/infrastructure/terraform/variables.tf b/infrastructure/terraform/variables.tf index b99534bd..15354722 100644 --- a/infrastructure/terraform/variables.tf +++ b/infrastructure/terraform/variables.tf @@ -7,8 +7,8 @@ variable "dns_name" { description = "DNS name for the service. Desired free sub‑domain (e.g. 
ends with .runs.onstackit.cloud)." type = string validation { - condition = can(regex("^([a-z0-9.-]+)$", var.dns_name)) - error_message = "The DNS name must only contain lowercase letters, numbers, hyphens, and dots." + condition = can(regex("^([a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?$", var.dns_name)) + error_message = "The DNS name must be a valid multi-level domain (at least one dot), using lowercase letters, numbers, and hyphens." } } From c39bc4d8343a26a928f854aeb7ac65b61c1deca4 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Wed, 18 Feb 2026 15:42:52 +0100 Subject: [PATCH 8/8] feat: enhance deploy script for external secrets integration and add namespace discovery --- infrastructure/.gitignore | 2 +- infrastructure/scripts/deploy-rag-prod.sh | 96 ++++++++++++++++++++++- 2 files changed, 96 insertions(+), 2 deletions(-) diff --git a/infrastructure/.gitignore b/infrastructure/.gitignore index a32fd5bc..642d58f0 100644 --- a/infrastructure/.gitignore +++ b/infrastructure/.gitignore @@ -4,7 +4,7 @@ **/.backend.hcl **/kubeconfig.yaml **/*.lock.* - +**/values.prod.auto.yaml auth .DS_Store diff --git a/infrastructure/scripts/deploy-rag-prod.sh b/infrastructure/scripts/deploy-rag-prod.sh index 1725a78b..dd5b4666 100755 --- a/infrastructure/scripts/deploy-rag-prod.sh +++ b/infrastructure/scripts/deploy-rag-prod.sh @@ -15,6 +15,8 @@ HELM_TIMEOUT="20m" BASE_NAMESPACE="cert-manager" RAG_NAMESPACE="rag" VAULT_USERPASS_NAMESPACE="cert-manager" +BASE_NAMESPACE_EXPLICIT=0 +VAULT_USERPASS_NAMESPACE_EXPLICIT=0 VALUES_OUTPUT_FILE="${RAG_CHART_DIR}/values.prod.auto.yaml" SEED_TFVARS_FILE="${SEED_DIR}/terraform.tfvars" ISSUER_EMAIL="" @@ -80,6 +82,7 @@ while [[ $# -gt 0 ]]; do ;; --base-namespace) BASE_NAMESPACE="${2:-}" + BASE_NAMESPACE_EXPLICIT=1 shift 2 ;; --rag-namespace) @@ -88,6 +91,7 @@ while [[ $# -gt 0 ]]; do ;; --vault-userpass-namespace) VAULT_USERPASS_NAMESPACE="${2:-}" + VAULT_USERPASS_NAMESPACE_EXPLICIT=1 shift 2 ;; --helm-timeout) @@ 
-165,6 +169,58 @@ log() { echo "[deploy-rag-prod] $*" } +discover_base_setup_namespace() { + local ns="" + local annotated_release_name="" + local annotated_release_namespace="" + + ns="$( + helm list -A -o json 2>/dev/null \ + | jq -r '.[] | select(.name == "base-setup") | .namespace' \ + | head -n1 || true + )" + if [[ -n "${ns}" && "${ns}" != "null" ]]; then + printf '%s' "${ns}" + return + fi + + annotated_release_name="$( + kubectl get clusterrole base-setup-cert-manager-cainjector \ + -o jsonpath='{.metadata.annotations.meta\.helm\.sh/release-name}' 2>/dev/null || true + )" + annotated_release_namespace="$( + kubectl get clusterrole base-setup-cert-manager-cainjector \ + -o jsonpath='{.metadata.annotations.meta\.helm\.sh/release-namespace}' 2>/dev/null || true + )" + + if [[ "${annotated_release_name}" == "base-setup" && -n "${annotated_release_namespace}" ]]; then + printf '%s' "${annotated_release_namespace}" + return + fi + + if [[ -n "${annotated_release_name}" && "${annotated_release_name}" != "base-setup" ]]; then + echo "ClusterRole base-setup-cert-manager-cainjector is owned by Helm release ${annotated_release_name}, not base-setup." >&2 + echo "Clean up that existing release/resource or choose a different base release name before continuing." 
>&2 + exit 1 + fi + + return 0 +} + +existing_base_namespace="$(discover_base_setup_namespace)" +if [[ -n "${existing_base_namespace}" && "${existing_base_namespace}" != "${BASE_NAMESPACE}" ]]; then + if [[ "${BASE_NAMESPACE_EXPLICIT}" -eq 0 ]]; then + log "Detected existing base-setup ownership in namespace ${existing_base_namespace}; using it instead of ${BASE_NAMESPACE}" + BASE_NAMESPACE="${existing_base_namespace}" + else + log "base-setup ownership exists in namespace ${existing_base_namespace}, but --base-namespace=${BASE_NAMESPACE} was explicitly requested" + fi + + if [[ "${VAULT_USERPASS_NAMESPACE_EXPLICIT}" -eq 0 ]]; then + VAULT_USERPASS_NAMESPACE="${existing_base_namespace}" + fi +fi + extract_rag_secret_value() { local key="$1" local raw_value @@ -290,7 +346,7 @@ if [[ -n "${generated_basic_auth_password}" ]]; then fi fi -seed_override_file="$(mktemp)" +seed_override_file="$(mktemp "${TMPDIR:-/tmp}/rag-seed-overrides.XXXXXX.tfvars.json")" cleanup() { rm -f "${seed_override_file}" } @@ -423,6 +479,44 @@ helm upgrade --install base-setup "${BASE_CHART_DIR}" \ --wait \ --timeout "${HELM_TIMEOUT}" +rag_setup_release_exists=0 +if helm status rag-setup -n "${RAG_NAMESPACE}" >/dev/null 2>&1; then + rag_setup_release_exists=1 +fi + +if ! kubectl get crd clustersecretstores.external-secrets.io >/dev/null 2>&1; then + if [[ "${rag_setup_release_exists}" -eq 1 ]]; then + echo "External Secrets CRD clustersecretstores.external-secrets.io is missing, but rag-setup release already exists." >&2 + echo "Refusing automatic bootstrap to avoid changing an existing release unexpectedly." >&2 + echo "Fix CRDs/operator first, then rerun deploy." 
>&2 + exit 1 + fi + + log "External Secrets CRDs not found; bootstrapping rag-setup dependencies (operator/CRDs) first" + helm upgrade --install rag-setup "${RAG_CHART_DIR}" \ + -n "${RAG_NAMESPACE}" \ + --create-namespace \ + -f "${RAG_CHART_DIR}/values.yaml" \ + -f "${VALUES_OUTPUT_FILE}" \ + --set features.rag.enabled=false \ + --set externalSecrets.resources.enabled=false \ + --wait \ + --timeout "${HELM_TIMEOUT}" + + log "Waiting for External Secrets CRDs to become available" + for _ in $(seq 1 30); do + if kubectl get crd clustersecretstores.external-secrets.io >/dev/null 2>&1; then + break + fi + sleep 2 + done + + if ! kubectl get crd clustersecretstores.external-secrets.io >/dev/null 2>&1; then + echo "External Secrets CRDs are still missing after bootstrap step." >&2 + exit 1 + fi +fi + log "Deploying rag-setup chart" helm upgrade --install rag-setup "${RAG_CHART_DIR}" \ -n "${RAG_NAMESPACE}" \