> $GITHUB_ENV
+
+ - name: Comment coverage summary on PR
+ uses: ./.github/actions/post-coverage-comment
+ with:
+ pr_number: ${{ env.PR_NUMBER }}
+ coverage_percentage: ${{ env.COVERAGE_PERCENTAGE }}
+ covered_lines: ${{ env.COVERED_LINES }}
+ total_lines: ${{ env.TOTAL_LINES }}
+ patch_coverage_pct: ${{ env.PATCH_COVERAGE_PCT }}
+ low_coverage_files: ${{ env.LOW_COVERAGE_FILES }}
+ patch_coverage_summary: ${{ env.PATCH_COVERAGE_SUMMARY }}
+ ado_url: ${{ env.ADO_URL }}
diff --git a/.github/workflows/lint-check.yml b/.github/workflows/lint-check.yml
new file mode 100644
index 000000000..761620d10
--- /dev/null
+++ b/.github/workflows/lint-check.yml
@@ -0,0 +1,179 @@
+name: Linting Check
+
+on:
+ pull_request:
+ types: [opened, edited, reopened, synchronize]
+
+ paths:
+ - '**.py'
+ - '**.cpp'
+ - '**.c'
+ - '**.h'
+ - '**.hpp'
+ - '.github/workflows/lint-check.yml'
+ - 'pyproject.toml'
+ - '.flake8'
+ - '.clang-format'
+ push:
+ branches:
+ - main
+
+permissions:
+ pull-requests: write
+
+jobs:
+ python-lint:
+ name: Python Linting
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.13'
+ cache: 'pip'
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install black flake8 pylint autopep8
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+
+ - name: Check Python formatting with Black
+ run: |
+ echo "::group::Black Formatting Check"
+ black --check --line-length=100 --diff mssql_python/ tests/ || {
+ echo "::error::Black formatting issues found. Run 'black --line-length=100 mssql_python/ tests/' locally to fix."
+ exit 1
+ }
+ echo "::endgroup::"
+
+ - name: Lint with Flake8
+ run: |
+ echo "::group::Flake8 Linting"
+ flake8 mssql_python/ tests/ --max-line-length=100 --extend-ignore=E203,W503,E501,E722,F401,F841,W293,W291,F541,F811,E402,E711,E712,E721,F821 --count --statistics || {
+ echo "::warning::Flake8 found linting issues (informational only, not blocking)"
+ }
+ echo "::endgroup::"
+ continue-on-error: true
+
+ - name: Lint with Pylint
+ run: |
+ echo "::group::Pylint Analysis"
+ pylint mssql_python/ --max-line-length=100 \
+ --disable=fixme,no-member,too-many-arguments,too-many-positional-arguments,invalid-name,useless-parent-delegation \
+ --exit-zero --output-format=colorized --reports=y || true
+ echo "::endgroup::"
+
+ - name: Check Type Hints (mypy)
+ run: |
+ echo "::group::Type Checking"
+ pip install mypy
+ mypy mssql_python/ --ignore-missing-imports --no-strict-optional --check-untyped-defs || {
+ echo "::warning::Type checking found potential issues. Review the output above."
+ }
+ echo "::endgroup::"
+ continue-on-error: true
+
+ cpp-lint:
+ name: C++ Linting
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up Python (for cpplint)
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.13'
+
+ - name: Install clang-format
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y clang-format
+ clang-format --version
+
+ - name: Install cpplint
+ run: |
+ python -m pip install --upgrade pip
+ pip install cpplint
+
+ - name: Check C++ formatting with clang-format
+ run: |
+ echo "::group::clang-format Check"
+ # Check formatting without --Werror (informational only)
+ find mssql_python/pybind -type f \( -name "*.cpp" -o -name "*.c" -o -name "*.h" -o -name "*.hpp" \) | while read file; do
+ echo "Checking $file"
+ clang-format --dry-run "$file" 2>&1 || true
+ done
+
+ echo "✅ clang-format check completed (informational only)"
+ echo "::endgroup::"
+ continue-on-error: true
+
+ - name: Lint with cpplint
+ run: |
+ echo "::group::cpplint Check"
+ python -m cpplint \
+ --filter=-legal/copyright,-build/include_subdir,-build/c++11 \
+ --linelength=100 \
+ --recursive \
+ --quiet \
+ mssql_python/pybind 2>&1 | tee cpplint_output.txt || true
+
+ # Count errors and warnings
+ ERROR_COUNT=$(grep -c "Total errors found:" cpplint_output.txt || echo "0")
+
+ if [ -s cpplint_output.txt ] && grep -q "Total errors found:" cpplint_output.txt; then
+ TOTAL_ERRORS=$(grep "Total errors found:" cpplint_output.txt | awk '{print $4}')
+ echo "::warning::cpplint found $TOTAL_ERRORS issues. These are informational and don't block the PR."
+
+ # Show summary but don't fail (informational only)
+ echo "cpplint found $TOTAL_ERRORS style guideline issues (not blocking)"
+ else
+ echo "✅ cpplint check passed with minimal issues"
+ fi
+ echo "::endgroup::"
+ continue-on-error: true
+
+ lint-summary:
+ name: Linting Summary
+ runs-on: ubuntu-latest
+ needs: [python-lint, cpp-lint]
+ if: always()
+
+ steps:
+ - name: Check results
+ run: |
+ echo "## Linting Summary" >> $GITHUB_STEP_SUMMARY
+ echo "" >> $GITHUB_STEP_SUMMARY
+ echo "### Check Results" >> $GITHUB_STEP_SUMMARY
+
+ if [ "${{ needs.python-lint.result }}" == "success" ]; then
+ echo "✅ **Python Formatting (Black):** PASSED" >> $GITHUB_STEP_SUMMARY
+ else
+ echo "❌ **Python Formatting (Black):** FAILED - Please run Black formatter" >> $GITHUB_STEP_SUMMARY
+ fi
+
+ echo "ℹ️ **Python Linting (Flake8, Pylint):** Informational only" >> $GITHUB_STEP_SUMMARY
+ echo "ℹ️ **C++ Linting (clang-format, cpplint):** Informational only" >> $GITHUB_STEP_SUMMARY
+
+ echo "" >> $GITHUB_STEP_SUMMARY
+ echo "### Required Actions" >> $GITHUB_STEP_SUMMARY
+ echo "- ✅ Black formatting must pass (blocking)" >> $GITHUB_STEP_SUMMARY
+ echo "- ℹ️ Other linting issues are warnings and won't block PR" >> $GITHUB_STEP_SUMMARY
+ echo "" >> $GITHUB_STEP_SUMMARY
+ echo "### How to Fix" >> $GITHUB_STEP_SUMMARY
+ echo "1. Save all files in VS Code (Ctrl+S) - auto-formatting will fix most issues" >> $GITHUB_STEP_SUMMARY
+ echo "2. Or run manually: \`black --line-length=100 mssql_python/ tests/\`" >> $GITHUB_STEP_SUMMARY
+ echo "3. For C++: \`clang-format -i mssql_python/pybind/*.cpp\`" >> $GITHUB_STEP_SUMMARY
+
+ - name: Fail if Python formatting failed
+ if: needs.python-lint.result != 'success'
+ run: |
+ echo "::error::Python Black formatting check failed. Please format your Python files."
+ exit 1
diff --git a/.github/workflows/pr-code-coverage.yml b/.github/workflows/pr-code-coverage.yml
new file mode 100644
index 000000000..f2f1aad9f
--- /dev/null
+++ b/.github/workflows/pr-code-coverage.yml
@@ -0,0 +1,475 @@
+name: PR Code Coverage
+
+on:
+ pull_request:
+ branches:
+ - main
+
+jobs:
+ coverage-report:
+ runs-on: ubuntu-latest
+ permissions:
+ pull-requests: write
+ contents: read
+
+ steps:
+ - name: Checkout repo
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Setup git for diff-cover
+ run: |
+ # Fetch the main branch for comparison
+ git fetch origin main:main
+ # Show available branches for debugging
+ echo "Available branches:"
+ git branch -a
+ # Verify main branch exists
+ git show-ref --verify refs/heads/main || echo "Warning: main branch not found"
+ git show-ref --verify refs/remotes/origin/main || echo "Warning: origin/main not found"
+
+ - name: Wait for ADO build to start
+ run: |
+ PR_NUMBER=${{ github.event.pull_request.number }}
+ API_URL="https://dev.azure.com/sqlclientdrivers/public/_apis/build/builds?definitions=2128&queryOrder=queueTimeDescending&%24top=10&api-version=7.1-preview.7"
+
+ echo "Waiting for Azure DevOps build to start for PR #$PR_NUMBER ..."
+
+ for i in {1..30}; do
+ echo "Attempt $i/30: Checking if build has started..."
+
+ # Fetch API response with error handling
+ API_RESPONSE=$(curl -s "$API_URL")
+
+ # Check if response is valid JSON
+ if ! echo "$API_RESPONSE" | jq . >/dev/null 2>&1; then
+ echo "❌ Invalid JSON response from Azure DevOps API"
+ echo "Response received: $API_RESPONSE"
+ echo "This usually indicates the Azure DevOps pipeline has failed or API is unavailable"
+ exit 1
+ fi
+
+ # Parse build info safely
+ BUILD_INFO=$(echo "$API_RESPONSE" | jq -c --arg PR "$PR_NUMBER" '[.value[]? | select(.triggerInfo["pr.number"]?==$PR)] | .[0] // empty' 2>/dev/null)
+
+ if [[ -n "$BUILD_INFO" && "$BUILD_INFO" != "null" && "$BUILD_INFO" != "empty" ]]; then
+ STATUS=$(echo "$BUILD_INFO" | jq -r '.status // "unknown"')
+ RESULT=$(echo "$BUILD_INFO" | jq -r '.result // "unknown"')
+ BUILD_ID=$(echo "$BUILD_INFO" | jq -r '.id // "unknown"')
+ WEB_URL=$(echo "$BUILD_INFO" | jq -r '._links.web.href // "unknown"')
+
+ echo "✅ Found build: ID=$BUILD_ID, Status=$STATUS, Result=$RESULT"
+ echo "🔗 Build URL: $WEB_URL"
+ echo "ADO_URL=$WEB_URL" >> $GITHUB_ENV
+ echo "BUILD_ID=$BUILD_ID" >> $GITHUB_ENV
+
+ # Check if build has failed early
+ if [[ "$STATUS" == "completed" && "$RESULT" == "failed" ]]; then
+ echo "❌ Azure DevOps build $BUILD_ID failed early"
+ echo "This coverage workflow cannot proceed when the main build fails."
+ exit 1
+ fi
+
+ echo "🚀 Build has started, proceeding to poll for coverage artifacts..."
+ break
+ else
+ echo "⏳ No build found for PR #$PR_NUMBER yet... (attempt $i/30)"
+ fi
+
+ if [[ $i -eq 30 ]]; then
+ echo "❌ Timeout: No build found for PR #$PR_NUMBER after 30 attempts"
+ echo "This may indicate the Azure DevOps pipeline was not triggered"
+ exit 1
+ fi
+
+ sleep 10
+ done
+
+ - name: Download and parse coverage report
+ run: |
+ BUILD_ID=${{ env.BUILD_ID }}
+ ARTIFACTS_URL="https://dev.azure.com/SqlClientDrivers/public/_apis/build/builds/$BUILD_ID/artifacts?api-version=7.1-preview.5"
+
+ echo "📥 Polling for coverage artifacts for build $BUILD_ID..."
+
+ # Poll for coverage artifacts with retry logic
+ COVERAGE_ARTIFACT=""
+ for i in {1..60}; do
+ echo "Attempt $i/60: Checking for coverage artifacts..."
+
+ # Fetch artifacts with error handling
+ ARTIFACTS_RESPONSE=$(curl -s "$ARTIFACTS_URL")
+
+ # Check if response is valid JSON
+ if ! echo "$ARTIFACTS_RESPONSE" | jq . >/dev/null 2>&1; then
+ echo "⚠️ Invalid JSON response from artifacts API (attempt $i/60)"
+ if [[ $i -eq 60 ]]; then
+ echo "❌ Persistent API issues after 60 attempts"
+ echo "Response received: $ARTIFACTS_RESPONSE"
+ exit 1
+ fi
+ sleep 30
+ continue
+ fi
+
+ # Show available artifacts for debugging
+ echo "🔍 Available artifacts:"
+ echo "$ARTIFACTS_RESPONSE" | jq -r '.value[]?.name // "No artifacts found"'
+
+ # Find the coverage report artifact
+ COVERAGE_ARTIFACT=$(echo "$ARTIFACTS_RESPONSE" | jq -r '.value[]? | select(.name | test("Code Coverage Report")) | .resource.downloadUrl // empty' 2>/dev/null)
+
+ if [[ -n "$COVERAGE_ARTIFACT" && "$COVERAGE_ARTIFACT" != "null" && "$COVERAGE_ARTIFACT" != "empty" ]]; then
+ echo "✅ Found coverage artifact on attempt $i!"
+ break
+ else
+ echo "⏳ Coverage report not ready yet (attempt $i/60)..."
+ if [[ $i -eq 60 ]]; then
+ echo "❌ Timeout: Coverage report artifact not found after 60 attempts"
+ echo "Available artifacts:"
+ echo "$ARTIFACTS_RESPONSE" | jq -r '.value[]?.name // "No artifacts found"'
+ exit 1
+ fi
+ sleep 30
+ fi
+ done
+
+ if [[ -n "$COVERAGE_ARTIFACT" && "$COVERAGE_ARTIFACT" != "null" && "$COVERAGE_ARTIFACT" != "empty" ]]; then
+ echo "📊 Downloading coverage report..."
+ if ! curl -L "$COVERAGE_ARTIFACT" -o coverage-report.zip --fail --silent; then
+ echo "❌ Failed to download coverage report from Azure DevOps"
+ echo "This indicates the coverage artifacts may not be available or accessible"
+ exit 1
+ fi
+
+ if ! unzip -o -q coverage-report.zip; then
+ echo "❌ Failed to extract coverage artifacts"
+ echo "Trying to extract with verbose output for debugging..."
+ unzip -l coverage-report.zip || echo "Failed to list archive contents"
+ exit 1
+ fi
+
+ # Find the main index.html file
+ INDEX_FILE=$(find . -name "index.html" -path "*/Code Coverage Report*" | head -1)
+
+ if [[ -f "$INDEX_FILE" ]]; then
+ echo "🔍 Parsing coverage data from $INDEX_FILE..."
+
+ # Debug: Show relevant parts of the HTML
+ echo "Debug: Looking for coverage data..."
+ grep -n "cardpercentagebar\|Covered lines\|Coverable lines" "$INDEX_FILE" | head -10
+
+ # Extract coverage metrics using simpler, more reliable patterns
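+ # Illustrative example of the markup these patterns are assumed to target
+ # (ReportGenerator-style HTML; the exact shape may vary between report versions):
+ #   <td class="... cardpercentagebar54">54%</td>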
+ OVERALL_PERCENTAGE=$(grep -o 'cardpercentagebar[0-9]*">[0-9]*%' "$INDEX_FILE" | head -1 | grep -o '[0-9]*%')
+ COVERED_LINES=$(grep -A1 "Covered lines:" "$INDEX_FILE" | grep -o 'title="[0-9]*"' | head -1 | grep -o '[0-9]*')
+ TOTAL_LINES=$(grep -A1 "Coverable lines:" "$INDEX_FILE" | grep -o 'title="[0-9]*"' | head -1 | grep -o '[0-9]*')
+
+ # Fallback method if the above doesn't work
+ if [[ -z "$OVERALL_PERCENTAGE" ]]; then
+ echo "Trying alternative parsing method..."
+ OVERALL_PERCENTAGE=$(grep -o 'large.*">[0-9]*%' "$INDEX_FILE" | head -1 | grep -o '[0-9]*%')
+ fi
+
+ echo "Extracted values:"
+ echo "OVERALL_PERCENTAGE=$OVERALL_PERCENTAGE"
+ echo "COVERED_LINES=$COVERED_LINES"
+ echo "TOTAL_LINES=$TOTAL_LINES"
+
+ # Validate that we got the essential data
+ if [[ -z "$OVERALL_PERCENTAGE" ]]; then
+ echo "❌ Could not extract coverage percentage from the report"
+ echo "The coverage report format may have changed or be incomplete"
+ exit 1
+ fi
+
+ echo "COVERAGE_PERCENTAGE=$OVERALL_PERCENTAGE" >> $GITHUB_ENV
+ echo "COVERED_LINES=${COVERED_LINES:-N/A}" >> $GITHUB_ENV
+ echo "TOTAL_LINES=${TOTAL_LINES:-N/A}" >> $GITHUB_ENV
+
+ # Extract top files with low coverage - improved approach
+ echo "📋 Extracting file-level coverage..."
+
+ # Extract file coverage data more reliably
+ LOW_COVERAGE_FILES=$(grep -o '| [^<]* | [0-9]* | [0-9]* | [0-9]* | [0-9]* | [0-9]*\.[0-9]*%' "$INDEX_FILE" | \
+ sed 's/ | \([^<]*\)<\/a><\/td>.*class="right">\([0-9]*\.[0-9]*\)%/\1: \2%/' | \
+ sort -t: -k2 -n | head -10)
+
+ # Alternative method if above fails
+ if [[ -z "$LOW_COVERAGE_FILES" ]]; then
+ echo "Trying alternative file parsing..."
+ LOW_COVERAGE_FILES=$(grep -E "\.py.*[0-9]+\.[0-9]+%" "$INDEX_FILE" | \
+ grep -o "[^>]*\.py[^<]*.*[0-9]*\.[0-9]*%" | \
+ sed 's/\([^<]*\)<\/a>.*\([0-9]*\.[0-9]*\)%/\1: \2%/' | \
+ sort -t: -k2 -n | head -10)
+ fi
+
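+ # Multi-line values are written to GITHUB_ENV with heredoc-style delimiter syntax:
+ # "NAME<<EOF" starts the value and a lone "EOF" line terminates it.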
+ echo "LOW_COVERAGE_FILES<> $GITHUB_ENV
+ echo "${LOW_COVERAGE_FILES:-No detailed file data available}" >> $GITHUB_ENV
+ echo "EOF" >> $GITHUB_ENV
+
+ echo "✅ Coverage data extracted successfully"
+ else
+ echo "❌ Could not find index.html in coverage report"
+ echo "Available files in the coverage report:"
+ find . -name "*.html" | head -10 || echo "No HTML files found"
+ exit 1
+ fi
+ else
+ echo "❌ Could not find coverage report artifact"
+ echo "Available artifacts from the build:"
+ echo "$ARTIFACTS_RESPONSE" | jq -r '.value[]?.name // "No artifacts found"' 2>/dev/null || echo "Could not parse artifacts list"
+ echo "This indicates the Azure DevOps build may not have generated coverage reports"
+ exit 1
+ fi
+
+ - name: Download coverage XML from ADO
+ run: |
+ # Download the Cobertura XML directly from the CodeCoverageReport job
+ BUILD_ID=${{ env.BUILD_ID }}
+ ARTIFACTS_URL="https://dev.azure.com/SqlClientDrivers/public/_apis/build/builds/$BUILD_ID/artifacts?api-version=7.1-preview.5"
+
+ echo "📥 Fetching artifacts for build $BUILD_ID to find coverage files..."
+
+ # Fetch artifacts with error handling
+ ARTIFACTS_RESPONSE=$(curl -s "$ARTIFACTS_URL")
+
+ # Check if response is valid JSON
+ if ! echo "$ARTIFACTS_RESPONSE" | jq . >/dev/null 2>&1; then
+ echo "❌ Invalid JSON response from artifacts API"
+ echo "Response received: $ARTIFACTS_RESPONSE"
+ exit 1
+ fi
+
+ echo "🔍 Available artifacts:"
+ echo "$ARTIFACTS_RESPONSE" | jq -r '.value[]?.name // "No artifacts found"'
+
+ # Look for the unified coverage artifact from CodeCoverageReport job
+ COVERAGE_XML_ARTIFACT=$(echo "$ARTIFACTS_RESPONSE" | jq -r '.value[]? | select(.name | test("unified-coverage|Code Coverage Report|coverage")) | .resource.downloadUrl // empty' 2>/dev/null | head -1)
+
+ if [[ -n "$COVERAGE_XML_ARTIFACT" && "$COVERAGE_XML_ARTIFACT" != "null" && "$COVERAGE_XML_ARTIFACT" != "empty" ]]; then
+ echo "📊 Downloading coverage artifact from: $COVERAGE_XML_ARTIFACT"
+ if ! curl -L "$COVERAGE_XML_ARTIFACT" -o coverage-artifacts.zip --fail --silent; then
+ echo "❌ Failed to download coverage artifacts"
+ exit 1
+ fi
+
+ if ! unzip -o -q coverage-artifacts.zip; then
+ echo "❌ Failed to extract coverage artifacts"
+ echo "Trying to extract with verbose output for debugging..."
+ unzip -l coverage-artifacts.zip || echo "Failed to list archive contents"
+ exit 1
+ fi
+
+ echo "🔍 Looking for coverage XML files in extracted artifacts..."
+ find . -name "*.xml" -type f | head -10
+
+ # Look for the main coverage.xml file in unified-coverage directory or any coverage XML
+ if [[ -f "unified-coverage/coverage.xml" ]]; then
+ echo "✅ Found unified coverage file at unified-coverage/coverage.xml"
+ cp "unified-coverage/coverage.xml" ./coverage.xml
+ elif [[ -f "coverage.xml" ]]; then
+ echo "✅ Found coverage.xml in root directory"
+ # Already in the right place
+ else
+ # Try to find any coverage XML file
+ COVERAGE_FILE=$(find . -name "*coverage*.xml" -type f | head -1)
+ if [[ -n "$COVERAGE_FILE" ]]; then
+ echo "✅ Found coverage file: $COVERAGE_FILE"
+ cp "$COVERAGE_FILE" ./coverage.xml
+ else
+ echo "❌ No coverage XML file found in artifacts"
+ echo "Available files:"
+ find . -name "*.xml" -type f
+ exit 1
+ fi
+ fi
+
+ echo "✅ Coverage XML file is ready at ./coverage.xml"
+ ls -la ./coverage.xml
+ else
+ echo "❌ Could not find coverage artifacts"
+ echo "This indicates the Azure DevOps CodeCoverageReport job may not have run successfully"
+ exit 1
+ fi
+
+ - name: Generate patch coverage report
+ run: |
+ # Install dependencies
+ pip install diff-cover jq
+ sudo apt-get update && sudo apt-get install -y libxml2-utils
+
+ # Verify coverage.xml exists before proceeding
+ if [[ ! -f coverage.xml ]]; then
+ echo "❌ coverage.xml not found in current directory"
+ echo "Available files:"
+ ls -la | head -20
+ exit 1
+ fi
+
+ echo "✅ coverage.xml found, size: $(wc -c < coverage.xml) bytes"
+ echo "🔍 Coverage file preview (first 10 lines):"
+ head -10 coverage.xml
+
+ # Generate diff coverage report using the new command format
+ echo "🚀 Generating patch coverage report..."
+
+ # Debug: Show git status and branches before running diff-cover
+ echo "🔍 Git status before diff-cover:"
+ git status --porcelain || echo "Git status failed"
+ echo "Current branch: $(git branch --show-current)"
+ echo "Available branches:"
+ git branch -a
+ echo "Checking if main branch is accessible:"
+ git log --oneline -n 5 main || echo "Could not access main branch"
+
+ # Debug: Show what diff-cover will analyze
+ echo "🔍 Git diff analysis:"
+ echo "Files changed between main and current branch:"
+ git diff --name-only main || echo "Could not get diff"
+ echo "Detailed diff for Python files:"
+ git diff main -- "*.py" | head -50 || echo "Could not get Python diff"
+
+ # Debug: Check coverage.xml content for specific files
+ echo "🔍 Coverage.xml analysis:"
+ echo "Python files mentioned in coverage.xml:"
+ grep -o 'filename="[^"]*\.py"' coverage.xml | head -10 || echo "Could not extract filenames"
+ echo "Sample coverage data:"
+ head -20 coverage.xml
+
+ # Use the new format for diff-cover commands
+ echo "🚀 Running diff-cover..."
+ diff-cover coverage.xml \
+ --compare-branch=main \
+ --html-report patch-coverage.html \
+ --json-report patch-coverage.json \
+ --markdown-report patch-coverage.md || {
+ echo "❌ diff-cover failed with exit code $?"
+ echo "Checking if coverage.xml is valid XML..."
+ if ! xmllint --noout coverage.xml 2>/dev/null; then
+ echo "❌ coverage.xml is not valid XML"
+ echo "First 50 lines of coverage.xml:"
+ head -50 coverage.xml
+ else
+ echo "✅ coverage.xml is valid XML"
+ echo "🔍 diff-cover verbose output:"
+ diff-cover coverage.xml --compare-branch=main --markdown-report debug-patch-coverage.md -v || echo "Verbose diff-cover also failed"
+ fi
+ # Don't exit here, let's see what files were created
+ }
+
+ # Check what files were generated
+ echo "🔍 Files generated after diff-cover:"
+ ls -la patch-coverage.* || echo "No patch-coverage files found"
+ ls -la *.md *.html *.json | grep -E "(patch|coverage)" || echo "No coverage-related files found"
+
+ # Extract patch coverage percentage
+ if [[ -f patch-coverage.json ]]; then
+ echo "🔍 Patch coverage analysis from JSON:"
+ echo "Raw JSON content:"
+ cat patch-coverage.json | jq . || echo "Could not parse JSON"
+
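+ # Illustrative shape of diff-cover's JSON report (only the keys read below; values are examples):
+ # {"total_percent_covered": 87.5, "total_num_lines": 40, "total_num_missing": 5, "src_stats": {...}}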
+ PATCH_COVERAGE=$(jq -r '.total_percent_covered // "N/A"' patch-coverage.json)
+ TOTAL_STATEMENTS=$(jq -r '.total_num_lines // "N/A"' patch-coverage.json)
+ MISSING_STATEMENTS=$(jq -r '.total_num_missing // "N/A"' patch-coverage.json)
+
+ echo "✅ Patch coverage: ${PATCH_COVERAGE}%"
+ echo "📊 Total lines: $TOTAL_STATEMENTS, Missing: $MISSING_STATEMENTS"
+
+ # Debug: Show per-file breakdown
+ echo "📁 Per-file coverage breakdown:"
+ jq -r '.src_stats // {} | to_entries[] | "\(.key): \(.value.percent_covered)% (\(.value.num_lines) lines, \(.value.num_missing) missing)"' patch-coverage.json || echo "Could not extract per-file stats"
+
+ echo "PATCH_COVERAGE_PCT=${PATCH_COVERAGE}%" >> $GITHUB_ENV
+ elif [[ -f patch-coverage.md ]]; then
+ echo "🔍 Extracting patch coverage from markdown file:"
+ echo "Markdown content:"
+ cat patch-coverage.md
+
+ # Extract coverage percentage from markdown
+ PATCH_COVERAGE=$(grep -o "Coverage.*[0-9]*%" patch-coverage.md | grep -o "[0-9]*%" | head -1 | sed 's/%//')
+ TOTAL_LINES=$(grep -o "Total.*[0-9]* lines" patch-coverage.md | grep -o "[0-9]*" | head -1)
+ MISSING_LINES=$(grep -o "Missing.*[0-9]* lines" patch-coverage.md | grep -o "[0-9]*" | tail -1)
+
+ if [[ -n "$PATCH_COVERAGE" ]]; then
+ echo "✅ Extracted patch coverage: ${PATCH_COVERAGE}%"
+ echo "📊 Total lines: $TOTAL_LINES, Missing: $MISSING_LINES"
+ echo "PATCH_COVERAGE_PCT=${PATCH_COVERAGE}%" >> $GITHUB_ENV
+ else
+ echo "⚠️ Could not extract coverage percentage from markdown"
+ echo "PATCH_COVERAGE_PCT=Could not parse" >> $GITHUB_ENV
+ fi
+ else
+ echo "⚠️ No patch coverage files generated"
+ echo "🔍 Checking for other output files:"
+ ls -la *coverage* || echo "No coverage files found"
+ echo "PATCH_COVERAGE_PCT=Report not generated" >> $GITHUB_ENV
+ fi
+
+ # Extract summary for comment
+ if [[ -f patch-coverage.md ]]; then
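+ # Pass the multi-line markdown summary to later steps using the same GITHUB_ENV heredoc syntax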
+ echo "PATCH_COVERAGE_SUMMARY<> $GITHUB_ENV
+ cat patch-coverage.md >> $GITHUB_ENV
+ echo "EOF" >> $GITHUB_ENV
+ echo "✅ Patch coverage markdown summary ready"
+ else
+ echo "⚠️ patch-coverage.md not generated"
+ echo "PATCH_COVERAGE_SUMMARY=Patch coverage report could not be generated." >> $GITHUB_ENV
+ fi
+
+ - name: Save coverage data for comment
+ run: |
+ mkdir -p coverage-comment-data
+ jq -n \
+ --arg pr_number "${{ github.event.pull_request.number }}" \
+ --arg coverage_percentage "${{ env.COVERAGE_PERCENTAGE }}" \
+ --arg covered_lines "${{ env.COVERED_LINES }}" \
+ --arg total_lines "${{ env.TOTAL_LINES }}" \
+ --arg patch_coverage_pct "${{ env.PATCH_COVERAGE_PCT }}" \
+ --arg low_coverage_files "${{ env.LOW_COVERAGE_FILES }}" \
+ --arg patch_coverage_summary "${{ env.PATCH_COVERAGE_SUMMARY }}" \
+ --arg ado_url "${{ env.ADO_URL }}" \
+ '{
+ pr_number: $pr_number,
+ coverage_percentage: $coverage_percentage,
+ covered_lines: $covered_lines,
+ total_lines: $total_lines,
+ patch_coverage_pct: $patch_coverage_pct,
+ low_coverage_files: $low_coverage_files,
+ patch_coverage_summary: $patch_coverage_summary,
+ ado_url: $ado_url
+ }' > coverage-comment-data/pr-info.json
+
+ # Validate JSON before uploading
+ echo "Validating generated JSON..."
+ jq . coverage-comment-data/pr-info.json > /dev/null || {
+ echo "❌ Invalid JSON generated"
+ cat coverage-comment-data/pr-info.json
+ exit 1
+ }
+ echo "✅ JSON validation successful"
+ cat coverage-comment-data/pr-info.json
+
+ - name: Upload coverage comment data
+ # Only upload artifact for forked PRs since same-repo PRs post comment directly
+ # This prevents unnecessary workflow_run triggers for same-repo PRs
+ if: github.event.pull_request.head.repo.full_name != github.repository
+ uses: actions/upload-artifact@v4
+ with:
+ name: coverage-comment-data
+ path: coverage-comment-data/
+ retention-days: 7
+
+ - name: Comment coverage summary on PR
+ # Skip for forked PRs due to token permission restrictions
+ if: github.event.pull_request.head.repo.full_name == github.repository
+ uses: ./.github/actions/post-coverage-comment
+ with:
+ pr_number: ${{ github.event.pull_request.number }}
+ coverage_percentage: ${{ env.COVERAGE_PERCENTAGE }}
+ covered_lines: ${{ env.COVERED_LINES }}
+ total_lines: ${{ env.TOTAL_LINES }}
+ patch_coverage_pct: ${{ env.PATCH_COVERAGE_PCT }}
+ low_coverage_files: ${{ env.LOW_COVERAGE_FILES }}
+ patch_coverage_summary: ${{ env.PATCH_COVERAGE_SUMMARY }}
+ ado_url: ${{ env.ADO_URL }}
\ No newline at end of file
diff --git a/.github/workflows/pr-format-check.yml b/.github/workflows/pr-format-check.yml
index 48e3b6e9c..55c3129d6 100644
--- a/.github/workflows/pr-format-check.yml
+++ b/.github/workflows/pr-format-check.yml
@@ -57,9 +57,9 @@ jobs:
// Extract the summary content
const summaryContent = summaryMatch[1];
- // Remove all HTML comments including the template placeholder
+ // Remove all HTML comments including unclosed ones (template placeholders)
const contentWithoutComments =
- summaryContent.replace(/<!--[\s\S]*?-->/g, '');
+ summaryContent.replace(/<!--[\s\S]*?(-->|$)/g, '');
// Remove whitespace and check if there's actual text content
const trimmedContent = contentWithoutComments.trim();
@@ -94,24 +94,35 @@ jobs:
labelToAdd = 'pr-size: large';
}
- // Remove existing size labels if any
+ // Get existing labels
const existingLabels = pr.labels.map(l => l.name);
const sizeLabels = ['pr-size: small', 'pr-size: medium', 'pr-size: large'];
- for (const label of existingLabels) {
- if (sizeLabels.includes(label)) {
+
+ // Find current size label (if any)
+ const currentSizeLabel = existingLabels.find(label => sizeLabels.includes(label));
+
+ // Only make changes if the label needs to be updated
+ if (currentSizeLabel !== labelToAdd) {
+ console.log(`Current size label: ${currentSizeLabel || 'none'}`);
+ console.log(`Required size label: ${labelToAdd} (Total changes: ${totalChanges})`);
+
+ // Remove existing size label if different from required
+ if (currentSizeLabel) {
+ console.log(`Removing outdated label: ${currentSizeLabel}`);
await github.rest.issues.removeLabel({
...context.repo,
issue_number: pr.number,
- name: label,
+ name: currentSizeLabel,
});
}
- }
- // Add new size label
- await github.rest.issues.addLabels({
- ...context.repo,
- issue_number: pr.number,
- labels: [labelToAdd],
- });
-
- console.log(`Added label: ${labelToAdd} (Total changes: ${totalChanges})`);
+ // Add new size label
+ console.log(`Adding new label: ${labelToAdd}`);
+ await github.rest.issues.addLabels({
+ ...context.repo,
+ issue_number: pr.number,
+ labels: [labelToAdd],
+ });
+ } else {
+ console.log(`Label already correct: ${labelToAdd} (Total changes: ${totalChanges}) - no changes needed`);
+ }
diff --git a/.gitignore b/.gitignore
index ccbdf8930..3069e19d4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,3 @@
-# Ignore all files in the pybind/build directory
-mssql_python/pybind/build/
-
# Ignore pycache files and folders
__pycache__/
**/__pycache__/
@@ -23,6 +20,7 @@ test-*.xml
# Ignore the build & mssql_python.egg-info directories
build/
+**/build/
mssql_python.egg-info/
# Python bytecode
@@ -46,4 +44,22 @@ build/
*.swp
# .DS_Store files
-.DS_Store
\ No newline at end of file
+.DS_Store
+
+# wheel files
+*.whl
+*.tar.gz
+*.zip
+
+# Dockerfiles and images (root only)
+/Dockerfile*
+/docker-compose.yml
+/docker-compose.override.yml
+/docker-compose.*.yml
+
+# Virtual environments
+*venv*/
+**/*venv*/
+
+# learning files
+learnings/
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
new file mode 100644
index 000000000..5b1765667
--- /dev/null
+++ b/.vscode/extensions.json
@@ -0,0 +1,20 @@
+{
+ "recommendations": [
+ // Python extensions - Code formatting and linting
+ "ms-python.python",
+ "ms-python.vscode-pylance",
+ "ms-python.black-formatter",
+ "ms-python.autopep8",
+ "ms-python.pylint",
+ "ms-python.flake8",
+ // C++ extensions - Code formatting and linting
+ "ms-vscode.cpptools",
+ "ms-vscode.cpptools-extension-pack",
+ "xaver.clang-format",
+ "mine.cpplint",
+ ],
+ "unwantedRecommendations": [
+ // Avoid conflicts with multiple formatters
+ "ms-vscode.cpptools-themes"
+ ]
+}
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 000000000..f4e2ca119
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,252 @@
+{
+ "C_Cpp_Runner.cCompilerPath": "gcc",
+ "C_Cpp_Runner.cppCompilerPath": "g++",
+ "C_Cpp_Runner.debuggerPath": "gdb",
+ "C_Cpp_Runner.cStandard": "",
+ "C_Cpp_Runner.cppStandard": "",
+ "C_Cpp_Runner.msvcBatchPath": "C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Auxiliary/Build/vcvarsall.bat",
+ "C_Cpp_Runner.useMsvc": false,
+ "C_Cpp_Runner.warnings": [
+ "-Wall",
+ "-Wextra",
+ "-Wpedantic",
+ "-Wshadow",
+ "-Wformat=2",
+ "-Wcast-align",
+ "-Wconversion",
+ "-Wsign-conversion",
+ "-Wnull-dereference"
+ ],
+ "C_Cpp_Runner.msvcWarnings": [
+ "/W4",
+ "/permissive-",
+ "/w14242",
+ "/w14287",
+ "/w14296",
+ "/w14311",
+ "/w14826",
+ "/w44062",
+ "/w44242",
+ "/w14905",
+ "/w14906",
+ "/w14263",
+ "/w44265",
+ "/w14928"
+ ],
+ "C_Cpp_Runner.enableWarnings": true,
+ "C_Cpp_Runner.warningsAsError": false,
+ "C_Cpp_Runner.compilerArgs": [],
+ "C_Cpp_Runner.linkerArgs": [],
+ "C_Cpp_Runner.includePaths": [],
+ "C_Cpp_Runner.includeSearch": [
+ "*",
+ "**/*"
+ ],
+ "C_Cpp_Runner.excludeSearch": [
+ "**/build",
+ "**/build/**",
+ "**/.*",
+ "**/.*/**",
+ "**/.vscode",
+ "**/.vscode/**"
+ ],
+ "C_Cpp_Runner.useAddressSanitizer": false,
+ "C_Cpp_Runner.useUndefinedSanitizer": false,
+ "C_Cpp_Runner.useLeakSanitizer": false,
+ "C_Cpp_Runner.showCompilationTime": false,
+ "C_Cpp_Runner.useLinkTimeOptimization": false,
+ "C_Cpp_Runner.msvcSecureNoWarnings": false,
+ "python.testing.pytestArgs": [
+ "mssql_python"
+ ],
+ "python.testing.unittestEnabled": false,
+ "python.testing.pytestEnabled": true,
+ "files.associations": {
+ "stdexcept": "cpp",
+ "array": "cpp",
+ "atomic": "cpp",
+ "bit": "cpp",
+ "cctype": "cpp",
+ "charconv": "cpp",
+ "chrono": "cpp",
+ "clocale": "cpp",
+ "cmath": "cpp",
+ "codecvt": "cpp",
+ "compare": "cpp",
+ "concepts": "cpp",
+ "condition_variable": "cpp",
+ "cstdarg": "cpp",
+ "cstddef": "cpp",
+ "cstdint": "cpp",
+ "cstdio": "cpp",
+ "cstdlib": "cpp",
+ "ctime": "cpp",
+ "cwchar": "cpp",
+ "cwctype": "cpp",
+ "deque": "cpp",
+ "string": "cpp",
+ "unordered_map": "cpp",
+ "vector": "cpp",
+ "exception": "cpp",
+ "algorithm": "cpp",
+ "functional": "cpp",
+ "iterator": "cpp",
+ "memory": "cpp",
+ "memory_resource": "cpp",
+ "numeric": "cpp",
+ "optional": "cpp",
+ "random": "cpp",
+ "ratio": "cpp",
+ "string_view": "cpp",
+ "system_error": "cpp",
+ "tuple": "cpp",
+ "type_traits": "cpp",
+ "utility": "cpp",
+ "format": "cpp",
+ "fstream": "cpp",
+ "initializer_list": "cpp",
+ "iomanip": "cpp",
+ "iosfwd": "cpp",
+ "iostream": "cpp",
+ "istream": "cpp",
+ "limits": "cpp",
+ "mutex": "cpp",
+ "new": "cpp",
+ "numbers": "cpp",
+ "ostream": "cpp",
+ "semaphore": "cpp",
+ "span": "cpp",
+ "sstream": "cpp",
+ "stop_token": "cpp",
+ "streambuf": "cpp",
+ "text_encoding": "cpp",
+ "thread": "cpp",
+ "typeinfo": "cpp",
+ "variant": "cpp",
+ "list": "cpp",
+ "complex": "cpp",
+ "cstring": "cpp",
+ "forward_list": "cpp",
+ "map": "cpp",
+ "set": "cpp",
+ "unordered_set": "cpp",
+ "ranges": "cpp",
+ "typeindex": "cpp",
+ "valarray": "cpp",
+ "bitset": "cpp",
+ "regex": "cpp",
+ "xlocale": "cpp",
+ "filesystem": "cpp",
+ "ios": "cpp",
+ "locale": "cpp",
+ "stack": "cpp",
+ "xfacet": "cpp",
+ "xhash": "cpp",
+ "xiosbase": "cpp",
+ "xlocbuf": "cpp",
+ "xlocinfo": "cpp",
+ "xlocmes": "cpp",
+ "xlocmon": "cpp",
+ "xlocnum": "cpp",
+ "xloctime": "cpp",
+ "xmemory": "cpp",
+ "xstring": "cpp",
+ "xtr1common": "cpp",
+ "xtree": "cpp",
+ "xutility": "cpp"
+ },
+ "cmake.sourceDirectory": "C:/Users/jathakkar/OneDrive - Microsoft/Documents/Github_mssql_python/New/mssql-python/mssql_python/pybind",
+ "python.linting.pylintEnabled": true,
+ "python.linting.enabled": true,
+ "python.linting.pylintArgs": [
+ "--disable=fixme,no-member,too-many-arguments,too-many-positional-arguments,invalid-name,useless-parent-delegation"
+ ],
+ "C_Cpp.cppStandard": "c++14",
+ "C_Cpp.clang_format_style": "file",
+ "C_Cpp.clang_format_path": "clang-format",
+ // Auto-format on save
+ "editor.formatOnSave": true,
+ "editor.formatOnPaste": false,
+ "editor.formatOnType": false,
+ // Python formatting (using Black - Microsoft's recommended formatter)
+ "[python]": {
+ "editor.formatOnSave": true,
+ "editor.defaultFormatter": "ms-python.black-formatter",
+ "editor.codeActionsOnSave": {
+ "source.organizeImports": "explicit"
+ }
+ },
+ // Black formatter settings (following Microsoft guidelines)
+ "black-formatter.args": [
+ "--line-length=100"
+ ],
+ // Python linting
+ "python.linting.flake8Enabled": true,
+ "python.linting.flake8Args": [
+ "--max-line-length=100",
+ "--extend-ignore=E203,W503"
+ ],
+ // C++ formatting (using clang-format with .clang-format file)
+ "[cpp]": {
+ "editor.formatOnSave": true,
+ "editor.defaultFormatter": "xaver.clang-format",
+ "editor.formatOnPaste": false,
+ "editor.formatOnType": false,
+ "editor.codeActionsOnSave": {
+ "source.fixAll": "explicit"
+ }
+ },
+ "[c]": {
+ "editor.formatOnSave": true,
+ "editor.defaultFormatter": "ms-vscode.cpptools",
+ "editor.formatOnPaste": false,
+ "editor.formatOnType": false,
+ "editor.codeActionsOnSave": {
+ "source.fixAll": "explicit"
+ }
+ },
+ "[h]": {
+ "editor.formatOnSave": true,
+ "editor.defaultFormatter": "ms-vscode.cpptools"
+ },
+ "[hpp]": {
+ "editor.formatOnSave": true,
+ "editor.defaultFormatter": "ms-vscode.cpptools"
+ },
+ // C++ IntelliSense settings
+ "C_Cpp.formatting": "clangFormat",
+ "C_Cpp.clang_format_fallbackStyle": "LLVM",
+ "C_Cpp.clang_format_sortIncludes": true,
+ // Disable conflicting formatters
+ "clang-format.executable": "",
+ "clang-format.style": "file",
+ // C++ Linting with cpplint
+ "cpplint.cpplintPath": "python3 -m cpplint",
+ "cpplint.lintMode": "workspace",
+ "cpplint.filters": [
+ "-legal/copyright",
+ "-build/include_subdir",
+ "-build/c++11"
+ ],
+ "cpplint.lineLength": 100,
+ // Python type checking (Pylance) - Microsoft's recommended settings
+ "python.analysis.typeCheckingMode": "basic",
+ "python.analysis.autoImportCompletions": true,
+ "python.analysis.diagnosticMode": "workspace",
+ "python.analysis.inlayHints.functionReturnTypes": true,
+ "python.analysis.inlayHints.variableTypes": true,
+ "python.analysis.inlayHints.parameterTypes": true,
+ // Additional Python analysis settings
+ "python.analysis.diagnosticSeverityOverrides": {
+ "reportMissingTypeStubs": "none",
+ "reportUnknownMemberType": "none",
+ "reportUnknownVariableType": "none",
+ "reportUnknownArgumentType": "none",
+ "reportGeneralTypeIssues": "warning",
+ "reportOptionalMemberAccess": "warning",
+ "reportOptionalSubscript": "warning",
+ "reportPrivateUsage": "warning",
+ "reportUnusedImport": "information",
+ "reportUnusedVariable": "warning"
+ }
+}
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 836a0a794..4288fcb5a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -39,4 +39,4 @@ All pull requests must include:
- **Meaningful Summary**: Include a clear description of your changes under the "### Summary" section in the PR description (minimum 10 characters)
- **Issue/Work Item Link** (only one required):
- External contributors: Link to a GitHub issue
- - Microsoft org members: Link to an ADO work item
\ No newline at end of file
+ - Microsoft org members: Link to an ADO work item
diff --git a/OneBranchPipelines/build-release-package-pipeline.yml b/OneBranchPipelines/build-release-package-pipeline.yml
new file mode 100644
index 000000000..550d317df
--- /dev/null
+++ b/OneBranchPipelines/build-release-package-pipeline.yml
@@ -0,0 +1,467 @@
+# =========================================================================================
+# OneBranch Release Pipeline for mssql-python
+# =========================================================================================
+# Builds Python wheels for all supported platforms with SDL compliance:
+# - Windows: Python 3.10-3.14 (x64 + ARM64)
+# - macOS: Python 3.10-3.14 (Universal2 = x86_64 + ARM64 in single binary)
+# - Linux: Python 3.10-3.14 on manylinux/musllinux (x86_64 + ARM64)
+#
+# Security Features:
+# - ESRP code signing (Windows .pyd files only)
+# - ESRP malware scanning (all artifacts)
+# - Component Governance (dependency scanning)
+# - BinSkim (binary security analysis)
+# - CredScan (credential leak detection)
+# - PoliCheck (inclusive language scanning)
+# - CodeQL (static code analysis)
+# - SBOM generation (Software Bill of Materials)
+# =========================================================================================
+
+# Build number format: YYDDD.r (YY=year, DDD=day of year, r=revision)
+# Example: 24365.1 = 2024, day 365, revision 1
+name: $(Year:YY)$(DayOfYear)$(Rev:.r)
+
+# =========================
+# PIPELINE TRIGGERS
+# =========================
+# Trigger on commits to main branch
+trigger:
+ branches:
+ include:
+ - main
+
+# Trigger on pull requests to main branch
+pr:
+ branches:
+ include:
+ - main
+
+# Schedule: Daily builds at 07:00 AM IST (01:30 UTC)
+# Cron format: minute hour day month weekday
+# always:true = run even if no code changes
+schedules:
+ - cron: "30 1 * * *"
+ displayName: Daily run at 07:00 AM IST
+ branches:
+ include:
+ - main
+ always: true
+
+# =========================
+# PIPELINE PARAMETERS
+# =========================
+parameters:
+ # OneBranch build type determines compliance level
+ # - Official: Production builds with full SDL compliance, all security scanning enabled
+ # - NonOfficial: Development/test builds with reduced security scanning
+ # Note: Scheduled (daily) builds automatically use 'Official' regardless of this setting
+ - name: oneBranchType
+ displayName: 'OneBranch Template Type'
+ type: string
+ values:
+ - 'Official'
+ - 'NonOfficial'
+ default: 'NonOfficial'
+
+ # Enable/disable SDL security tasks (BinSkim, CredScan, PoliCheck, etc.)
+ # Set to false for faster builds during development
+ - name: runSdlTasks
+ displayName: 'Run SDL Security Tasks'
+ type: boolean
+ default: true
+
+ # =========================
+ # PLATFORM CONFIGURATIONS
+ # =========================
+ # Each platform uses different matrix strategy:
+ # - Windows: Explicit per-version stages (9 stages for x64/ARM64 combos)
+ # - macOS: Explicit per-version stages (5 stages for universal2 builds)
+ # - Linux: Per-distro stages, builds ALL Python versions in loop (4 stages)
+
+ # Windows Configuration Matrix
+ # Each entry creates separate stage: Win_py<pyVer>_<arch>
+ # pyVer format: '310' = Python 3.10, '314' = Python 3.14
+ # arch: 'x64' (Intel/AMD 64-bit) or 'arm64' (ARM64, cross-compiled on x64)
+ # Note: ARM64 builds use x64 host with ARM64 python.lib for cross-compilation
+ - name: windowsConfigs
+ type: object
+ default:
+ # x64 builds (5 versions: 3.10-3.14)
+ - pyVer: '310'
+ arch: 'x64'
+ - pyVer: '311'
+ arch: 'x64'
+ - pyVer: '312'
+ arch: 'x64'
+ - pyVer: '313'
+ arch: 'x64'
+ - pyVer: '314'
+ arch: 'x64'
+ # ARM64 builds (4 versions: 3.11-3.14)
+ # 3.10 excluded due to limited ARM64 support
+ - pyVer: '311'
+ arch: 'arm64'
+ - pyVer: '312'
+ arch: 'arm64'
+ - pyVer: '313'
+ arch: 'arm64'
+ - pyVer: '314'
+ arch: 'arm64'
+
+ # macOS Configuration Matrix
+ # Each entry creates separate stage: MacOS_py<pyVer>
+ # All builds are Universal2 (x86_64 + ARM64 in single binary)
+ # pyVer format: '310' = Python 3.10, '314' = Python 3.14
+ - name: macosConfigs
+ type: object
+ default:
+ # 5 versions: 3.10-3.14 (all universal2)
+ - pyVer: '310'
+ - pyVer: '311'
+ - pyVer: '312'
+ - pyVer: '313'
+ - pyVer: '314'
+
+ # Linux Configuration Matrix
+ # Each entry creates ONE stage that builds ALL Python versions (3.10-3.14)
+ # tag: 'manylinux' (glibc-based, e.g., Ubuntu/CentOS) or 'musllinux' (musl-based, e.g., Alpine)
+ # arch: CPU architecture for Docker platform
+ # platform: Docker platform identifier for multi-arch builds
+ - name: linuxConfigs
+ type: object
+ default:
+ # manylinux (glibc-based) for x86_64 and ARM64
+ - { tag: 'manylinux', arch: 'x86_64', platform: 'linux/amd64' }
+ - { tag: 'manylinux', arch: 'aarch64', platform: 'linux/arm64' }
+ # musllinux (musl-based) for x86_64 and ARM64
+ - { tag: 'musllinux', arch: 'x86_64', platform: 'linux/amd64' }
+ - { tag: 'musllinux', arch: 'aarch64', platform: 'linux/arm64' }
+
+# =========================
+# PIPELINE VARIABLES
+# =========================
+variables:
+ # Determine effective build type: scheduled builds are Official, manual/PR builds use parameter
+ # Build.Reason values: Schedule, Manual, IndividualCI, PullRequest, BatchedCI
+ - name: effectiveOneBranchType
+ ${{ if eq(variables['Build.Reason'], 'Schedule') }}:
+ value: 'Official'
+ ${{ else }}:
+ value: '${{ parameters.oneBranchType }}'
+
+ # Variable template imports
+ # Each file provides specific variable groups:
+ # - common-variables: Shared across all builds (paths, flags)
+ # - onebranch-variables: OneBranch-specific settings (SDL, compliance)
+ # - build-variables: Build configuration (compiler flags, options)
+ # - signing-variables: ESRP signing credentials and settings
+ # - symbol-variables: Debug symbol publishing configuration
+ - template: /OneBranchPipelines/variables/common-variables.yml@self
+ - template: /OneBranchPipelines/variables/onebranch-variables.yml@self
+ - template: /OneBranchPipelines/variables/build-variables.yml@self
+ - template: /OneBranchPipelines/variables/signing-variables.yml@self
+ - template: /OneBranchPipelines/variables/symbol-variables.yml@self
+
+ # Variable group from Azure DevOps Library
+ # Contains ESRP service connection credentials:
+ # - SigningEsrpConnectedServiceName
+ # - SigningAppRegistrationClientId
+ # - SigningAppRegistrationTenantId
+ # - SigningEsrpClientId
+ # - DB_PASSWORD (SQL Server SA password for testing)
+ - group: 'ESRP Federated Creds (AME)'
+
+# =========================
+# ONEBRANCH RESOURCES
+# =========================
+# OneBranch.Pipelines/GovernedTemplates repository contains:
+# - SDL compliance templates (BinSkim, CredScan, PoliCheck, etc.)
+# - Security scanning templates (ESRP, Component Governance)
+# - Artifact publishing templates (OneBranch-compliant artifact handling)
+resources:
+ repositories:
+ - repository: templates
+ type: git
+ name: 'OneBranch.Pipelines/GovernedTemplates'
+ ref: 'refs/heads/main'
+
+# =========================
+# PIPELINE TEMPLATE EXTENSION
+# =========================
+# Extends OneBranch official template for cross-platform builds
+# Template type determined by effectiveOneBranchType:
+# - Scheduled builds: Always Official (full SDL compliance)
+# - Manual/PR builds: Uses oneBranchType parameter (default NonOfficial)
+extends:
+ template: 'v2/OneBranch.${{ variables.effectiveOneBranchType }}.CrossPlat.yml@templates'
+
+ # =========================
+ # ONEBRANCH TEMPLATE PARAMETERS
+ # =========================
+ parameters:
+ # Pool Configuration
+ # Different platforms use different agent pools:
+ # - Windows: Custom 1ES pool (Django-1ES-pool) with WIN22-SQL22 image (Windows Server 2022 + SQL Server 2022)
+ # - Linux: Custom 1ES pool (Django-1ES-pool) with ADO-UB22-SQL22 image (Ubuntu 22.04 + SQL Server 2022)
+ # - macOS: Microsoft-hosted pool (Azure Pipelines) with macOS-14 image (macOS Sonoma)
+ # Note: Container definitions section present but unused (pools configured in individual stage templates)
+
+ # Feature Flags
+ # Controls OneBranch platform behavior
+ featureFlags:
+ # Use Windows Server 2022 base image for Windows builds
+ WindowsHostVersion:
+ Version: '2022'
+ # Enable BinSkim scanning for all supported file extensions
+ # Without this, only .dll/.exe scanned (misses .pyd Python extensions)
+ binskimScanAllExtensions: true
+
+ # =========================
+ # GLOBAL SDL CONFIGURATION
+ # =========================
+ # SDL = Security Development Lifecycle
+ # Comprehensive security scanning across all build stages
+ # See: https://aka.ms/obpipelines/sdl
+ globalSdl:
+ # Global Guardian baseline and suppression files
+ # Baseline = known issues that are being tracked
+ # Suppression = false positives that should be ignored
+ baseline:
+ baselineFile: $(Build.SourcesDirectory)/.gdn/.gdnbaselines
+ suppressionSet: default
+ suppression:
+ suppressionFile: $(Build.SourcesDirectory)/.gdn/.gdnsuppress
+ suppressionSet: default
+
+ # ApiScan - Scans APIs for security vulnerabilities
+ # Disabled: Requires PDB symbols for Windows DLLs
+ # Python wheels (.pyd files) better covered by BinSkim
+ # Justification: JDBC team also disables APIScan for similar reasons
+ apiscan:
+ enabled: false
+ justificationForDisabling: 'APIScan requires PDB symbols for native Windows DLLs. Python wheels primarily contain .pyd files and Python code, better covered by BinSkim. JDBC team also has APIScan disabled for similar reasons.'
+
+ # Armory - Security scanning for binaries
+ # Checks for known vulnerabilities in compiled artifacts
+ # break:true = fail build if critical issues found
+ armory:
+ enabled: ${{ parameters.runSdlTasks }}
+ break: true
+
+ # AsyncSdl - Asynchronous SDL tasks (run after build completion)
+ # Disabled: All SDL tasks run synchronously during build
+ asyncSdl:
+ enabled: false
+
+ # BinSkim - Binary security analyzer (Microsoft tool)
+ # Scans compiled binaries for security best practices:
+ # - Stack buffer overrun protection (/GS)
+ # - DEP (Data Execution Prevention)
+ # - ASLR (Address Space Layout Randomization)
+ # - Control Flow Guard (CFG)
+ # Scans: .pyd (Python), .dll/.exe (Windows), .so (Linux), .dylib (macOS)
+ binskim:
+ enabled: ${{ parameters.runSdlTasks }}
+ break: true # Fail build on critical BinSkim errors
+ # Recursive scan of all binary file types
+ analyzeTarget: '$(Build.SourcesDirectory)/**/*.{pyd,dll,exe,so,dylib}'
+ analyzeRecurse: true
+ # SARIF output (Static Analysis Results Interchange Format)
+ logFile: '$(Build.ArtifactStagingDirectory)/BinSkimResults.sarif'
+
+ # CodeInspector - Source code security analysis
+ # Checks Python/C++ code for security anti-patterns
+ codeinspector:
+ enabled: ${{ parameters.runSdlTasks }}
+ logLevel: Error
+
+ # CodeQL - Semantic code analysis (GitHub Advanced Security)
+ # Deep analysis of Python and C++ code:
+ # - SQL injection vulnerabilities
+ # - Buffer overflows
+ # - Use-after-free
+ # - Integer overflows
+ # security-extended suite = comprehensive security queries
+ codeql:
+ enabled: ${{ parameters.runSdlTasks }}
+ language: 'python,cpp'
+ sourceRoot: '$(REPO_ROOT)'
+ querySuite: security-extended
+
+ # CredScan - Credential scanner
+ # Detects hardcoded credentials, API keys, passwords in code
+ # Uses global baseline/suppression files configured above
+ credscan:
+ enabled: ${{ parameters.runSdlTasks }}
+
+ # ESLint - JavaScript/TypeScript linter
+ # Disabled: Not applicable to Python/C++ project
+ eslint:
+ enabled: false
+
+ # PoliCheck - Political correctness checker
+ # Scans code and documentation for inappropriate terms
+ # Exclusion file contains approved exceptions (technical terms)
+ policheck:
+ enabled: ${{ parameters.runSdlTasks }}
+ break: true
+ exclusionFile: '$(REPO_ROOT)/.config/PolicheckExclusions.xml'
+
+ # Roslyn Analyzers - .NET C# code analysis
+ # Disabled: Not applicable to Python/C++ project
+ roslyn:
+ enabled: false
+
+ # Publish SDL Logs
+ # Uploads security scan results (SARIF files) to pipeline artifacts
+ # Used for audit trail and compliance reporting
+ publishLogs:
+ enabled: ${{ parameters.runSdlTasks }}
+
+ # SBOM - Software Bill of Materials
+ # Generates machine-readable list of all dependencies
+ # Required for supply chain security and compliance
+ # Format: SPDX or CycloneDX
+ # Version automatically detected from wheel metadata (setup.py)
+ sbom:
+ enabled: ${{ parameters.runSdlTasks }}
+ packageName: 'mssql-python'
+
+ # TSA - Threat and Security Assessment
+ # Uploads scan results to Microsoft's TSA tool for tracking
+ # Only enabled for Official builds (production compliance requirement)
+ tsa:
+ enabled: ${{ and(eq(variables.effectiveOneBranchType, 'Official'), parameters.runSdlTasks) }}
+ configFile: '$(REPO_ROOT)/.config/tsaoptions.json'
+
+ # =========================
+ # PIPELINE STAGES
+ # =========================
+ # Total stages: 9 Windows + 5 macOS + 4 Linux + 1 Consolidate = 19 stages
+ # Stages run in parallel (no dependencies between platform builds)
+ stages:
+ # =========================
+ # WINDOWS BUILD STAGES
+ # =========================
+ # Strategy: Explicit stage per Python version × architecture
+ # Total: 9 stages (5 x64 + 4 ARM64)
+ # Python versions: 3.10-3.14 (x64), 3.11-3.14 (ARM64)
+ # Each stage:
+ # 1. Installs Python (UsePythonVersion or NuGet for 3.14)
+ # 2. Downloads ARM64 python.lib if cross-compiling
+ # 3. Builds .pyd native extension
+ # 4. Runs pytest (x64 only, ARM64 can't execute on x64 host)
+ # 5. Builds wheel
+ # 6. Publishes artifacts (wheels + PYD + PDB)
+ # 7. ESRP malware scanning
+ - ${{ each config in parameters.windowsConfigs }}:
+ - template: /OneBranchPipelines/stages/build-windows-single-stage.yml@self
+ parameters:
+ stageName: Win_py${{ config.pyVer }}_${{ config.arch }}
+ jobName: BuildWheel
+ # Convert pyVer '310' → pythonVersion '3.10'
+ pythonVersion: ${{ format('{0}.{1}', substring(config.pyVer, 0, 1), substring(config.pyVer, 1, 2)) }}
+ shortPyVer: ${{ config.pyVer }}
+ architecture: ${{ config.arch }}
+ oneBranchType: '${{ variables.effectiveOneBranchType }}'
+
+ # =========================
+ # MACOS BUILD STAGES
+ # =========================
+ # Strategy: Explicit stage per Python version
+ # Total: 5 stages (3.10-3.14)
+ # All builds are Universal2 (x86_64 + ARM64 in single .so binary)
+ # Each stage:
+ # 1. Installs Python via UsePythonVersion@0
+ # 2. Installs CMake and pybind11
+ # 3. Builds universal2 .so (ARCHFLAGS="-arch x86_64 -arch arm64")
+ # 4. Starts SQL Server Docker container (via Colima)
+ # 5. Runs pytest
+ # 6. Builds wheel
+ # 7. Publishes artifacts (wheels + .so)
+ # 8. ESRP malware scanning
+ - ${{ each config in parameters.macosConfigs }}:
+ - template: /OneBranchPipelines/stages/build-macos-single-stage.yml@self
+ parameters:
+ stageName: MacOS_py${{ config.pyVer }}
+ jobName: BuildWheel
+ # Convert pyVer '310' → pythonVersion '3.10'
+ pythonVersion: ${{ format('{0}.{1}', substring(config.pyVer, 0, 1), substring(config.pyVer, 1, 2)) }}
+ shortPyVer: ${{ config.pyVer }}
+ oneBranchType: '${{ variables.effectiveOneBranchType }}'
+
+ # =========================
+ # LINUX BUILD STAGES
+ # =========================
+ # Strategy: One stage per distribution × architecture
+ # Total: 4 stages (manylinux×2 + musllinux×2)
+ # Each stage builds ALL Python versions (3.10-3.14) in a loop
+ # Distributions:
+ # - manylinux: glibc-based (Ubuntu, CentOS, etc.)
+ # - musllinux: musl-based (Alpine Linux)
+ # Architectures: x86_64 (AMD/Intel), aarch64 (ARM64)
+ # Each stage:
+ # 1. Starts PyPA Docker container (manylinux_2_28 or musllinux_1_2)
+ # 2. Starts SQL Server Docker container
+ # 3. For each Python version (cp310-cp314):
+ # a. Builds .so native extension
+ # b. Builds wheel
+ # c. Installs wheel in isolated directory
+ # d. Runs pytest against SQL Server
+ # 4. Publishes artifacts (all 5 wheels)
+ # 5. Component Governance + AntiMalware scanning
+ - ${{ each config in parameters.linuxConfigs }}:
+ - template: /OneBranchPipelines/stages/build-linux-single-stage.yml@self
+ parameters:
+ stageName: Linux_${{ config.tag }}_${{ config.arch }}
+ jobName: BuildWheels
+ linuxTag: ${{ config.tag }}
+ arch: ${{ config.arch }}
+ dockerPlatform: ${{ config.platform }}
+ oneBranchType: '${{ variables.effectiveOneBranchType }}'
+
+ # =========================
+ # CONSOLIDATE STAGE
+ # =========================
+ # Purpose: Collect all artifacts from platform builds into single dist/ folder
+ # Dependencies: All 18 build stages (9 Windows + 5 macOS + 4 Linux)
+ # Stages run in parallel, Consolidate waits for ALL to complete
+ # Outputs:
+ # - dist/wheels/*.whl (all platform wheels)
+ # - dist/bindings/Windows/*.{pyd,pdb} (Windows native extensions)
+ # - dist/bindings/macOS/*.so (macOS universal2 binaries)
+ # - dist/bindings/Linux/*.so (Linux native extensions)
+ # This stage also runs final BinSkim scan on all binaries
+ - stage: Consolidate
+ displayName: 'Consolidate All Artifacts'
+ dependsOn:
+ # Windows dependencies (9 stages)
+ - Win_py310_x64
+ - Win_py311_x64
+ - Win_py312_x64
+ - Win_py313_x64
+ - Win_py314_x64
+ - Win_py311_arm64
+ - Win_py312_arm64
+ - Win_py313_arm64
+ - Win_py314_arm64
+ # macOS dependencies (5 stages)
+ - MacOS_py310
+ - MacOS_py311
+ - MacOS_py312
+ - MacOS_py313
+ - MacOS_py314
+ # Linux dependencies (4 stages)
+ - Linux_manylinux_x86_64
+ - Linux_manylinux_aarch64
+ - Linux_musllinux_x86_64
+ - Linux_musllinux_aarch64
+ jobs:
+ - template: /OneBranchPipelines/jobs/consolidate-artifacts-job.yml@self
+ parameters:
+ # CRITICAL: Use effectiveOneBranchType to ensure scheduled builds run as 'Official'
+ # Using parameters.oneBranchType would break scheduled builds (they'd run as 'NonOfficial')
+ oneBranchType: '${{ variables.effectiveOneBranchType }}'
+
+ # Note: Symbol publishing handled directly in Windows build stages
+ # PDB files uploaded to Microsoft Symbol Server for debugging
diff --git a/OneBranchPipelines/dummy-release-pipeline.yml b/OneBranchPipelines/dummy-release-pipeline.yml
new file mode 100644
index 000000000..51c5a3fd2
--- /dev/null
+++ b/OneBranchPipelines/dummy-release-pipeline.yml
@@ -0,0 +1,311 @@
+# OneBranch DUMMY/TEST Release Pipeline for mssql-python
+# ⚠️ THIS IS A TEST PIPELINE - NOT FOR PRODUCTION RELEASES ⚠️
+# Downloads wheel and symbol artifacts from build pipeline, publishes symbols, and performs dummy ESRP release for testing
+# Uses Maven ContentType instead of PyPI to avoid accidental production releases
+# This pipeline is ALWAYS NonOfficial - for testing only, not production
+
+name: $(Year:YY)$(DayOfYear)$(Rev:.r)-Dummy-Release
+
+# Manual trigger only - releases should be deliberate
+trigger: none
+pr: none
+
+# Parameters for DUMMY release pipeline
+parameters:
+ - name: publishSymbols
+ displayName: '[TEST] Publish Symbols to Symbol Servers'
+ type: boolean
+ default: false
+
+ - name: performDummyRelease
+ displayName: '[TEST] Perform Dummy ESRP Release (Maven - NOT PyPI)'
+ type: boolean
+ default: true # Safe to enable - uses Maven ContentType for testing
+
+# Variables
+variables:
+ # Common variables
+ - template: /OneBranchPipelines/variables/common-variables.yml@self
+ - template: /OneBranchPipelines/variables/onebranch-variables.yml@self
+
+ # Variable groups
+ - group: 'ESRP Federated Creds (AME)' # Contains ESRP signing credentials
+ - group: 'Symbols Publishing' # Contains SymbolServer, SymbolTokenUri variables
+
+# OneBranch resources
+resources:
+ repositories:
+ - repository: templates
+ type: git
+ name: 'OneBranch.Pipelines/GovernedTemplates'
+ ref: 'refs/heads/main'
+
+ # Reference to the build pipeline
+ pipelines:
+ - pipeline: buildPipeline
+ source: 'Build-Release-Package-Pipeline' # Name of the build pipeline
+ trigger: none # Manual trigger only
+
+# Extend OneBranch Nonofficial template
+# Always uses NonOfficial template for dummy pipeline
+extends:
+ template: 'v2/OneBranch.NonOfficial.CrossPlat.yml@templates'
+
+ parameters:
+ # Feature flags
+ featureFlags:
+ WindowsHostVersion:
+ Version: '2022'
+
+ # Global SDL Configuration
+ globalSdl:
+ # Global Guardian baseline and suppression files
+ baseline:
+ baselineFile: $(Build.SourcesDirectory)\.gdn\.gdnbaselines
+ suppressionSet: default
+ suppression:
+ suppressionFile: $(Build.SourcesDirectory)\.gdn\.gdnsuppress
+ suppressionSet: default
+
+ # Minimal SDL for release pipeline - artifacts already scanned during build
+ binskim:
+ enabled: true
+ break: true
+
+ credscan:
+ enabled: true
+
+ policheck:
+ enabled: true
+ break: true
+ exclusionFile: '$(REPO_ROOT)/.config/PolicheckExclusions.xml'
+
+ # Publish SDL logs
+ publishLogs:
+ enabled: true
+
+ # Pipeline stages
+ stages:
+ - stage: TestReleasePackages
+ displayName: '[TEST] Dummy Release - Testing ESRP Workflow'
+
+ jobs:
+ - job: DownloadAndTestRelease
+ displayName: '[TEST] Download Artifacts and Perform Dummy Release'
+
+ pool:
+ type: windows
+ isCustom: true
+ name: Django-1ES-pool
+ vmImage: WIN22-SQL22
+
+ variables:
+ ob_outputDirectory: '$(Build.ArtifactStagingDirectory)'
+
+ steps:
+ # Step 1: Download consolidated artifacts from build pipeline
+ - task: DownloadPipelineArtifact@2
+ displayName: '[TEST] Download Consolidated Artifacts from Build Pipeline'
+ inputs:
+ buildType: 'specific'
+ project: '$(System.TeamProject)'
+ definition: 2199 # Build-Release-Package-Pipeline definition ID
+ buildVersionToDownload: 'specific'
+ buildId: $(resources.pipeline.buildPipeline.runID) # Use the build run selected in UI
+ artifactName: 'drop_Consolidate_ConsolidateArtifacts' # Consolidated artifact with dist/ and symbols/
+ targetPath: '$(Build.SourcesDirectory)/artifacts'
+
+ # Step 3: List downloaded artifacts for verification
+ - task: PowerShell@2
+ displayName: '[TEST] List Downloaded Wheel and Symbol Files'
+ inputs:
+ targetType: 'inline'
+ script: |
+ Write-Host "====================================="
+ Write-Host "[TEST PIPELINE] Downloaded Artifacts:"
+ Write-Host "====================================="
+
+ # List wheel files
+ $wheelsPath = "$(Build.SourcesDirectory)/artifacts/dist"
+ if (Test-Path $wheelsPath) {
+ $wheels = Get-ChildItem -Path $wheelsPath -Filter "*.whl" -Recurse
+
+ Write-Host "`n[WHEELS] Total wheel files found: $($wheels.Count)"
+ foreach ($wheel in $wheels) {
+ $size = [math]::Round($wheel.Length / 1MB, 2)
+ Write-Host " - $($wheel.Name) (${size} MB)"
+ }
+
+ # Copy wheels to dist folder for ESRP
+ Write-Host "`nCopying wheels to $(Build.SourcesDirectory)/dist..."
+ New-Item -ItemType Directory -Force -Path "$(Build.SourcesDirectory)/dist" | Out-Null
+ Copy-Item -Path "$wheelsPath/*.whl" -Destination "$(Build.SourcesDirectory)/dist/" -Force
+
+ } else {
+ Write-Error "Wheel directory not found at: $wheelsPath"
+ exit 1
+ }
+
+ # List symbol files
+ $symbolsPath = "$(Build.SourcesDirectory)/artifacts/symbols"
+ if (Test-Path $symbolsPath) {
+ $symbols = Get-ChildItem -Path $symbolsPath -Filter "*.pdb" -Recurse
+
+ Write-Host "`n[SYMBOLS] Total PDB files found: $($symbols.Count)"
+ foreach ($symbol in $symbols) {
+ $size = [math]::Round($symbol.Length / 1KB, 2)
+ Write-Host " - $($symbol.Name) (${size} KB)"
+ }
+
+ # Copy symbols to symbols folder for publishing
+ Write-Host "`nCopying symbols to $(Build.SourcesDirectory)/symbols..."
+ New-Item -ItemType Directory -Force -Path "$(Build.SourcesDirectory)/symbols" | Out-Null
+ Copy-Item -Path "$symbolsPath/*.pdb" -Destination "$(Build.SourcesDirectory)/symbols/" -Force
+
+ } else {
+ Write-Warning "Symbol directory not found at: $symbolsPath"
+ Write-Warning "Symbol publishing will be skipped if no PDB files found"
+ }
+
+ Write-Host "`n====================================="
+ Write-Host "Summary:"
+ Write-Host "Wheels: $($wheels.Count) files"
+ Write-Host "Symbols: $(if ($symbols) { $symbols.Count } else { 0 }) files"
+ Write-Host "====================================="
+
+ # Step 4: Verify wheel integrity
+ - task: PowerShell@2
+ displayName: '[TEST] Verify Wheel Integrity'
+ inputs:
+ targetType: 'inline'
+ script: |
+ Write-Host "[TEST] Verifying wheel file integrity..."
+
+ $wheels = Get-ChildItem -Path "$(Build.SourcesDirectory)/dist" -Filter "*.whl"
+ $allValid = $true
+
+ foreach ($wheel in $wheels) {
+ # Check if wheel is a valid ZIP file
+ try {
+ Add-Type -AssemblyName System.IO.Compression.FileSystem
+ $zip = [System.IO.Compression.ZipFile]::OpenRead($wheel.FullName)
+ $entryCount = $zip.Entries.Count
+ $zip.Dispose()
+
+ Write-Host "✓ $($wheel.Name) - Valid ($entryCount entries)"
+ }
+ catch {
+ Write-Error "✗ $($wheel.Name) - INVALID: $_"
+ $allValid = $false
+ }
+ }
+
+ if (-not $allValid) {
+ Write-Error "One or more wheel files are corrupted"
+ exit 1
+ }
+
+ Write-Host "`nAll wheels verified successfully!"
+
+ # Step 5: Publish Symbols (if enabled and symbols exist)
+ - ${{ if eq(parameters.publishSymbols, true) }}:
+ - template: /OneBranchPipelines/steps/symbol-publishing-step.yml@self
+ parameters:
+ SymbolsFolder: '$(Build.SourcesDirectory)/symbols'
+
+ # Step 6: Copy wheels to ob_outputDirectory for OneBranch artifact publishing
+ - task: CopyFiles@2
+ displayName: '[TEST] Stage Wheels for Dummy Release'
+ inputs:
+ SourceFolder: '$(Build.SourcesDirectory)/dist'
+ Contents: '*.whl'
+ TargetFolder: '$(ob_outputDirectory)/release'
+ flattenFolders: true
+
+ # Step 7: ESRP Dummy Release Task (only if performDummyRelease is true)
+ # ⚠️ IMPORTANT: Uses Maven ContentType for testing - NOT PyPI!
+ - ${{ if eq(parameters.performDummyRelease, true) }}:
+ - task: EsrpRelease@9
+ displayName: '[TEST] ESRP Dummy Release (Maven - NOT PyPI)'
+ inputs:
+ connectedservicename: '$(ESRPConnectedServiceName)'
+ usemanagedidentity: true
+ keyvaultname: '$(AuthAKVName)'
+ signcertname: '$(AuthSignCertName)'
+ clientid: '$(EsrpClientId)'
+ Intent: 'PackageDistribution'
+ # ⚠️ CRITICAL: ContentType is Maven (NOT PyPI) for safe testing
+ # This ensures no accidental production releases to PyPI
+ ContentType: 'Maven'
+ ContentSource: 'Folder'
+ FolderLocation: '$(Build.SourcesDirectory)/dist'
+ WaitForReleaseCompletion: true
+ Owners: '$(owner)'
+ Approvers: '$(approver)'
+ ServiceEndpointUrl: 'https://api.esrp.microsoft.com'
+ MainPublisher: 'ESRPRELPACMAN'
+ DomainTenantId: '$(DomainTenantId)'
+
+ # Step 8: Show test release status
+ - ${{ if eq(parameters.performDummyRelease, true) }}:
+ - task: PowerShell@2
+ displayName: '[TEST] Dummy Release Summary'
+ inputs:
+ targetType: 'inline'
+ script: |
+ Write-Host "===================================="
+ Write-Host "⚠️ TEST PIPELINE - DUMMY RELEASE COMPLETED ⚠️"
+ Write-Host "===================================="
+ Write-Host "Package: mssql-python (TEST)"
+ Write-Host "ContentType: Maven (NOT PyPI - Safe for Testing)"
+ Write-Host "Owners: $(owner)"
+ Write-Host "Approvers: $(approver)"
+ Write-Host "Symbols Published: ${{ parameters.publishSymbols }}"
+ Write-Host "====================================="
+ Write-Host ""
+ Write-Host "⚠️ IMPORTANT: This was a DUMMY release using Maven ContentType"
+ Write-Host " NO packages were released to PyPI"
+ Write-Host ""
+ Write-Host "What was tested:"
+ Write-Host "✓ Artifact download from build pipeline"
+ Write-Host "✓ Wheel integrity verification"
+ if ("${{ parameters.publishSymbols }}" -eq "True") {
+ Write-Host "✓ Symbol publishing to SqlClientDrivers org"
+ }
+ Write-Host "✓ ESRP release workflow (Maven ContentType)"
+ Write-Host ""
+ Write-Host "Next steps:"
+ Write-Host "1. Verify dummy release in ESRP portal"
+ Write-Host "2. Check ESRP approval workflow completion"
+ Write-Host "3. Verify symbols in SqlClientDrivers org (if published)"
+ Write-Host "4. For PRODUCTION release, use official-release-pipeline.yml"
+ Write-Host "====================================="
+
+ - ${{ if eq(parameters.performDummyRelease, false) }}:
+ - task: PowerShell@2
+ displayName: '[TEST] Dry Run - Dummy Release Skipped'
+ inputs:
+ targetType: 'inline'
+ script: |
+ Write-Host "===================================="
+ Write-Host "⚠️ TEST PIPELINE - DRY RUN MODE ⚠️"
+ Write-Host "===================================="
+ Write-Host "Package: mssql-python (TEST)"
+ Write-Host ""
+ Write-Host "Actions performed:"
+ Write-Host "✓ Downloaded wheels from build pipeline"
+ Write-Host "✓ Verified wheel integrity"
+ Write-Host "✓ Downloaded symbols from build pipeline"
+ if ("${{ parameters.publishSymbols }}" -eq "True") {
+ Write-Host "✓ Published symbols to SqlClientDrivers org"
+ }
+ Write-Host "✗ ESRP dummy release NOT performed (parameter disabled)"
+ Write-Host ""
+ Write-Host "To test ESRP workflow:"
+ Write-Host "1. Set 'performDummyRelease' parameter to true"
+ Write-Host "2. Re-run this TEST pipeline"
+ Write-Host ""
+ Write-Host "For PRODUCTION release:"
+ Write-Host "1. Use official-release-pipeline.yml instead"
+ Write-Host "2. Official pipeline uses PyPI ContentType"
+ Write-Host "====================================="
diff --git a/OneBranchPipelines/github-ado-sync.yml b/OneBranchPipelines/github-ado-sync.yml
new file mode 100644
index 000000000..fd859b0a4
--- /dev/null
+++ b/OneBranchPipelines/github-ado-sync.yml
@@ -0,0 +1,138 @@
+# GitHub-to-ADO Sync Pipeline
+# Syncs main branch from public GitHub to internal Azure DevOps daily at 5pm IST
+#
+# SYNC STRATEGY RATIONALE:
+# This pipeline uses a "replace-all" approach rather than traditional git merge/rebase because:
+# 1. DIVERGENT HISTORY: ADO repository contains commits from early development that don't exist
+# in GitHub. These historical commits were made before GitHub became the source of truth.
+# 2. AVOIDING CONFLICTS: Standard git operations (merge, rebase, reset --hard) fail when
+# repositories have divergent commit histories. Attempting to merge results in conflicts
+# that cannot be automatically resolved.
+# 3. IMPLEMENTATION: We use 'git fetch + git rm + git checkout' to completely replace ADO's
+# working tree with GitHub's files without attempting to reconcile git history. This creates
+# a clean sync commit that updates all files to match GitHub exactly.
+# 4. CHANGE DETECTION: The pipeline checks if any files actually differ before creating PRs,
+# avoiding unnecessary sync operations when repositories are already aligned.
+
+name: GitHub-Sync-$(Date:yyyyMMdd)$(Rev:.r)
+
+schedules:
+ - cron: "30 11 * * *"
+ displayName: "Daily sync at 5pm IST"
+ branches:
+ include:
+ - main
+ always: true
+
+trigger: none
+pr: none
+
+jobs:
+- job: SyncFromGitHub
+ displayName: 'Sync main branch from GitHub'
+ pool:
+ vmImage: 'windows-latest'
+
+ steps:
+ - checkout: self
+ persistCredentials: true
+
+ - task: CmdLine@2
+ displayName: 'Add GitHub remote'
+ inputs:
+ script: |
+ git remote add github https://github.com/microsoft/mssql-python.git
+ git fetch github main
+
+ - task: CmdLine@2
+ displayName: 'Create timestamped sync branch'
+ inputs:
+ script: |
+ echo Getting current timestamp...
+ powershell -Command "Get-Date -Format 'yyyyMMdd-HHmmss'" > timestamp.txt
+ set /p TIMESTAMP=<timestamp.txt
+ set SYNC_BRANCH=github-sync-%TIMESTAMP%
+ echo %SYNC_BRANCH% > branchname.txt
+ echo Creating sync branch: %SYNC_BRANCH%
+ git checkout -b %SYNC_BRANCH%
+ echo ##vso[task.setvariable variable=SYNC_BRANCH;isOutput=true]%SYNC_BRANCH%
+
+ - task: CmdLine@2
+ displayName: 'Sync with GitHub main'
+ inputs:
+ script: |
+ echo Syncing with GitHub main...
+ git config user.email "sync@microsoft.com"
+ git config user.name "ADO Sync Bot"
+
+ git fetch github main
+ git rm -rf .
+ git checkout github/main -- .
+ echo timestamp.txt >> .git\info\exclude
+ echo branchname.txt >> .git\info\exclude
+ git diff --cached --quiet
+ if %ERRORLEVEL% EQU 0 (
+ echo No changes detected. Skipping commit.
+ echo ##vso[task.setvariable variable=HAS_CHANGES]false
+ ) else (
+ echo Changes detected. Creating commit...
+ git add . && git commit -m "Sync from GitHub main"
+ echo ##vso[task.setvariable variable=HAS_CHANGES]true
+ )
+
+ - task: CmdLine@2
+ displayName: 'Push branch to Azure DevOps'
+ condition: eq(variables['HAS_CHANGES'], 'true')
+ inputs:
+ script: |
+ set /p SYNC_BRANCH=<branchname.txt
+ git push origin %SYNC_BRANCH%
+
+ # Each platform build stage publishes its own artifact: Windows_*, macOS_*, Linux_*
+ # This downloads all of them automatically (27 total artifacts)
+ - task: DownloadPipelineArtifact@2
+ displayName: 'Download All Platform Artifacts'
+ inputs:
+ buildType: 'current'
+ targetPath: '$(Pipeline.Workspace)/all-artifacts'
+
+ # Consolidate all wheels into single dist/ directory
+ - bash: |
+ set -e
+ echo "Creating consolidated dist directory..."
+ mkdir -p $(ob_outputDirectory)/dist
+
+ echo "=========================================="
+ echo "Searching for all wheel files across all artifacts..."
+ echo "=========================================="
+
+ # List all downloaded artifacts
+ echo "Downloaded artifacts:"
+ ls -la $(Pipeline.Workspace)/all-artifacts/
+
+ echo ""
+ echo "Finding all .whl files..."
+ find $(Pipeline.Workspace)/all-artifacts -name "*.whl" -exec ls -lh {} \;
+
+ echo ""
+ echo "Copying all wheels to consolidated dist/..."
+ find $(Pipeline.Workspace)/all-artifacts -name "*.whl" -exec cp -v {} $(ob_outputDirectory)/dist/ \;
+
+ echo ""
+ echo "=========================================="
+ echo "Consolidation complete! Total wheels:"
+ echo "=========================================="
+ ls -lh $(ob_outputDirectory)/dist/
+ echo ""
+ WHEEL_COUNT=$(ls -1 $(ob_outputDirectory)/dist/*.whl 2>/dev/null | wc -l)
+ echo "Total wheel count: $WHEEL_COUNT"
+ echo "Expected: 27 wheels (7 Windows + 4 macOS + 16 Linux)"
+
+ if [ "$WHEEL_COUNT" -ne 27 ]; then
+ echo "WARNING: Expected 27 wheels but found $WHEEL_COUNT"
+ else
+ echo "SUCCESS: All 27 wheels consolidated!"
+ fi
+ displayName: 'Consolidate wheels from all platforms'
+
+ # Optional: Consolidate native bindings for reference
+ - bash: |
+ set -e
+ echo "Creating bindings directory structure..."
+ mkdir -p $(ob_outputDirectory)/bindings
+
+ echo "Searching for bindings directories..."
+ find $(Pipeline.Workspace)/all-artifacts -type d -name "bindings" | while read dir; do
+ echo "Found bindings in: $dir"
+ cp -rv "$dir"/* $(ob_outputDirectory)/bindings/ 2>/dev/null || true
+ done
+
+ echo "Bindings consolidation complete!"
+ echo "Bindings structure:"
+ find $(ob_outputDirectory)/bindings -type f | head -20
+ displayName: 'Consolidate native bindings (optional)'
+ continueOnError: true
+
+ # Optional: Consolidate Windows symbols
+ - bash: |
+ set -e
+ echo "Searching for symbols directories..."
+ if find $(Pipeline.Workspace)/all-artifacts -type d -name "symbols" | grep -q .; then
+ echo "Copying Windows symbols..."
+ mkdir -p $(ob_outputDirectory)/symbols
+ find $(Pipeline.Workspace)/all-artifacts -type d -name "symbols" | while read dir; do
+ echo "Found symbols in: $dir"
+ cp -rv "$dir"/* $(ob_outputDirectory)/symbols/ 2>/dev/null || true
+ done
+ echo "Symbols consolidation complete!"
+ else
+ echo "No Windows symbols found (expected for NonOfficial builds)"
+ fi
+ displayName: 'Consolidate Windows symbols (optional)'
+ continueOnError: true
+
+ # Verify consolidation
+ - bash: |
+ echo "=========================================="
+ echo "Consolidation Summary"
+ echo "=========================================="
+ echo ""
+ echo "Wheels in dist/:"
+ ls -lh $(ob_outputDirectory)/dist/*.whl || echo "No wheels found!"
+ echo ""
+ echo "Total wheels: $(ls -1 $(ob_outputDirectory)/dist/*.whl 2>/dev/null | wc -l)"
+ echo ""
+ if [ -d "$(ob_outputDirectory)/bindings" ]; then
+ echo "Bindings directory:"
+ find $(ob_outputDirectory)/bindings -type f | head -20
+ fi
+ echo ""
+ echo "=========================================="
+ displayName: 'Verify consolidation'
+
+ # Publish consolidated artifacts
+ - task: PublishPipelineArtifact@1
+ displayName: 'Publish Consolidated Artifacts'
+ inputs:
+ targetPath: '$(ob_outputDirectory)'
+ artifact: 'drop_Consolidate_ConsolidateArtifacts'
+ publishLocation: 'pipeline'
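The consolidation step above expects 27 wheels split as 7 Windows + 4 macOS + 16 Linux. A small hedged sketch that tallies a local dist/ directory by platform tag against that split; the directory name and the substring-based classification are assumptions for illustration:

from collections import Counter
from pathlib import Path

# Expected split taken from the pipeline comment: 7 Windows + 4 macOS + 16 Linux = 27
EXPECTED = {"windows": 7, "macos": 4, "linux": 16}

def platform_of(wheel_name: str) -> str:
    """Classify a wheel by the platform tag embedded in its filename."""
    if "win" in wheel_name:
        return "windows"
    if "macosx" in wheel_name:
        return "macos"
    if "manylinux" in wheel_name or "musllinux" in wheel_name:
        return "linux"
    return "unknown"

def tally(dist_dir: str = "dist") -> Counter:
    return Counter(platform_of(p.name) for p in Path(dist_dir).glob("*.whl"))

if __name__ == "__main__":
    counts = tally()
    for platform, expected in EXPECTED.items():
        found = counts.get(platform, 0)
        status = "OK" if found == expected else "MISMATCH"
        print(f"{status:8} {platform}: {found}/{expected}")
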
diff --git a/OneBranchPipelines/official-release-pipeline.yml b/OneBranchPipelines/official-release-pipeline.yml
new file mode 100644
index 000000000..a459dabc5
--- /dev/null
+++ b/OneBranchPipelines/official-release-pipeline.yml
@@ -0,0 +1,294 @@
+# OneBranch Official Release Pipeline for mssql-python
+# Downloads wheel and symbol artifacts from build pipeline, publishes symbols, and releases wheels to PyPI via ESRP
+# This pipeline is ALWAYS Official - no NonOfficial option
+
+name: $(Year:YY)$(DayOfYear)$(Rev:.r)-Release
+
+# Manual trigger only - releases should be deliberate
+trigger: none
+pr: none
+
+# Parameters for release pipeline
+parameters:
+ - name: publishSymbols
+ displayName: 'Publish Symbols to Symbol Servers'
+ type: boolean
+ default: true
+
+ - name: releaseToPyPI
+ displayName: 'Release to PyPI (Production)'
+ type: boolean
+ default: false # Safety: Default to false to prevent accidental releases
+
+# Variables
+variables:
+ # Common variables
+ - template: /OneBranchPipelines/variables/common-variables.yml@self
+ - template: /OneBranchPipelines/variables/onebranch-variables.yml@self
+
+ # Variable groups
+ - group: 'ESRP Federated Creds (AME)' # Contains ESRP signing credentials
+ - group: 'Symbols Publishing' # Contains SymbolServer, SymbolTokenUri variables
+
+# OneBranch resources
+resources:
+ repositories:
+ - repository: templates
+ type: git
+ name: 'OneBranch.Pipelines/GovernedTemplates'
+ ref: 'refs/heads/main'
+
+ # Reference to the build pipeline
+ pipelines:
+ - pipeline: buildPipeline
+ source: 'Build-Release-Package-Pipeline' # Name of the build pipeline
+ trigger: none # Manual trigger only
+
+# Extend OneBranch official template
+# Always uses Official template for release pipeline
+extends:
+ template: 'v2/OneBranch.Official.CrossPlat.yml@templates'
+
+ parameters:
+ # Feature flags
+ featureFlags:
+ WindowsHostVersion:
+ Version: '2022'
+
+ # Global SDL Configuration
+ globalSdl:
+ # Global Guardian baseline and suppression files
+ baseline:
+ baselineFile: $(Build.SourcesDirectory)\.gdn\.gdnbaselines
+ suppressionSet: default
+ suppression:
+ suppressionFile: $(Build.SourcesDirectory)\.gdn\.gdnsuppress
+ suppressionSet: default
+
+ # Minimal SDL for release pipeline - artifacts already scanned during build
+ binskim:
+ enabled: true
+ break: true
+
+ credscan:
+ enabled: true
+
+ policheck:
+ enabled: true
+ break: true
+ exclusionFile: '$(REPO_ROOT)/.config/PolicheckExclusions.xml'
+
+ # Publish SDL logs
+ publishLogs:
+ enabled: true
+
+ # TSA - Always enabled for Official release pipeline
+ tsa:
+ enabled: true
+ configFile: '$(REPO_ROOT)/.config/tsaoptions.json'
+
+ # Pipeline stages
+ stages:
+ - stage: ReleasePackages
+ displayName: 'Release Python Packages to PyPI'
+
+ jobs:
+ - job: DownloadAndRelease
+ displayName: 'Download Artifacts and Release via ESRP'
+
+ pool:
+ type: windows
+ isCustom: true
+ name: Django-1ES-pool
+ vmImage: WIN22-SQL22
+
+ variables:
+ ob_outputDirectory: '$(Build.ArtifactStagingDirectory)'
+
+ steps:
+ # Step 1: Download consolidated artifacts from build pipeline
+ - task: DownloadPipelineArtifact@2
+ displayName: 'Download Consolidated Artifacts from Build Pipeline'
+ inputs:
+ buildType: 'specific'
+ project: '$(System.TeamProject)'
+ definition: 2199 # Build-Release-Package-Pipeline definition ID
+ buildVersionToDownload: 'specific'
+ buildId: $(resources.pipeline.buildPipeline.runID) # Use the build run selected in UI
+ artifactName: 'drop_Consolidate_ConsolidateArtifacts' # Consolidated artifact with dist/ and symbols/
+ targetPath: '$(Build.SourcesDirectory)/artifacts'
+
+ # Step 3: List downloaded artifacts for verification
+ - task: PowerShell@2
+ displayName: 'List Downloaded Wheel and Symbol Files'
+ inputs:
+ targetType: 'inline'
+ script: |
+ Write-Host "====================================="
+ Write-Host "Downloaded Artifacts:"
+ Write-Host "====================================="
+
+ # List wheel files
+ $wheelsPath = "$(Build.SourcesDirectory)/artifacts/dist"
+ if (Test-Path $wheelsPath) {
+ $wheels = Get-ChildItem -Path $wheelsPath -Filter "*.whl" -Recurse
+
+ Write-Host "`n[WHEELS] Total wheel files found: $($wheels.Count)"
+ foreach ($wheel in $wheels) {
+ $size = [math]::Round($wheel.Length / 1MB, 2)
+ Write-Host " - $($wheel.Name) (${size} MB)"
+ }
+
+ # Copy wheels to dist folder for ESRP
+ Write-Host "`nCopying wheels to $(Build.SourcesDirectory)/dist..."
+ New-Item -ItemType Directory -Force -Path "$(Build.SourcesDirectory)/dist" | Out-Null
+ Copy-Item -Path "$wheelsPath/*.whl" -Destination "$(Build.SourcesDirectory)/dist/" -Force
+
+ } else {
+ Write-Error "Wheel directory not found at: $wheelsPath"
+ exit 1
+ }
+
+ # List symbol files
+ $symbolsPath = "$(Build.SourcesDirectory)/artifacts/symbols"
+ if (Test-Path $symbolsPath) {
+ $symbols = Get-ChildItem -Path $symbolsPath -Filter "*.pdb" -Recurse
+
+ Write-Host "`n[SYMBOLS] Total PDB files found: $($symbols.Count)"
+ foreach ($symbol in $symbols) {
+ $size = [math]::Round($symbol.Length / 1KB, 2)
+ Write-Host " - $($symbol.Name) (${size} KB)"
+ }
+
+ # Copy symbols to symbols folder for publishing
+ Write-Host "`nCopying symbols to $(Build.SourcesDirectory)/symbols..."
+ New-Item -ItemType Directory -Force -Path "$(Build.SourcesDirectory)/symbols" | Out-Null
+ Copy-Item -Path "$symbolsPath/*.pdb" -Destination "$(Build.SourcesDirectory)/symbols/" -Force
+
+ } else {
+ Write-Warning "Symbol directory not found at: $symbolsPath"
+ Write-Warning "Symbol publishing will be skipped if no PDB files found"
+ }
+
+ Write-Host "`n====================================="
+ Write-Host "Summary:"
+ Write-Host "Wheels: $($wheels.Count) files"
+ Write-Host "Symbols: $(if ($symbols) { $symbols.Count } else { 0 }) files"
+ Write-Host "====================================="
+
+ # Step 4: Verify wheel integrity
+ - task: PowerShell@2
+ displayName: 'Verify Wheel Integrity'
+ inputs:
+ targetType: 'inline'
+ script: |
+ Write-Host "Verifying wheel file integrity..."
+
+ $wheels = Get-ChildItem -Path "$(Build.SourcesDirectory)/dist" -Filter "*.whl"
+ $allValid = $true
+
+ foreach ($wheel in $wheels) {
+ # Check if wheel is a valid ZIP file
+ try {
+ Add-Type -AssemblyName System.IO.Compression.FileSystem
+ $zip = [System.IO.Compression.ZipFile]::OpenRead($wheel.FullName)
+ $entryCount = $zip.Entries.Count
+ $zip.Dispose()
+
+ Write-Host "✓ $($wheel.Name) - Valid ($entryCount entries)"
+ }
+ catch {
+ Write-Error "✗ $($wheel.Name) - INVALID: $_"
+ $allValid = $false
+ }
+ }
+
+ if (-not $allValid) {
+ Write-Error "One or more wheel files are corrupted"
+ exit 1
+ }
+
+ Write-Host "`nAll wheels verified successfully!"
+
+ # Step 5: Publish Symbols (if enabled and symbols exist)
+ - ${{ if eq(parameters.publishSymbols, true) }}:
+ - template: /OneBranchPipelines/steps/symbol-publishing-step.yml@self
+ parameters:
+ SymbolsFolder: '$(Build.SourcesDirectory)/symbols'
+
+ # Step 6: Copy wheels to ob_outputDirectory for OneBranch artifact publishing
+ - task: CopyFiles@2
+ displayName: 'Stage Wheels for Release'
+ inputs:
+ SourceFolder: '$(Build.SourcesDirectory)/dist'
+ Contents: '*.whl'
+ TargetFolder: '$(ob_outputDirectory)/release'
+ flattenFolders: true
+
+ # Step 7: ESRP Release Task (only if releaseToPyPI is true)
+ - ${{ if eq(parameters.releaseToPyPI, true) }}:
+ - task: EsrpRelease@9
+ displayName: 'ESRP Release to PyPI'
+ inputs:
+ connectedservicename: '$(ESRPConnectedServiceName)'
+ usemanagedidentity: true
+ keyvaultname: '$(AuthAKVName)'
+ signcertname: '$(AuthSignCertName)'
+ clientid: '$(EsrpClientId)'
+ Intent: 'PackageDistribution'
+ ContentType: 'PyPI'
+ ContentSource: 'Folder'
+ FolderLocation: '$(Build.SourcesDirectory)/dist'
+ WaitForReleaseCompletion: true
+ Owners: '$(owner)'
+ Approvers: '$(approver)'
+ ServiceEndpointUrl: 'https://api.esrp.microsoft.com'
+ MainPublisher: 'ESRPRELPACMAN'
+ DomainTenantId: '$(DomainTenantId)'
+
+ # Step 8: Show release status
+ - ${{ if eq(parameters.releaseToPyPI, true) }}:
+ - task: PowerShell@2
+ displayName: 'Release Summary'
+ inputs:
+ targetType: 'inline'
+ script: |
+ Write-Host "===================================="
+ Write-Host "ESRP Release Completed"
+ Write-Host "===================================="
+ Write-Host "Package: mssql-python"
+ Write-Host "Target: PyPI"
+ Write-Host "Owners: $(owner)"
+ Write-Host "Approvers: $(approver)"
+ Write-Host "Symbols Published: ${{ parameters.publishSymbols }}"
+ Write-Host "====================================="
+ Write-Host ""
+ Write-Host "Next steps:"
+ Write-Host "1. Verify release in ESRP portal"
+ Write-Host "2. Wait for approval workflow completion"
+ Write-Host "3. Verify package on PyPI: https://pypi.org/project/mssql-python/"
+ Write-Host "4. Verify symbols in SqlClientDrivers org (if published)"
+ Write-Host "====================================="
+
+ - ${{ if eq(parameters.releaseToPyPI, false) }}:
+ - task: PowerShell@2
+ displayName: 'Dry Run - Release Skipped'
+ inputs:
+ targetType: 'inline'
+ script: |
+ Write-Host "===================================="
+ Write-Host "DRY RUN MODE - No Release Performed"
+ Write-Host "===================================="
+ Write-Host "Package: mssql-python"
+ Write-Host ""
+ Write-Host "Actions performed:"
+ Write-Host "- Downloaded wheels from build pipeline"
+ Write-Host "- Downloaded symbols from build pipeline"
+ if ("${{ parameters.publishSymbols }}" -eq "True") {
+ Write-Host "- Published symbols to SqlClientDrivers org"
+ }
+ Write-Host ""
+ Write-Host "To perform actual release:"
+ Write-Host "1. Set 'releaseToPyPI' parameter to true"
+ Write-Host "2. Re-run pipeline"
+ Write-Host "====================================="
diff --git a/OneBranchPipelines/stages/build-linux-single-stage.yml b/OneBranchPipelines/stages/build-linux-single-stage.yml
new file mode 100644
index 000000000..6b68a737b
--- /dev/null
+++ b/OneBranchPipelines/stages/build-linux-single-stage.yml
@@ -0,0 +1,413 @@
+# Linux Single Configuration Stage Template
+# Builds Python wheels for a specific Linux distribution and architecture
+# Builds for Python 3.10, 3.11, 3.12, 3.13, 3.14 within a single job
+# Tests each wheel after building with isolated pytest execution
+parameters:
+ # Stage identifier (e.g., 'Linux_manylinux_x86_64')
+ - name: stageName
+ type: string
+ # Job identifier within the stage
+ - name: jobName
+ type: string
+ default: 'BuildWheels'
+ # Linux distribution type: 'manylinux' (glibc-based) or 'musllinux' (musl libc-based)
+ - name: linuxTag
+ type: string
+ # CPU architecture: 'x86_64' (AMD64) or 'aarch64' (ARM64)
+ - name: arch
+ type: string
+ # Docker platform for QEMU emulation: 'linux/amd64' or 'linux/arm64'
+ - name: dockerPlatform
+ type: string
+ # OneBranch build type: 'Official' (production) or 'NonOfficial' (dev/test)
+ - name: oneBranchType
+ type: string
+ default: 'Official'
+
+stages:
+ - stage: ${{ parameters.stageName }}
+ displayName: 'Linux ${{ parameters.linuxTag }} ${{ parameters.arch }}'
+ jobs:
+ - job: ${{ parameters.jobName }}
+ displayName: 'Build Wheels - ${{ parameters.linuxTag }} ${{ parameters.arch }}'
+
+ # Use custom 1ES pool with Ubuntu 22.04 + SQL Server 2022 pre-installed
+ pool:
+ type: linux
+ isCustom: true
+ name: Django-1ES-pool
+ demands:
+ - imageOverride -equals ADO-UB22-SQL22
+ # Extended timeout for multi-version builds + testing (5 Python versions × build + test time)
+ timeoutInMinutes: 120
+
+ variables:
+ # Disable BinSkim for Linux - requires ICU libraries not available in manylinux/musllinux containers
+ - name: ob_sdl_binskim_enabled
+ value: false
+ # OneBranch output directory for artifacts (wheels, bindings, symbols)
+ - name: ob_outputDirectory
+ value: '$(Build.ArtifactStagingDirectory)'
+ # OneBranch-required variable (unused in this template)
+ - name: LinuxContainerImage
+ value: 'onebranch.azurecr.io/linux/ubuntu-2204:latest'
+ # Distribution type passed to container selection logic
+ - name: LINUX_TAG
+ value: ${{ parameters.linuxTag }}
+ # Architecture passed to container selection and file naming
+ - name: ARCH
+ value: ${{ parameters.arch }}
+ # Docker platform for QEMU-based cross-compilation
+ - name: DOCKER_PLATFORM
+ value: ${{ parameters.dockerPlatform }}
+
+ steps:
+ - checkout: self
+ fetchDepth: 0
+
+ # Install Docker
+ - task: DockerInstaller@0
+ inputs:
+ dockerVersion: '20.10.21'
+ displayName: 'Install Docker'
+
+ - bash: |
+ set -e
+ echo "Verifying we're on Linux..."
+ if [[ "$(uname -s)" != "Linux" ]]; then
+ echo "ERROR: This job requires a Linux agent but got: $(uname -s)"
+ echo "Agent info: $(uname -a)"
+ exit 1
+ fi
+
+ uname -a
+
+ # Start dockerd
+ sudo dockerd > docker.log 2>&1 &
+ sleep 10
+
+ # Verify Docker works
+ docker --version
+ docker info
+ displayName: 'Setup and start Docker daemon'
+
+ - script: |
+ docker run --rm --privileged tonistiigi/binfmt --install all
+ displayName: 'Enable QEMU (for aarch64)'
+
+ - script: |
+ rm -rf $(ob_outputDirectory)/dist $(ob_outputDirectory)/bindings
+ mkdir -p $(ob_outputDirectory)/dist
+ mkdir -p $(ob_outputDirectory)/bindings/$(LINUX_TAG)-$(ARCH)
+ displayName: 'Prepare artifact directories'
+
+ - script: |
+ # Determine image based on LINUX_TAG and ARCH
+ if [[ "$(LINUX_TAG)" == "musllinux" ]]; then
+ IMAGE="quay.io/pypa/musllinux_1_2_$(ARCH)"
+ else
+ IMAGE="quay.io/pypa/manylinux_2_28_$(ARCH)"
+ fi
+
+ docker run -d --name build-$(LINUX_TAG)-$(ARCH) \
+ --platform $(DOCKER_PLATFORM) \
+ -v $(Build.SourcesDirectory):/workspace \
+ -w /workspace \
+ $IMAGE \
+ tail -f /dev/null
+ displayName: 'Start $(LINUX_TAG) $(ARCH) container'
+
+ - script: |
+ set -euxo pipefail
+ export PATH=$PATH:`pwd`/docker
+ if [[ "$(LINUX_TAG)" == "manylinux" ]]; then
+ docker exec build-$(LINUX_TAG)-$(ARCH) bash -lc '
+ set -euxo pipefail
+ if command -v dnf >/dev/null 2>&1; then
+ dnf -y update || true
+ dnf -y install gcc gcc-c++ make cmake unixODBC-devel krb5-libs keyutils-libs ccache || true
+ elif command -v yum >/dev/null 2>&1; then
+ yum -y update || true
+ yum -y install gcc gcc-c++ make cmake unixODBC-devel krb5-libs keyutils-libs ccache || true
+ fi
+ gcc --version || true
+ cmake --version || true
+ '
+ else
+ docker exec build-$(LINUX_TAG)-$(ARCH) sh -lc '
+ set -euxo pipefail
+ apk update || true
+ apk add --no-cache bash build-base cmake unixodbc-dev krb5-libs keyutils-libs ccache || true
+ gcc --version || true
+ cmake --version || true
+ '
+ fi
+ displayName: 'Install system build dependencies'
+
+ # Start SQL Server container for pytest execution
+ # Runs on host (not in build container) to be accessible from build container via network
+ - script: |
+ set -euxo pipefail
+
+ echo "Starting SQL Server 2022 container for testing..."
+ docker run -d --name sqlserver-$(LINUX_TAG)-$(ARCH) \
+ --platform linux/amd64 \
+ -e ACCEPT_EULA=Y \
+ -e MSSQL_SA_PASSWORD="$(DB_PASSWORD)" \
+ -p 1433:1433 \
+ mcr.microsoft.com/mssql/server:2022-latest
+
+ echo "Waiting for SQL Server to be ready..."
+ for i in {1..30}; do
+ if docker exec sqlserver-$(LINUX_TAG)-$(ARCH) /opt/mssql-tools18/bin/sqlcmd \
+ -S localhost -U SA -P "$(DB_PASSWORD)" -C -Q "SELECT 1" >/dev/null 2>&1; then
+ echo "✓ SQL Server is ready!"
+ break
+ fi
+ sleep 2
+ done
+
+ # Get SQL Server container IP for build container to connect
+ SQL_IP=$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' sqlserver-$(LINUX_TAG)-$(ARCH))
+ echo "SQL Server IP: $SQL_IP"
+ echo "##vso[task.setvariable variable=SQL_IP]$SQL_IP"
+ displayName: 'Start SQL Server container for testing'
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
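The readiness loop above retries sqlcmd until SQL Server answers. A lighter-weight sketch of the same idea in Python that only polls the TCP port (it does not validate the SA login the way the sqlcmd probe does); the host and timeout values are illustrative:

import socket
import time

def wait_for_sql(host: str, port: int = 1433, timeout_s: int = 60) -> bool:
    """Poll the SQL Server TCP port until it accepts connections or the timeout expires."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            with socket.create_connection((host, port), timeout=2):
                return True
        except OSError:
            time.sleep(2)
    return False

if __name__ == "__main__":
    ready = wait_for_sql("127.0.0.1")  # the pipeline uses the container IP it just captured
    print("SQL Server port open" if ready else "Timed out waiting for SQL Server")
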
+
+ # Build wheels for all Python versions (3.10-3.14) and test each one
+ - script: |
+ set -euxo pipefail
+ if [[ "$(LINUX_TAG)" == "manylinux" ]]; then SHELL_EXE=bash; else SHELL_EXE=sh; fi
+ docker exec build-$(LINUX_TAG)-$(ARCH) $SHELL_EXE -lc 'mkdir -p /workspace/dist'
+
+ # Loop through all Python versions: build wheel -> test wheel -> repeat
+ for PYBIN in cp310 cp311 cp312 cp313 cp314; do
+ echo ""
+ echo "====================================================="
+ echo "Building and testing $PYBIN on $(LINUX_TAG)/$(ARCH)"
+ echo "====================================================="
+
+ if [[ "$(LINUX_TAG)" == "manylinux" ]]; then
+ # Manylinux (glibc-based) - use bash
+ docker exec -e PYBIN=$PYBIN -e SQL_IP=$(SQL_IP) -e DB_PASSWORD="$(DB_PASSWORD)" build-$(LINUX_TAG)-$(ARCH) bash -lc '
+ set -euxo pipefail;
+
+ # Step 1: Setup Python environment
+ PY=/opt/python/${PYBIN}-${PYBIN}/bin/python;
+ test -x $PY || { echo "Python $PY missing - skipping"; exit 0; };
+ ln -sf $PY /usr/local/bin/python;
+ echo "Using: $(python --version)";
+
+ # Step 2: Install build dependencies
+ python -m pip install -q -U pip setuptools wheel pybind11;
+
+ # Step 3: Build native extension (.so)
+ echo "Building native extension...";
+ cd /workspace/mssql_python/pybind;
+ bash build.sh;
+
+ # Step 4: Build wheel
+ echo "Building wheel package...";
+ cd /workspace;
+ python setup.py bdist_wheel;
+
+ # Step 5: Install wheel in isolated directory for testing
+ echo "Installing wheel in isolated test environment...";
+ TEST_DIR="/test_isolated_${PYBIN}";
+ rm -rf $TEST_DIR;
+ mkdir -p $TEST_DIR;
+ cd $TEST_DIR;
+
+ # Find and install the wheel for this Python version
+ WHEEL=$(ls /workspace/dist/*${PYBIN}*.whl | head -1);
+ if [ -z "$WHEEL" ]; then
+ echo "ERROR: No wheel found for ${PYBIN}";
+ exit 1;
+ fi;
+ echo "Installing: $WHEEL";
+ $PY -m pip install -q "$WHEEL";
+
+ # Step 6: Verify package imports correctly
+ echo "Verifying package installation...";
+ $PY -c import\ mssql_python;
+
+ # Step 7: Setup test environment
+ echo "Setting up test environment...";
+ $PY -m pip install -q pytest;
+ cp -r /workspace/tests $TEST_DIR/ || echo "WARNING: No tests directory";
+ cp /workspace/pytest.ini $TEST_DIR/ || echo "WARNING: No pytest.ini";
+ cp /workspace/requirements.txt $TEST_DIR/ || true;
+ $PY -m pip install -q -r $TEST_DIR/requirements.txt || true;
+
+ # Step 8: Run pytest (stops on first failure)
+ if [ -d $TEST_DIR/tests ]; then
+ echo "Running pytest for ${PYBIN}...";
+ DB_CONNECTION_STRING="Server=$SQL_IP;Database=master;Uid=SA;Pwd=$DB_PASSWORD;TrustServerCertificate=yes" \
+ $PY -m pytest $TEST_DIR/tests -v --maxfail=1 || {
+ echo "ERROR: Tests failed for ${PYBIN}";
+ exit 1;
+ };
+ echo "✓ All tests passed for ${PYBIN}";
+ else
+ echo "WARNING: No tests found, skipping pytest";
+ fi;
+ '
+ else
+ # Musllinux (musl libc-based) - use sh
+ docker exec -e PYBIN=$PYBIN -e SQL_IP=$(SQL_IP) -e DB_PASSWORD="$(DB_PASSWORD)" build-$(LINUX_TAG)-$(ARCH) sh -lc '
+ set -euxo pipefail;
+
+ # Step 1: Setup Python environment
+ PY=/opt/python/${PYBIN}-${PYBIN}/bin/python;
+ test -x $PY || { echo "Python $PY missing - skipping"; exit 0; };
+ ln -sf $PY /usr/local/bin/python;
+ echo "Using: $(python --version)";
+
+ # Step 2: Install build dependencies
+ python -m pip install -q -U pip setuptools wheel pybind11;
+
+ # Step 3: Build native extension (.so)
+ echo "Building native extension...";
+ cd /workspace/mssql_python/pybind;
+ bash build.sh;
+
+ # Step 4: Build wheel
+ echo "Building wheel package...";
+ cd /workspace;
+ python setup.py bdist_wheel;
+
+ # Step 5: Install wheel in isolated directory for testing
+ echo "Installing wheel in isolated test environment...";
+ TEST_DIR="/test_isolated_${PYBIN}";
+ rm -rf $TEST_DIR;
+ mkdir -p $TEST_DIR;
+ cd $TEST_DIR;
+
+ # Find and install the wheel for this Python version
+ WHEEL=$(ls /workspace/dist/*${PYBIN}*.whl | head -1);
+ if [ -z "$WHEEL" ]; then
+ echo "ERROR: No wheel found for ${PYBIN}";
+ exit 1;
+ fi;
+ echo "Installing: $WHEEL";
+ $PY -m pip install -q "$WHEEL";
+
+ # Step 6: Verify package imports correctly
+ echo "Verifying package installation...";
+ $PY -c import\ mssql_python;
+
+ # Step 7: Setup test environment
+ echo "Setting up test environment...";
+ $PY -m pip install -q pytest;
+ cp -r /workspace/tests $TEST_DIR/ || echo "WARNING: No tests directory";
+ cp /workspace/pytest.ini $TEST_DIR/ || echo "WARNING: No pytest.ini";
+ cp /workspace/requirements.txt $TEST_DIR/ || true;
+ $PY -m pip install -q -r $TEST_DIR/requirements.txt || true;
+
+ # Step 8: Run pytest (stops on first failure)
+ if [ -d $TEST_DIR/tests ]; then
+ echo "Running pytest for ${PYBIN}...";
+ DB_CONNECTION_STRING="Server=$SQL_IP;Database=master;Uid=SA;Pwd=$DB_PASSWORD;TrustServerCertificate=yes" \
+ $PY -m pytest $TEST_DIR/tests -v --maxfail=1 || {
+ echo "ERROR: Tests failed for ${PYBIN}";
+ exit 1;
+ };
+ echo "✓ All tests passed for ${PYBIN}";
+ else
+ echo "WARNING: No tests found, skipping pytest";
+ fi;
+ '
+ fi
+
+ echo "✓ Build and test complete for $PYBIN"
+ done
+
+ echo ""
+ echo "====================================================="
+ echo "✓ All Python versions built and tested successfully!"
+ echo "====================================================="
+ displayName: 'Build and test wheels for Python 3.10-3.14'
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ # Copy built artifacts from container to host for publishing
+ - script: |
+ set -euxo pipefail
+
+ # Copy all wheels (5 Python versions) to output directory
+ echo "Copying wheels to host..."
+ docker cp build-$(LINUX_TAG)-$(ARCH):/workspace/dist/. "$(ob_outputDirectory)/wheels/" || echo "No wheels found"
+
+ # Copy native .so bindings for artifact archival
+ echo "Copying .so bindings to host..."
+ mkdir -p "$(ob_outputDirectory)/bindings/$(LINUX_TAG)-$(ARCH)"
+ docker exec build-$(LINUX_TAG)-$(ARCH) $([[ "$(LINUX_TAG)" == "manylinux" ]] && echo bash -lc || echo sh -lc) '
+ OUT="/tmp/ddbc-out";
+ rm -rf "$OUT"; mkdir -p "$OUT";
+ find /workspace/mssql_python -maxdepth 1 -type f -name "*.so" -exec cp -v {} "$OUT"/ \; || true
+ '
+
+ docker cp "build-$(LINUX_TAG)-$(ARCH):/tmp/ddbc-out/." \
+ "$(ob_outputDirectory)/bindings/$(LINUX_TAG)-$(ARCH)/" || echo "No .so files found"
+
+ echo "✓ Artifacts copied successfully"
+ displayName: 'Copy artifacts to host'
+
+ # Cleanup: Stop and remove Docker containers
+ - script: |
+ echo "Stopping and removing containers..."
+ docker stop build-$(LINUX_TAG)-$(ARCH) sqlserver-$(LINUX_TAG)-$(ARCH) || true
+ docker rm build-$(LINUX_TAG)-$(ARCH) sqlserver-$(LINUX_TAG)-$(ARCH) || true
+ echo "✓ Containers cleaned up"
+ displayName: 'Cleanup containers'
+ condition: always() # Always run cleanup, even if build/test fails
+
+ # Publish artifacts to Azure Pipelines for downstream consumption
+ # OneBranch requires specific artifact naming: drop_<stageName>_<jobName>
+ - task: PublishPipelineArtifact@1
+ displayName: 'Publish Linux Artifacts'
+ inputs:
+ targetPath: '$(ob_outputDirectory)'
+ artifact: 'drop_${{ parameters.stageName }}_${{ parameters.jobName }}'
+ publishLocation: 'pipeline'
+
+ # Security Scanning: Component Governance + OneBranch AntiMalware
+ # Scans wheels and binaries for known vulnerabilities and malware signatures
+ - template: ../steps/malware-scanning-step.yml@self
+ parameters:
+ scanPath: '$(ob_outputDirectory)'
+ artifactType: 'dll'
+
+ # ESRP Malware Scanning (Official Builds Only)
+ # ESRP = Microsoft's Enterprise Signing and Release Platform
+ # Scans wheel files for malware using Microsoft Defender and custom signatures
+ # Only runs for Official builds (production compliance requirement)
+ - ${{ if eq(parameters.oneBranchType, 'Official') }}:
+ - task: EsrpMalwareScanning@5
+ displayName: 'ESRP MalwareScanning - Python Wheels (Official)'
+ inputs:
+ ConnectedServiceName: '$(SigningEsrpConnectedServiceName)'
+ AppRegistrationClientId: '$(SigningAppRegistrationClientId)'
+ AppRegistrationTenantId: '$(SigningAppRegistrationTenantId)'
+ EsrpClientId: '$(SigningEsrpClientId)'
+ UseMSIAuthentication: true
+ FolderPath: '$(ob_outputDirectory)/wheels'
+ Pattern: '*.whl'
+ SessionTimeout: 60
+ CleanupTempStorage: 1
+ VerboseLogin: 1
+
+ # ESRP Code Signing (DISABLED - wheel files cannot be signed with SignTool)
+ # See compound-esrp-code-signing-step.yml for detailed explanation of why this doesn't work
+ # - ${{ if eq(parameters.oneBranchType, 'Official') }}:
+ # - template: /OneBranchPipelines/steps/compound-esrp-code-signing-step.yml@self
+ # parameters:
+ # appRegistrationClientId: '$(SigningAppRegistrationClientId)'
+ # appRegistrationTenantId: '$(SigningAppRegistrationTenantId)'
+ # artifactType: 'whl'
+ # authAkvName: '$(SigningAuthAkvName)'
+ # authSignCertName: '$(SigningAuthSignCertName)'
+ # esrpClientId: '$(SigningEsrpClientId)'
+ # esrpConnectedServiceName: '$(SigningEsrpConnectedServiceName)'
+ # signPath: '$(ob_outputDirectory)/wheels'
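The Linux stage hands pytest a DB_CONNECTION_STRING pointing at the SQL Server container. A minimal standalone smoke test along the same lines, assuming the wheel under test is installed and that mssql_python exposes a DB-API 2.0 style connect() accepting the same connection string (an assumption, not confirmed by this diff):

import os
import mssql_python  # assumes the wheel under test is installed in the active environment

def smoke_test() -> None:
    """Open a connection with the same string the pipeline hands to pytest and run SELECT 1."""
    conn_str = os.environ["DB_CONNECTION_STRING"]
    # Assumption: mssql_python follows DB-API 2.0 and accepts an ODBC-style connection string.
    conn = mssql_python.connect(conn_str)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT 1")
        assert cursor.fetchone()[0] == 1
        print("SQL Server reachable - driver round-trip OK")
    finally:
        conn.close()

if __name__ == "__main__":
    smoke_test()
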
diff --git a/OneBranchPipelines/stages/build-macos-single-stage.yml b/OneBranchPipelines/stages/build-macos-single-stage.yml
new file mode 100644
index 000000000..71ccaf607
--- /dev/null
+++ b/OneBranchPipelines/stages/build-macos-single-stage.yml
@@ -0,0 +1,260 @@
+# macOS Single Configuration Stage Template
+# Builds Python wheel for a specific Python version (universal2 binary)
+# Universal2 = combined x86_64 + ARM64 binary in single .so file
+# Tests with Docker-based SQL Server (using Colima as Docker runtime)
+parameters:
+ # Stage identifier (e.g., 'MacOS_py312')
+ - name: stageName
+ type: string
+ # Job identifier within the stage
+ - name: jobName
+ type: string
+ default: 'BuildWheel'
+ # Python version in X.Y format (e.g., '3.12')
+ - name: pythonVersion
+ type: string
+ # Python version as 3-digit string for file naming (e.g., '312')
+ - name: shortPyVer
+ type: string
+ # OneBranch build type: 'Official' (production) or 'NonOfficial' (dev/test)
+ - name: oneBranchType
+ type: string
+ default: 'Official'
+
+stages:
+ - stage: ${{ parameters.stageName }}
+ displayName: 'macOS Py${{ parameters.pythonVersion }} Universal2'
+ jobs:
+ - job: ${{ parameters.jobName }}
+ displayName: 'Build Wheel - Py${{ parameters.pythonVersion }} Universal2'
+
+ # Pool Configuration
+ # macOS-14 image = macOS Sonoma with Xcode 15, Python 3.x toolchain
+ # type:linux is Azure Pipelines quirk (macOS pools declare as 'linux' type)
+ pool:
+ type: linux
+ isCustom: true
+ name: Azure Pipelines
+ vmImage: 'macOS-14'
+ # 120-minute timeout (universal2 builds take longer due to dual-architecture compilation)
+ timeoutInMinutes: 120
+
+ # Build Variables
+ variables:
+ # Disable BinSkim (Windows-focused binary analyzer) - macOS uses Mach-O format, not PE
+ - name: ob_sdl_binskim_enabled
+ value: false
+ # OneBranch artifact output directory
+ - name: ob_outputDirectory
+ value: '$(Build.ArtifactStagingDirectory)'
+ # Linux container image (unused in macOS builds, but required by OneBranch template)
+ - name: LinuxContainerImage
+ value: 'onebranch.azurecr.io/linux/ubuntu-2204:latest'
+ # Python version in X.Y format (e.g., '3.12')
+ - name: pythonVersion
+ value: ${{ parameters.pythonVersion }}
+ # Python version as 3-digit string (e.g., '312') for file naming
+ - name: shortPyVer
+ value: ${{ parameters.shortPyVer }}
+
+ steps:
+ # =========================
+ # SOURCE CODE CHECKOUT
+ # =========================
+ # fetchDepth: 0 = full git history (needed for version tagging)
+ - checkout: self
+ fetchDepth: 0
+
+ # =========================
+ # PYTHON INSTALLATION
+ # =========================
+ # UsePythonVersion@0 supports Python 3.10-3.14 on macOS
+ # No need for NuGet download like Windows (3.14 is in Azure Pipelines registry)
+ - task: UsePythonVersion@0
+ inputs:
+ versionSpec: '${{ parameters.pythonVersion }}'
+ addToPath: true
+ displayName: 'Use Python ${{ parameters.pythonVersion }} (Universal2)'
+ continueOnError: false
+
+ # =========================
+ # BUILD TOOLS
+ # =========================
+ # CMake = cross-platform build system generator (needed for C++ compilation)
+ # Uninstall first to ensure clean version (avoid conflicts with pre-installed CMake)
+ - script: |
+ brew update
+ brew uninstall cmake --ignore-dependencies || echo "CMake not installed"
+ brew install cmake
+ displayName: 'Install CMake'
+
+ # =========================
+ # PYTHON DEPENDENCIES
+ # =========================
+ # Install build dependencies:
+ # - requirements.txt: runtime dependencies (if any)
+ # - cmake: CMake Python wrapper
+ # - pybind11: C++/Python binding library (headers needed for compilation)
+ - script: |
+ python --version
+ python -m pip --version
+ python -m pip install --upgrade pip
+ python -m pip install -r requirements.txt
+ python -m pip install cmake pybind11
+ displayName: 'Install dependencies'
+
+ # =========================
+ # NATIVE EXTENSION BUILD
+ # =========================
+ # Build universal2 .so binary (x86_64 + ARM64 in single file)
+ # build.sh sets ARCHFLAGS="-arch x86_64 -arch arm64" for clang
+ # Output: mssql_python.cpython-3XX-darwin.so (Mach-O universal binary)
+ - script: |
+ echo "Python Version: ${{ parameters.pythonVersion }}"
+ echo "Building Universal2 Binary"
+ cd "$(Build.SourcesDirectory)/mssql_python/pybind"
+ ./build.sh
+ displayName: 'Build .so file'
+ continueOnError: false
+
+ # Copy native extension to artifact directory for later inspection
+ # .so file will be packaged into wheel in later step
+ - task: CopyFiles@2
+ inputs:
+ SourceFolder: '$(Build.SourcesDirectory)/mssql_python'
+ Contents: '*.so'
+ TargetFolder: '$(ob_outputDirectory)/bindings/macOS'
+ displayName: 'Copy .so files'
+
+ # Install Docker CLI and Colima (macOS Docker runtime)
+ # Colima = lightweight Docker Desktop alternative using macOS virtualization
+ # vz = native macOS virtualization (faster, only works on M1+)
+ # qemu = cross-platform emulator (slower, works on Intel Macs)
+ # 4 CPU cores + 8GB RAM needed for SQL Server container
+ - script: |
+ brew update
+ brew install docker colima
+ colima start --vm-type vz --cpu 4 --memory 8 || {
+ echo "vz VM failed, trying qemu..."
+ colima start --vm-type qemu --cpu 4 --memory 8
+ }
+ sleep 30
+ docker context use colima >/dev/null || true
+ docker version
+ displayName: 'Install and start Docker (Colima)'
+ timeoutInMinutes: 15
+
+ # =========================
+ # SQL SERVER CONTAINER
+ # =========================
+ # Start SQL Server 2022 Docker container for pytest execution
+ # macOS uses host networking (localhost:1433) vs Linux uses container IP
+ # Container runs in background (-d) and accepts connections on port 1433
+ - script: |
+ docker pull mcr.microsoft.com/mssql/server:2022-latest
+ docker run --name sqlserver \
+ -e ACCEPT_EULA=Y \
+ -e MSSQL_SA_PASSWORD="${DB_PASSWORD}" \
+ -p 1433:1433 -d \
+ mcr.microsoft.com/mssql/server:2022-latest
+
+ # Wait for SQL Server to accept connections (up to 60 seconds)
+ # sqlcmd -C flag = trust server certificate (for TLS connection)
+ for i in {1..30}; do
+ docker exec sqlserver /opt/mssql-tools18/bin/sqlcmd \
+ -S localhost -U SA -P "$DB_PASSWORD" -C -Q "SELECT 1" && break
+ sleep 2
+ done
+ displayName: 'Start SQL Server (Docker)'
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ # =========================
+ # TESTING
+ # =========================
+ # Run pytest against SQL Server container
+ # Tests use localhost:1433 connection (SA user with password from variable)
+ # -v = verbose output (show test names and results)
+ - script: |
+ python -m pytest -v
+ displayName: 'Run pytests'
+ env:
+ # Connection string uses localhost (SQL Server container exposed on port 1433)
+ # TrustServerCertificate=yes bypasses SSL cert validation (test env only)
+ DB_CONNECTION_STRING: 'Server=tcp:127.0.0.1,1433;Database=master;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes'
+
+ # =========================
+ # WHEEL BUILD
+ # =========================
+ # Build wheel package from setup.py
+ # Wheel filename: mssql_python-X.Y.Z-cp3XX-cp3XX-macosx_XX_X_universal2.whl
+ # bdist_wheel = build binary wheel distribution (contains pre-compiled .so)
+ - script: |
+ python -m pip install --upgrade pip wheel setuptools
+ python setup.py bdist_wheel
+ displayName: 'Build wheel package'
+
+ # =========================
+ # ARTIFACT PUBLISHING
+ # =========================
+ # Copy wheel to OneBranch output directory
+ # dist/ = output directory from setup.py bdist_wheel
+ - task: CopyFiles@2
+ inputs:
+ SourceFolder: '$(Build.SourcesDirectory)/dist'
+ Contents: '*.whl'
+ TargetFolder: '$(ob_outputDirectory)/wheels'
+ displayName: 'Copy wheel files'
+
+ # Publish all artifacts (wheels + .so files) for Consolidate stage
+ # Artifact naming: drop_<stageName>_<jobName> (OneBranch requirement)
+ # Consolidate stage downloads this artifact via 'dependsOn' dependency
+ - task: PublishPipelineArtifact@1
+ displayName: 'Publish macOS Artifacts'
+ inputs:
+ targetPath: '$(ob_outputDirectory)'
+ artifact: 'drop_${{ parameters.stageName }}_${{ parameters.jobName }}'
+ publishLocation: 'pipeline'
+
+ # =========================
+ # SECURITY SCANNING
+ # =========================
+ # Component Governance + OneBranch AntiMalware scanning
+ # artifactType:'dll' is a misnomer - scans all binary files (.so, .dylib, etc.)
+ - template: ../steps/malware-scanning-step.yml@self
+ parameters:
+ scanPath: '$(ob_outputDirectory)'
+ artifactType: 'dll'
+
+ # ESRP Malware Scanning (Official Builds Only)
+ # ESRP = Microsoft's Enterprise Signing and Release Platform
+ # Scans wheel files for malware using Microsoft Defender and custom signatures
+ # Only runs for Official builds (production compliance requirement)
+ - ${{ if eq(parameters.oneBranchType, 'Official') }}:
+ - task: EsrpMalwareScanning@5
+ displayName: 'ESRP MalwareScanning - Python Wheels (Official)'
+ inputs:
+ ConnectedServiceName: '$(SigningEsrpConnectedServiceName)'
+ AppRegistrationClientId: '$(SigningAppRegistrationClientId)'
+ AppRegistrationTenantId: '$(SigningAppRegistrationTenantId)'
+ EsrpClientId: '$(SigningEsrpClientId)'
+ UseMSIAuthentication: true
+ FolderPath: '$(ob_outputDirectory)/wheels'
+ Pattern: '*.whl' # Scan all wheel files
+ SessionTimeout: 60
+ CleanupTempStorage: 1
+ VerboseLogin: 1
+
+ # ESRP Code Signing (DISABLED - wheel files cannot be signed with SignTool)
+ # See compound-esrp-code-signing-step.yml for detailed explanation of why this doesn't work
+ # - ${{ if eq(parameters.oneBranchType, 'Official') }}:
+ # - template: /OneBranchPipelines/steps/compound-esrp-code-signing-step.yml@self
+ # parameters:
+ # appRegistrationClientId: '$(SigningAppRegistrationClientId)'
+ # appRegistrationTenantId: '$(SigningAppRegistrationTenantId)'
+ # artifactType: 'whl'
+ # authAkvName: '$(SigningAuthAkvName)'
+ # authSignCertName: '$(SigningAuthSignCertName)'
+ # esrpClientId: '$(SigningEsrpClientId)'
+ # esrpConnectedServiceName: '$(SigningEsrpConnectedServiceName)'
+ # signPath: '$(ob_outputDirectory)/wheels'
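The macOS stage builds a universal2 extension, so the .so should carry both x86_64 and arm64 slices. A quick hedged check using Apple's lipo tool (macOS only; the glob pattern for the built extension is illustrative):

import glob
import subprocess
import sys

def check_universal2(pattern: str = "mssql_python/*.so") -> int:
    """Run lipo -info on each built extension and require both x86_64 and arm64 slices."""
    status = 0
    for path in glob.glob(pattern):
        # lipo ships with Xcode command line tools; -info prints the architectures in the binary
        info = subprocess.run(["lipo", "-info", path], capture_output=True, text=True, check=True).stdout
        has_both = "x86_64" in info and "arm64" in info
        print(f"{'OK ' if has_both else 'BAD'} {path}: {info.strip()}")
        if not has_both:
            status = 1
    return status

if __name__ == "__main__":
    sys.exit(check_universal2())
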
diff --git a/OneBranchPipelines/stages/build-windows-single-stage.yml b/OneBranchPipelines/stages/build-windows-single-stage.yml
new file mode 100644
index 000000000..b432f15ec
--- /dev/null
+++ b/OneBranchPipelines/stages/build-windows-single-stage.yml
@@ -0,0 +1,358 @@
+# Windows Single Configuration Stage Template
+# Builds Python wheel for a specific Python version and architecture
+# Supports both x64 (AMD64) and ARM64 cross-compilation
+# Tests x64 builds with pytest (ARM64 binaries can't run on x64 host)
+parameters:
+ # Stage identifier (e.g., 'Win_py312_x64')
+ - name: stageName
+ type: string
+ # Job identifier within the stage
+ - name: jobName
+ type: string
+ default: 'BuildWheel'
+ # Python version in X.Y format (e.g., '3.12')
+ - name: pythonVersion
+ type: string
+ # Python version as 3-digit string for file naming (e.g., '312')
+ - name: shortPyVer
+ type: string
+ # Target architecture: 'x64' (AMD64) or 'arm64' (ARM64)
+ - name: architecture
+ type: string
+ # OneBranch build type: 'Official' (production) or 'NonOfficial' (dev/test)
+ - name: oneBranchType
+ type: string
+ default: 'Official'
+ # Publish PDB symbols to the symbol server (set to false if publishing is handled by the release pipeline instead)
+ - name: publishSymbols
+ type: boolean
+ default: true
+
+stages:
+ - stage: ${{ parameters.stageName }}
+ displayName: 'Windows Py${{ parameters.pythonVersion }} ${{ parameters.architecture }}'
+ jobs:
+ - job: ${{ parameters.jobName }}
+ displayName: 'Build Wheel - Py${{ parameters.pythonVersion }} ${{ parameters.architecture }}'
+ # Use custom 1ES pool with Windows Server 2022 + SQL Server 2022 pre-installed
+ pool:
+ type: windows
+ isCustom: true
+ name: Django-1ES-pool
+ vmImage: WIN22-SQL22
+ # Extended timeout for downloads, builds, and testing
+ timeoutInMinutes: 120
+
+ variables:
+ # OneBranch output directory for artifacts (wheels, bindings, symbols)
+ ob_outputDirectory: '$(Build.ArtifactStagingDirectory)'
+ # OneBranch-required variable (unused in this template)
+ LinuxContainerImage: 'onebranch.azurecr.io/linux/ubuntu-2204:latest'
+ # Python version passed to build scripts
+ pythonVersion: ${{ parameters.pythonVersion }}
+ # Short Python version for file naming (e.g., '312')
+ shortPyVer: ${{ parameters.shortPyVer }}
+ # Target architecture (can differ from host for cross-compilation)
+ targetArch: ${{ parameters.architecture }}
+ # System access token for authenticated downloads (e.g., GitHub artifacts)
+ SYSTEM_ACCESSTOKEN: $(System.AccessToken)
+
+ steps:
+ - checkout: self
+ fetchDepth: 0
+
+ # Python 3.14 Installation: Download from NuGet (not yet in UsePythonVersion@0 task)
+ # Microsoft hasn't added Python 3.14 to the standard Python registry yet
+ - powershell: |
+ $pythonVer = "${{ parameters.pythonVersion }}"
+
+ if ($pythonVer -eq "3.14") {
+ Write-Host "Python 3.14 detected - downloading from NuGet..."
+
+ # Download Python 3.14 x64 from NuGet (stable release)
+ $nugetUrl = "https://www.nuget.org/api/v2/package/python/3.14.0"
+ $nugetFile = "$(Build.SourcesDirectory)\python-x64.nupkg"
+ $zipFile = "$(Build.SourcesDirectory)\python-x64.zip"
+ $extractPath = "C:\Python314-NuGet"
+
+ Write-Host "Downloading Python 3.14 x64 from: $nugetUrl"
+ Invoke-WebRequest -Uri $nugetUrl -OutFile $nugetFile -UseBasicParsing
+
+ Write-Host "Extracting NuGet package..."
+ Move-Item -Path $nugetFile -Destination $zipFile -Force
+ Expand-Archive -Path $zipFile -DestinationPath $extractPath -Force
+
+ # Python executable is in tools directory
+ $pythonDir = "$extractPath\tools"
+
+ Write-Host "Setting up Python at: $pythonDir"
+
+ # Create C:\Python314 for consistent paths
+ New-Item -ItemType Directory -Force -Path "C:\Python314" | Out-Null
+ Copy-Item -Path "$pythonDir\*" -Destination "C:\Python314" -Recurse -Force
+
+ Write-Host "`nVerifying Python installation:"
+ & "C:\Python314\python.exe" --version
+ & "C:\Python314\python.exe" -c "import sys; print('Python:', sys.executable)"
+
+ # Add to PATH
+ Write-Host "##vso[task.prependpath]C:\Python314"
+ Write-Host "##vso[task.prependpath]C:\Python314\Scripts"
+
+ # Cleanup
+ Remove-Item -Path $zipFile -Force -ErrorAction SilentlyContinue
+ Remove-Item -Path $nugetFile -Force -ErrorAction SilentlyContinue
+ }
+ condition: eq('${{ parameters.pythonVersion }}', '3.14')
+ displayName: 'Download and install Python 3.14 from NuGet'
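The step above works because a .nupkg is an ordinary ZIP archive with the interpreter under tools/. A hedged Python equivalent of the download-and-extract dance, reusing the same NuGet URL; the local file and directory names are illustrative:

import urllib.request
import zipfile
from pathlib import Path

NUGET_URL = "https://www.nuget.org/api/v2/package/python/3.14.0"  # same URL the step uses

def fetch_python_nupkg(dest: str = "python314-nuget") -> Path:
    """Download the CPython NuGet package and unzip it; python.exe lives under tools/."""
    pkg = Path("python-x64.nupkg")
    urllib.request.urlretrieve(NUGET_URL, pkg)  # a .nupkg is a plain ZIP archive
    with zipfile.ZipFile(pkg) as zf:
        zf.extractall(dest)
    return Path(dest) / "tools" / "python.exe"

if __name__ == "__main__":
    print("Extracted interpreter at:", fetch_python_nupkg())
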
+
+ # Python 3.10-3.13: Use standard Azure Pipelines task
+ # UsePythonVersion@0 supports these versions natively
+ - task: UsePythonVersion@0
+ inputs:
+ versionSpec: '${{ parameters.pythonVersion }}'
+ architecture: 'x64'
+ addToPath: true
+ condition: ne('${{ parameters.pythonVersion }}', '3.14')
+ displayName: 'Use Python ${{ parameters.pythonVersion }} (${{ parameters.architecture }})'
+ continueOnError: false
+
+ - powershell: |
+ Write-Host "Python version:"
+ python --version
+ Write-Host "Python location:"
+ python -c "import sys; print(sys.executable)"
+ Write-Host "Architecture:"
+ python -c "import platform; print(platform.machine())"
+ displayName: 'Verify Python installation'
+
+ - powershell: |
+ $ErrorActionPreference = "Stop"
+ Write-Host "Installing Python dependencies..."
+ python -m pip install --upgrade pip
+ python -m pip install setuptools wheel pybind11 pytest pyodbc
+ Write-Host "Dependencies installed successfully"
+ displayName: 'Install Python dependencies'
+
+ # Start SQL Server LocalDB for pytest execution
+ # LocalDB is a lightweight SQL Server instance pre-installed on WIN22-SQL22 agents
+ - powershell: |
+ sqllocaldb create MSSQLLocalDB
+ sqllocaldb start MSSQLLocalDB
+ displayName: 'Start LocalDB instance'
+
+ - powershell: |
+ sqlcmd -S "(localdb)\MSSQLLocalDB" -Q "CREATE DATABASE TestDB"
+ sqlcmd -S "(localdb)\MSSQLLocalDB" -Q "CREATE LOGIN testuser WITH PASSWORD = '$(DB_PASSWORD)'"
+ sqlcmd -S "(localdb)\MSSQLLocalDB" -d TestDB -Q "CREATE USER testuser FOR LOGIN testuser"
+ sqlcmd -S "(localdb)\MSSQLLocalDB" -d TestDB -Q "ALTER ROLE db_owner ADD MEMBER testuser"
+ displayName: 'Setup database and user'
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ # Download ARM64 Python libraries for cross-compilation (ARM64 builds only)
+ # ARM64 wheels must be built on x64 host using ARM64 python.lib
+ - powershell: |
+ # Download Python ARM64 from NuGet (contains libs directory with python.lib)
+ $pythonVer = "${{ parameters.pythonVersion }}"
+
+ # Map version to NuGet package version
+ $nugetVersion = switch ($pythonVer) {
+ "3.10" { "3.10.11" }
+ "3.11" { "3.11.9" }
+ "3.12" { "3.12.7" }
+ "3.13" { "3.13.0" }
+ "3.14" { "3.14.0" }
+ }
+
+ $nugetUrl = "https://www.nuget.org/api/v2/package/pythonarm64/$nugetVersion"
+ $nugetFile = "$(Build.SourcesDirectory)\pythonarm64.nupkg"
+ $zipFile = "$(Build.SourcesDirectory)\pythonarm64.zip"
+ $extractPath = "$(Build.SourcesDirectory)\pythonarm64-nuget"
+ $destPath = "$(Build.SourcesDirectory)\mssql_python\pybind\python_libs\arm64"
+
+ Write-Host "Downloading Python $pythonVer ARM64 NuGet package from: $nugetUrl"
+ Invoke-WebRequest -Uri $nugetUrl -OutFile $nugetFile -UseBasicParsing
+
+ Write-Host "Renaming .nupkg to .zip for extraction..."
+ Move-Item -Path $nugetFile -Destination $zipFile -Force
+
+ Write-Host "Extracting NuGet package..."
+ Expand-Archive -Path $zipFile -DestinationPath $extractPath -Force
+
+ Write-Host "`nSearching for libs directory..."
+ $libsDir = Get-ChildItem -Path $extractPath -Recurse -Directory -Filter "libs" | Select-Object -First 1
+
+ if ($libsDir) {
+ Write-Host "Found libs at: $($libsDir.FullName)"
+ New-Item -ItemType Directory -Force -Path $destPath | Out-Null
+ Copy-Item -Path "$($libsDir.FullName)\*" -Destination $destPath -Recurse -Force
+ Write-Host "✓ Copied .lib files from NuGet package"
+ } else {
+ Write-Host "libs directory not found, searching for .lib files..."
+ $libFiles = Get-ChildItem -Path $extractPath -Recurse -Filter "*.lib"
+ New-Item -ItemType Directory -Force -Path $destPath | Out-Null
+ foreach ($lib in $libFiles) {
+ Write-Host " Copying $($lib.Name)"
+ Copy-Item -Path $lib.FullName -Destination $destPath -Force
+ }
+ }
+
+ Write-Host "`nContents of $destPath :"
+ Get-ChildItem $destPath | ForEach-Object { Write-Host " - $($_.Name)" }
+
+ $expectedLib = "python$($pythonVer.Replace('.', '')).lib"
+ if (Test-Path "$destPath\$expectedLib") {
+ Write-Host "`n✓ $expectedLib found"
+ } else {
+ Write-Error "$expectedLib not found in NuGet package!"
+ exit 1
+ }
+
+ # Cleanup
+ Remove-Item -Path $zipFile -Force -ErrorAction SilentlyContinue
+ Remove-Item -Path $extractPath -Recurse -Force -ErrorAction SilentlyContinue
+ condition: eq(variables['targetArch'], 'arm64')
+ displayName: 'Download Python ARM64 libs from NuGet'
+
+ # Build native Python extension (.pyd) using MSVC and CMake
+ # For ARM64: Uses CUSTOM_PYTHON_LIB_DIR to link against ARM64 python.lib
+ - script: |
+ echo "Python Version: $(pythonVersion)"
+ echo "Short Tag: $(shortPyVer)"
+ echo "Architecture: Host=$(architecture), Target=$(targetArch)"
+
+ cd "$(Build.SourcesDirectory)\mssql_python\pybind"
+
+ REM Override lib path for ARM64
+ if "$(targetArch)"=="arm64" (
+ echo Using arm64-specific Python library...
+ set CUSTOM_PYTHON_LIB_DIR=$(Build.SourcesDirectory)\mssql_python\pybind\python_libs\arm64
+ )
+
+ call build.bat $(targetArch)
+ call keep_single_arch.bat $(targetArch)
+
+ cd ..\..
+ displayName: 'Build PYD for $(targetArch)'
+ continueOnError: false
+
+ # =========================
+ # TESTING
+ # =========================
+ # Run pytest to validate bindings (x64 only)
+ # ARM64 binaries cannot execute on x64 host, so tests are skipped
+ - powershell: |
+ Write-Host "Running pytests to validate bindings"
+ if ("$(targetArch)" -eq "arm64") {
+ Write-Host "Skipping pytests on Windows ARM64"
+ } else {
+ python -m pytest -v
+ }
+ displayName: 'Run pytests'
+ env:
+ DB_CONNECTION_STRING: 'Server=(localdb)\MSSQLLocalDB;Database=TestDB;Uid=testuser;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes'
+
+ # Copy artifacts to OneBranch output directory for publishing
+ # PYD files: Native Python extensions (ddbc_bindings.cpXXX-win_xxx.pyd)
+ - task: CopyFiles@2
+ inputs:
+ SourceFolder: '$(Build.SourcesDirectory)\mssql_python\pybind\build\$(targetArch)\py$(shortPyVer)\Release'
+ Contents: 'ddbc_bindings.cp$(shortPyVer)-*.pyd'
+ TargetFolder: '$(ob_outputDirectory)\bindings\windows'
+ displayName: 'Copy PYD files'
+
+ # PDB files: Debugging symbols for native code
+ - task: CopyFiles@2
+ inputs:
+ SourceFolder: '$(Build.SourcesDirectory)\mssql_python\pybind\build\$(targetArch)\py$(shortPyVer)\Release'
+ Contents: 'ddbc_bindings.cp$(shortPyVer)-*.pdb'
+ TargetFolder: '$(ob_outputDirectory)\symbols'
+ displayName: 'Copy PDB files'
+
+ # Copy PYD to ApiScan directory for SDL security scanning
+ # BinSkim and other tools scan binaries from this location
+ - task: CopyFiles@2
+ inputs:
+ SourceFolder: '$(Build.SourcesDirectory)\mssql_python\pybind\build\$(targetArch)\py$(shortPyVer)\Release'
+ Contents: 'ddbc_bindings.cp$(shortPyVer)-*.pyd'
+ TargetFolder: '$(Build.SourcesDirectory)\apiScan\dlls\windows\py$(shortPyVer)\$(targetArch)'
+ displayName: 'Copy PYD to ApiScan directory'
+
+ - task: CopyFiles@2
+ inputs:
+ SourceFolder: '$(Build.SourcesDirectory)\mssql_python\pybind\build\$(targetArch)\py$(shortPyVer)\Release'
+ Contents: 'ddbc_bindings.cp$(shortPyVer)-*.pdb'
+ TargetFolder: '$(Build.SourcesDirectory)\apiScan\pdbs\windows\py$(shortPyVer)\$(targetArch)'
+ displayName: 'Copy PDB to ApiScan directory'
+
+ # Build Python wheel package from source distribution
+ # ARCHITECTURE environment variable controls target platform tagging
+ - script: |
+ python -m pip install --upgrade pip wheel setuptools
+ set ARCHITECTURE=$(targetArch)
+ python setup.py bdist_wheel
+ displayName: 'Build wheel package'
+
+ # =========================
+ # ARTIFACT PUBLISHING
+ # =========================
+ # Copy wheel to OneBranch output directory
+ - task: CopyFiles@2
+ inputs:
+ SourceFolder: '$(Build.SourcesDirectory)\dist'
+ Contents: '*.whl'
+ TargetFolder: '$(ob_outputDirectory)\wheels'
+ displayName: 'Copy wheel files'
+
+ # Publish artifacts to Azure Pipelines for downstream consumption
+ # OneBranch requires specific artifact naming: drop_<stageName>_<jobName>
+ - task: PublishPipelineArtifact@1
+ displayName: 'Publish Windows Artifacts'
+ inputs:
+ targetPath: '$(ob_outputDirectory)'
+ artifact: 'drop_${{ parameters.stageName }}_${{ parameters.jobName }}'
+ publishLocation: 'pipeline'
+
+ # Security Scanning: Component Governance + OneBranch AntiMalware
+ # Scans PYD files and wheels for known vulnerabilities and malware signatures
+ - template: /OneBranchPipelines/steps/malware-scanning-step.yml@self
+ parameters:
+ scanPath: '$(ob_outputDirectory)'
+ artifactType: 'dll'
+
+ # ESRP Malware Scanning (Official Builds Only)
+ # ESRP = Microsoft's Enterprise Signing and Release Platform
+ # Scans wheel files for malware using Microsoft Defender and custom signatures
+ # Only runs for Official builds (production compliance requirement)
+ - ${{ if eq(parameters.oneBranchType, 'Official') }}:
+ - task: EsrpMalwareScanning@5
+ displayName: 'ESRP MalwareScanning - Python Wheels (Official)'
+ inputs:
+ ConnectedServiceName: '$(SigningEsrpConnectedServiceName)'
+ AppRegistrationClientId: '$(SigningAppRegistrationClientId)'
+ AppRegistrationTenantId: '$(SigningAppRegistrationTenantId)'
+ EsrpClientId: '$(SigningEsrpClientId)'
+ UseMSIAuthentication: true
+ FolderPath: '$(ob_outputDirectory)/wheels'
+ Pattern: '*.whl'
+ SessionTimeout: 60
+ CleanupTempStorage: 1
+ VerboseLogin: 1
+
+ # ESRP Code Signing (DISABLED - wheel files cannot be signed with SignTool)
+ # See compound-esrp-code-signing-step.yml for detailed explanation of why this doesn't work
+ # - ${{ if eq(parameters.oneBranchType, 'Official') }}:
+ # - template: /OneBranchPipelines/steps/compound-esrp-code-signing-step.yml@self
+ # parameters:
+ # appRegistrationClientId: '$(SigningAppRegistrationClientId)'
+ # appRegistrationTenantId: '$(SigningAppRegistrationTenantId)'
+ # artifactType: 'whl'
+ # authAkvName: '$(SigningAuthAkvName)'
+ # authSignCertName: '$(SigningAuthSignCertName)'
+ # esrpClientId: '$(SigningEsrpClientId)'
+ # esrpConnectedServiceName: '$(SigningEsrpConnectedServiceName)'
+ # signPath: '$(ob_outputDirectory)\wheels'
+
+ # Note: Symbol publishing moved to release pipeline
+ # Symbols are published as artifacts here and consumed in release pipeline
diff --git a/OneBranchPipelines/steps/compound-esrp-code-signing-step.yml b/OneBranchPipelines/steps/compound-esrp-code-signing-step.yml
new file mode 100644
index 000000000..62c9357fd
--- /dev/null
+++ b/OneBranchPipelines/steps/compound-esrp-code-signing-step.yml
@@ -0,0 +1,210 @@
+# ESRP Code Signing Step Template (DISABLED - Python wheels cannot be signed with SignTool)
+#
+# This template was originally designed to handle signing of binary artifacts using Enterprise Secure Release Process (ESRP).
+# However, we discovered that Python wheel (.whl) files cannot be signed using Windows SignTool because:
+#
+# 1. Python wheels are ZIP archive files, not PE format binaries
+# 2. Windows SignTool only supports PE format files (.exe, .dll, .sys, etc.)
+# 3. ZIP archives require different signing approaches (if supported at all)
+#
+# Error Messages Encountered:
+#
+# ESRP Error Log:
+# "SignTool Error: This file format cannot be signed because it is not recognized."
+#
+# Full SignTool Command that Failed:
+# sign /NPH /fd "SHA256" /f "..." /tr "..." /d "mssql-python" "...whl"
+#
+# Technical Details:
+# - Certificate CP-230012 loads successfully and authentication works correctly
+# - File upload to ESRP service works without issues
+# - The failure occurs when SignTool attempts to process the .whl file
+# - SignTool recognizes .whl as an unknown/unsupported format
+#
+# Alternative Approaches Considered:
+# 1. OneBranch signing (onebranch.pipeline.signing@1) - had authentication issues requiring interactive login
+# 2. Different ESRP operations - no ESRP operation exists for ZIP archive signing
+# 3. Signing individual files within wheels - would break wheel integrity and PyPI compatibility
+#
+# Conclusion:
+# Python wheels distributed to PyPI are typically unsigned. The package integrity is verified through
+# checksums and PyPI's own security mechanisms. Many popular Python packages on PyPI are not code-signed.
+#
+# This template is preserved for reference and potential future use if alternative signing approaches
+# are identified or if other file types need to be signed.
+#
+# Original Configuration Details:
+# CP-230012: "SHA256 Authenticode Standard Microsoft Corporation" certificate for external distribution
+# Operation: SigntoolSign (Windows SignTool for PE format binaries only)
+# Service Connection: Microsoft Release Management Internal
+#
+# Based on SqlClient ESRP signing implementation
+# COMMENTED OUT - All ESRP signing tasks are disabled due to SignTool incompatibility with wheel files
+# The code below is preserved for reference and potential future use with other file types
+# parameters:
+# - name: appRegistrationClientId
+# type: string
+# displayName: 'App Registration Client ID'
+#
+# - name: appRegistrationTenantId
+# type: string
+# displayName: 'App Registration Tenant ID'
+#
+# - name: artifactType
+# type: string
+# displayName: 'Artifact type to sign'
+# values:
+# - 'dll' # For .pyd, .so, .dylib files (native binaries)
+# - 'whl' # For .whl files (Python wheels)
+#
+# - name: authAkvName
+# type: string
+# displayName: 'Azure Key Vault name'
+#
+# - name: authSignCertName
+# type: string
+# displayName: 'Signing certificate name'
+#
+# - name: esrpClientId
+# type: string
+# displayName: 'ESRP Client ID'
+#
+# - name: esrpConnectedServiceName
+# type: string
+# displayName: 'ESRP Connected Service Name'
+#
+# - name: signPath
+# type: string
+# displayName: 'Path containing files to sign'
+
+# steps:
+# # Sign native binary files (.pyd, .so, .dylib)
+# - ${{ if eq(parameters.artifactType, 'dll') }}:
+# - task: EsrpCodeSigning@5
+# displayName: 'ESRP CodeSigning - Native Binaries'
+# inputs:
+# ConnectedServiceName: '${{ parameters.esrpConnectedServiceName }}'
+# AppRegistrationClientId: '${{ parameters.appRegistrationClientId }}'
+# AppRegistrationTenantId: '${{ parameters.appRegistrationTenantId }}'
+# EsrpClientId: '${{ parameters.esrpClientId }}'
+# UseMSIAuthentication: true
+# AuthAKVName: '${{ parameters.authAkvName }}'
+# AuthSignCertName: '${{ parameters.authSignCertName }}'
+# FolderPath: '${{ parameters.signPath }}'
+# Pattern: '*.pyd,*.dll,*.so,*.dylib'
+# signConfigType: inlineSignParams
+# inlineOperation: |
+# [
+# {
+# "keyCode": "CP-230012",
+# "operationSetCode": "SigntoolSign",
+# "parameters": [
+# {
+# "parameterName": "OpusName",
+# "parameterValue": "mssql-python"
+# },
+# {
+# "parameterName": "OpusInfo",
+# "parameterValue": "http://www.microsoft.com"
+# },
+# {
+# "parameterName": "FileDigest",
+# "parameterValue": "/fd \"SHA256\""
+# },
+# {
+# "parameterName": "PageHash",
+# "parameterValue": "/NPH"
+# },
+# {
+# "parameterName": "TimeStamp",
+# "parameterValue": "/tr \"http://rfc3161.gtm.corp.microsoft.com/TSS/HttpTspServer\" /td sha256"
+# }
+# ],
+# "toolName": "sign",
+# "toolVersion": "1.0"
+# },
+# {
+# "keyCode": "CP-230012",
+# "operationSetCode": "SigntoolVerify",
+# "parameters": [],
+# "toolName": "sign",
+# "toolVersion": "1.0"
+# }
+# ]
+#
+# # Sign Python wheel files (.whl)
+# - ${{ if eq(parameters.artifactType, 'whl') }}:
+# - task: EsrpCodeSigning@5
+# displayName: 'ESRP CodeSigning - Python Wheels'
+# inputs:
+# ConnectedServiceName: '${{ parameters.esrpConnectedServiceName }}'
+# AppRegistrationClientId: '${{ parameters.appRegistrationClientId }}'
+# AppRegistrationTenantId: '${{ parameters.appRegistrationTenantId }}'
+# EsrpClientId: '${{ parameters.esrpClientId }}'
+# UseMSIAuthentication: true
+# AuthAKVName: '${{ parameters.authAkvName }}'
+# AuthSignCertName: '${{ parameters.authSignCertName }}'
+# FolderPath: '${{ parameters.signPath }}'
+# Pattern: '*.whl'
+# signConfigType: inlineSignParams
+# inlineOperation: |
+# [
+# {
+# "keyCode": "CP-230012",
+# "operationSetCode": "SigntoolSign",
+# "parameters": [
+# {
+# "parameterName": "OpusName",
+# "parameterValue": "mssql-python"
+# },
+# {
+# "parameterName": "OpusInfo",
+# "parameterValue": "http://www.microsoft.com"
+# },
+# {
+# "parameterName": "FileDigest",
+# "parameterValue": "/fd \"SHA256\""
+# },
+# {
+# "parameterName": "PageHash",
+# "parameterValue": "/NPH"
+# },
+# {
+# "parameterName": "TimeStamp",
+# "parameterValue": "/tr \"http://rfc3161.gtm.corp.microsoft.com/TSS/HttpTspServer\" /td sha256"
+# }
+# ],
+# "toolName": "sign",
+# "toolVersion": "1.0"
+# },
+# {
+# "keyCode": "CP-230012",
+# "operationSetCode": "SigntoolVerify",
+# "parameters": [],
+# "toolName": "sign",
+# "toolVersion": "1.0"
+# }
+# ]
+#
+# # List signed files (platform-specific)
+# - ${{ if eq(parameters.artifactType, 'dll') }}:
+# # Windows - use cmd syntax
+# - script: |
+# echo Signed files in: ${{ parameters.signPath }}
+# dir /s /b "${{ parameters.signPath }}\*.whl" "${{ parameters.signPath }}\*.pyd" "${{ parameters.signPath }}\*.dll" 2>nul
+# displayName: 'List signed files (Windows)'
+# condition: succeededOrFailed()
+#
+# - ${{ else }}:
+# # Linux/macOS - use bash syntax
+# - bash: |
+# echo "Signed files in: ${{ parameters.signPath }}"
+# if [ -d "${{ parameters.signPath }}" ]; then
+# find "${{ parameters.signPath }}" -type f \( -name "*.whl" -o -name "*.pyd" -o -name "*.dll" -o -name "*.so" -o -name "*.dylib" \) -ls
+# else
+# echo "Directory not found: ${{ parameters.signPath }}"
+# fi
+# displayName: 'List signed files (Linux/macOS)'
+# condition: succeededOrFailed()
diff --git a/OneBranchPipelines/steps/malware-scanning-step.yml b/OneBranchPipelines/steps/malware-scanning-step.yml
new file mode 100644
index 000000000..bbba5d888
--- /dev/null
+++ b/OneBranchPipelines/steps/malware-scanning-step.yml
@@ -0,0 +1,28 @@
+# Malware Scanning Step Template
+# Scans artifacts for malware before signing/publishing
+parameters:
+ - name: scanPath
+ type: string
+ displayName: 'Path to scan for malware'
+
+ - name: artifactType
+ type: string
+ displayName: 'Type of artifact (dll, pkg)'
+ values:
+ - 'dll'
+ - 'pkg'
+
+steps:
+ - task: ComponentGovernanceComponentDetection@0
+ displayName: 'Component Governance Detection'
+ inputs:
+ scanType: 'Register'
+ verbosity: 'Verbose'
+ alertWarningLevel: 'High'
+
+ # AntiMalware scanning (OneBranch will inject this automatically via globalSdl)
+ # This step is a placeholder for visibility
+ - script: |
+ echo "Malware scanning for ${{ parameters.artifactType }} files in ${{ parameters.scanPath }}"
+ echo "OneBranch AntiMalware scanning will be performed automatically"
+ displayName: 'Malware Scan Notification (${{ parameters.artifactType }})'
diff --git a/OneBranchPipelines/steps/symbol-publishing-step.yml b/OneBranchPipelines/steps/symbol-publishing-step.yml
new file mode 100644
index 000000000..479c1c337
--- /dev/null
+++ b/OneBranchPipelines/steps/symbol-publishing-step.yml
@@ -0,0 +1,209 @@
+# Symbol Publishing Step Template
+# Publishes PDB symbols to Azure DevOps Symbol Server and Microsoft Symbol Publishing Service
+parameters:
+ - name: SymbolsFolder
+ type: string
+ default: '$(ob_outputDirectory)\symbols'
+
+steps:
+ # Set AccountName for SqlClientDrivers organization (separate PowerShell task like JDBC)
+ - task: PowerShell@2
+ displayName: 'Set Symbol.AccountName to SqlClientDrivers'
+ inputs:
+ targetType: inline
+ # NOTE: we're setting PAT in this step since Pat:$(System.AccessToken) doesn't work in PublishSymbols@2 task directly
+ # Tried using env: parameter on PublishSymbols@2 but it didn't work
+ # As a workaround, we set it via this script and register it as a secret variable
+ script: |
+ Write-Host "##vso[task.setvariable variable=ArtifactServices.Symbol.AccountName;]SqlClientDrivers"
+ Write-Host "##vso[task.setvariable variable=ArtifactServices.Symbol.Pat;issecret=true;]$env:SYSTEM_ACCESSTOKEN"
+ # Verify System.AccessToken is available
+ if (-not $env:SYSTEM_ACCESSTOKEN) {
+ Write-Error "SYSTEM_ACCESSTOKEN is not available. Ensure 'Allow scripts to access the OAuth token' is enabled in the pipeline settings."
+ } else {
+ Write-Host "SYSTEM_ACCESSTOKEN is available and will be used for symbol publishing."
+ }
+ env:
+ SYSTEM_ACCESSTOKEN: $(System.AccessToken)
+
+ - task: PublishSymbols@2
+ displayName: 'Push Symbols to SqlClientDrivers ADO Organization'
+ inputs:
+ SymbolsFolder: '${{ parameters.SymbolsFolder }}'
+ SearchPattern: '**/*.pdb'
+ IndexSources: false
+ SymbolServerType: TeamServices
+ SymbolsMaximumWaitTime: 10
+ SymbolsProduct: mssql-python
+ SymbolsVersion: $(Build.BuildId)
+
+ # Publish to Microsoft Symbol Publishing Service (External)
+ # This step finds the request name created by PublishSymbols@2 task above and publishes to internal/public servers
+ # The PublishSymbols@2 task uploads symbols and creates a request; this step marks it for publishing
+ #
+ # PREREQUISITES (Critical for avoiding 403 Forbidden errors):
+ # 1. Project must be registered with Symbol team via IcM incident (ICM 696470276 for mssql-python)
+ # 2. Service principal/identity used by azureSubscription must be added as Reader AND Publisher
+ # - Symbol team must explicitly grant this identity access to your project
+ # - 403 errors indicate the identity hasn't been added or wrong identity is being used
+ # 3. Verify identity matches: az account get-access-token will use the identity from azureSubscription
+ #
+ # Reference: https://www.osgwiki.com/wiki/Symbols_Publishing_Pipeline_to_SymWeb_and_MSDL#Step_3:_Project_Setup
+ - task: AzureCLI@2
+ displayName: 'Publish symbols to Microsoft Symbol Publishing Service'
+ condition: succeeded()
+ env:
+ SymbolServer: '$(SymbolServer)'
+ SymbolTokenUri: '$(SymbolTokenUri)'
+ inputs:
+ azureSubscription: 'SymbolsPublishing-msodbcsql-mssql-python'
+ scriptType: ps
+ scriptLocation: inlineScript
+ inlineScript: |
+ $symbolServer = $env:SymbolServer
+ $tokenUri = $env:SymbolTokenUri
+ $projectName = "mssql-python"
+
+ # Get the access token for the symbol publishing service
+ # This uses the identity from azureSubscription
+ # CRITICAL: The identity must be registered as Reader AND Publisher for the project
+ # Otherwise you'll get 403 Forbidden errors when calling the Symbol Publishing Service API
+ $symbolPublishingToken = az account get-access-token --resource $tokenUri --query accessToken -o tsv
+ echo "> 1.Symbol publishing token acquired."
+
+ # CRITICAL: We search build logs to find the auto-generated request name from PublishSymbols@2
+ # Two implementation patterns exist:
+ # 1. JDBC Pattern (used here): PublishSymbols@2 auto-generates request name → search logs → publish
+ # 2. SqlClient Pattern: Pass explicit symbolsArtifactName parameter → use same name → publish
+ # We use JDBC pattern because it's more flexible and doesn't require parameter coordination
+
+ # KEY LEARNING: Must use $(System.CollectionUri) for correct API URL construction
+ # $(System.CollectionUri) = full org URL like "https://dev.azure.com/SqlClientDrivers/"
+ # $(System.TeamProject) = only project name like "mssql-python"
+ # Previous error: Used "https://dev.azure.com/$(System.TeamProject)" which resolved to
+ # "https://dev.azure.com/mssql-python" (missing organization) → 404 error
+ echo "Searching for request name created by PublishSymbols@2 task..."
+ $logList = Invoke-RestMethod -Uri "$(System.CollectionUri)$(System.TeamProject)/_apis/build/builds/$(Build.BuildId)/logs?api-version=7.1" -Method GET -Headers @{ Authorization = "Bearer $(System.AccessToken)" } -ContentType "application/json"
+
+ # KEY LEARNING: Build API returns logs in the .value property, not .logs
+ # Previous error: Used $logList.logs → property not found
+ # Azure DevOps Build API schema: { "value": [ { "id": 1, ... }, ... ] }
+ $requestName = $null
+ $logList.value | ForEach-Object {
+ $id = $_.id
+ $log = Invoke-RestMethod -Uri "$(System.CollectionUri)$(System.TeamProject)/_apis/build/builds/$(Build.BuildId)/logs/$id" -Method GET -Headers @{ Authorization = "Bearer $(System.AccessToken)" } -ContentType "application/json"
+
+ echo $log > log.txt
+ # PublishSymbols@2 creates a request with pattern like: Request 'mssql-python/{branch}/{date}.{build}/{buildId}/{guid}'
+ # Example: Request 'mssql-python/official-release/25290.7-release/127537/23bc7689-7bae-4d13-8772-ae70c50b72df'
+ $request = Select-String -Path log.txt -Pattern "Request '.*'" -ErrorAction SilentlyContinue
+
+ if ($request -and $request -match "'mssql-python\/.*'") {
+ $requestName = (-Split $Matches[0])[0].Replace("'","")
+ echo "Found request name: $requestName"
+ }
+ }
+
+ if (-not $requestName) {
+ echo "##[error]Could not find request name in build logs. The PublishSymbols@2 task may have failed or not created a request."
+ exit 1
+ }
+
+ echo "> 2.Request name found from PublishSymbols@2 task."
+
+ # Register the request name with Symbol Publishing Service
+ # This is an idempotent operation - if already registered, API returns success
+ # KEY LEARNING: Use ConvertTo-Json for proper JSON formatting (not manual string construction)
+ # This ensures correct boolean values and escaping
+ echo "Registering the request name ..."
+ $requestNameRegistration = @{ requestName = $requestName }
+ $requestNameRegistrationBody = $requestNameRegistration | ConvertTo-Json -Compress
+ try {
+ Invoke-RestMethod -Method POST -Uri "https://$symbolServer.trafficmanager.net/projects/$projectName/requests" -Headers @{ Authorization = "Bearer $symbolPublishingToken" } -ContentType "application/json" -Body $requestNameRegistrationBody
+ echo "> 3.Registration of request name succeeded."
+ } catch {
+ echo "Registration may have already existed (this is okay): $($_.Exception.Message)"
+ }
+
+ # Publish the symbols to internal and public servers
+ # KEY LEARNING: This API call is asynchronous - it submits the request but doesn't wait for completion
+ # We need to poll the status endpoint (below) to confirm when publishing finishes
+ # Status codes: 0=NotRequested, 1=Submitted, 2=Processing, 3=Completed
+ # Result codes: 0=Pending, 1=Succeeded, 2=Failed, 3=Cancelled
+ echo "Publishing the symbols to internal and public servers..."
+ $publishSymbols = @{
+ publishToInternalServer = $true
+ publishToPublicServer = $true
+ }
+ $publishSymbolsBody = $publishSymbols | ConvertTo-Json -Compress
+ echo "Publishing symbols request body: $publishSymbolsBody"
+
+ try {
+ $response = Invoke-RestMethod -Method POST -Uri "https://$symbolServer.trafficmanager.net/projects/$projectName/requests/$requestName" -Headers @{ Authorization = "Bearer $symbolPublishingToken" } -ContentType "application/json" -Body $publishSymbolsBody
+ echo "> 4.Request to publish symbols succeeded."
+ echo "Response: $($response | ConvertTo-Json)"
+ } catch {
+ echo "##[error]Failed to publish symbols. Status Code: $($_.Exception.Response.StatusCode.value__)"
+ echo "##[error]Error Message: $($_.Exception.Message)"
+ if ($_.ErrorDetails.Message) {
+ echo "##[error]Error Details: $($_.ErrorDetails.Message)"
+ }
+ throw
+ }
+
 3.Request to publish symbols succeeded.">
+ # Poll for publishing status until complete or timeout
+ # KEY LEARNING: Publishing is asynchronous - need to poll until Status=3 (Completed)
+ # Both internal and public servers must complete before we can confirm success
+ # Timeout after 5 minutes (30 attempts × 10 seconds) as a safety measure
 4.Check">
+ echo "> 5.Checking the status of the request ..."
+ $maxAttempts = 30 # 30 attempts = ~5 minutes with 10 second intervals
+ $attemptCount = 0
+ $publishingComplete = $false
+
+ while (-not $publishingComplete -and $attemptCount -lt $maxAttempts) {
+ $attemptCount++
+ $status = Invoke-RestMethod -Method GET -Uri "https://$symbolServer.trafficmanager.net/projects/$projectName/requests/$requestName" -Headers @{ Authorization = "Bearer $symbolPublishingToken" } -ContentType "application/json"
+
+ echo "Attempt $attemptCount/$maxAttempts - Status Check:"
+ echo " Internal Server: Status=$($status.publishToInternalServerStatus), Result=$($status.publishToInternalServerResult)"
+ echo " Public Server: Status=$($status.publishToPublicServerStatus), Result=$($status.publishToPublicServerResult)"
+
+ # Wait for both servers to reach Status=3 (Completed)
+ # KEY LEARNING: Empty file arrays (filesBlockedFromPublicServer, filesPublishedAsPrivateSymbolsToPublicServer)
+ # are normal and expected - they populate only when there are blocked/private files
+ $internalDone = $status.publishToInternalServerStatus -eq 3
+ $publicDone = $status.publishToPublicServerStatus -eq 3
+
+ if ($internalDone -and $publicDone) {
+ $publishingComplete = $true
+ echo ""
+ echo "Publishing completed!"
+ echo " Internal Result: $($status.publishToInternalServerResult) (1=Success, 2=Failed)"
+ echo " Public Result: $($status.publishToPublicServerResult) (1=Success, 2=Failed)"
+
+ # Check for failures and report with detailed messages
+ if ($status.publishToInternalServerResult -eq 2) {
+ echo "##[warning]Internal server publishing failed: $($status.publishToInternalServerFailureMessage)"
+ }
+ if ($status.publishToPublicServerResult -eq 2) {
+ echo "##[warning]Public server publishing failed: $($status.publishToPublicServerFailureMessage)"
+ }
+
+ # Output final status for debugging
+ echo ""
+ echo "Final Status:"
+ $status | ConvertTo-Json
+ } else {
+ if ($attemptCount -lt $maxAttempts) {
+ echo " Still processing... waiting 10 seconds before next check"
+ Start-Sleep -Seconds 10
+ }
+ }
+ }
+
+ if (-not $publishingComplete) {
+ echo "##[warning]Publishing status check timed out after $maxAttempts attempts. Symbols may still be processing."
+ echo "You can check status manually at: https://$symbolServer.trafficmanager.net/projects/$projectName/requests/$requestName"
+ }
diff --git a/OneBranchPipelines/variables/build-variables.yml b/OneBranchPipelines/variables/build-variables.yml
new file mode 100644
index 000000000..d1d41f84e
--- /dev/null
+++ b/OneBranchPipelines/variables/build-variables.yml
@@ -0,0 +1,35 @@
+# Build-specific variables
+variables:
+ # Build output directories
+ - name: DIST_PATH
+ value: '$(Build.SourcesDirectory)/dist'
+
+ - name: BINDINGS_PATH
+ value: '$(Build.SourcesDirectory)/mssql_python/pybind'
+
+ # Artifact output paths for OneBranch
+ - name: WHEELS_OUTPUT_PATH
+ value: '$(ob_outputDirectory)/wheels'
+
+ - name: BINDINGS_OUTPUT_PATH
+ value: '$(ob_outputDirectory)/bindings'
+
+ - name: SYMBOLS_OUTPUT_PATH
+ value: '$(ob_outputDirectory)/symbols'
+
+ # Build tools
+ - name: CMAKE_VERSION
+ value: 'latest'
+
+ - name: PYBIND11_VERSION
+ value: 'latest'
+
+ # Architecture support
+ - name: WINDOWS_ARCHITECTURES
+ value: 'x64,arm64'
+
+ - name: MACOS_ARCHITECTURES
+ value: 'universal2'
+
+ - name: LINUX_ARCHITECTURES
+ value: 'x86_64,aarch64'
diff --git a/OneBranchPipelines/variables/common-variables.yml b/OneBranchPipelines/variables/common-variables.yml
new file mode 100644
index 000000000..3597f4192
--- /dev/null
+++ b/OneBranchPipelines/variables/common-variables.yml
@@ -0,0 +1,25 @@
+# Common variables used across all pipelines
+variables:
+ # Repository root path
+ - name: REPO_ROOT
+ value: $(Build.SourcesDirectory)
+ readonly: true
+
+ # Artifact staging paths
+ - name: ARTIFACT_PATH
+ value: $(Build.ArtifactStagingDirectory)
+ readonly: true
+
+ # Build configuration
+ - name: BUILD_CONFIGURATION
+ value: 'Release'
+
+ # Python versions to build
+ - name: PYTHON_VERSIONS
+ value: '3.10,3.11,3.12,3.13'
+
+ # Package name
+ - name: PACKAGE_NAME
+ value: 'mssql-python'
+ readonly: true
+
diff --git a/OneBranchPipelines/variables/onebranch-variables.yml b/OneBranchPipelines/variables/onebranch-variables.yml
new file mode 100644
index 000000000..71f31037f
--- /dev/null
+++ b/OneBranchPipelines/variables/onebranch-variables.yml
@@ -0,0 +1,22 @@
+# OneBranch-specific variables
+variables:
+ # OneBranch output directory for automatic artifact publishing
+ # All artifacts placed here are automatically published by OneBranch
+ - name: ob_outputDirectory
+ value: '$(ARTIFACT_PATH)'
+
+ # OneBranch SDL configuration
+ - name: ob_sdl_enabled
+ value: true
+
+ # OneBranch symbol publishing
+ - name: ob_symbolsPublishing_enabled
+ value: true
+
+ # OneBranch TSA (Threat and Security Assessment) enabled for Official builds only
+ - name: ob_tsa_enabled
+ value: true
+
+ # Windows host version for OneBranch
+ - name: ob_windows_host_version
+ value: '2022'
diff --git a/OneBranchPipelines/variables/signing-variables.yml b/OneBranchPipelines/variables/signing-variables.yml
new file mode 100644
index 000000000..88c58e9fc
--- /dev/null
+++ b/OneBranchPipelines/variables/signing-variables.yml
@@ -0,0 +1,32 @@
+# ESRP Code Signing Variables
+# These variables map from the 'ESRP Federated Creds (AME)' variable group
+# to the naming convention expected by OneBranch ESRP signing tasks
+# Required variable group: 'ESRP Federated Creds (AME)'
+variables:
+ # Map ESRP variable group names to OneBranch signing variable names
+ # Note: The source variable group uses different naming (without 'Signing' prefix)
+
+ # ESRP App Registration for authentication
+ - name: SigningAppRegistrationClientId
+ value: $(EsrpClientId) # Maps from EsrpClientId in variable group
+
+ - name: SigningAppRegistrationTenantId
+ value: $(DomainTenantId) # Maps from DomainTenantId in variable group
+
+ # Azure Key Vault for signing certificates
+ - name: SigningAuthAkvName
+ value: $(AuthAKVName) # Maps from AuthAKVName in variable group
+
+ - name: SigningAuthSignCertName
+ value: $(AuthSignCertName) # Maps from AuthSignCertName in variable group
+
+ # ESRP client configuration
+ - name: SigningEsrpClientId
+ value: $(EsrpClientId) # Maps from EsrpClientId in variable group
+
+ - name: SigningEsrpConnectedServiceName
+ value: $(ESRPConnectedServiceName) # Maps from ESRPConnectedServiceName in variable group
+
+ # Signing operation codes (for reference - actual operations defined in step template)
+ # Native binary files (.pyd, .so, .dylib) use: SigntoolSign with CP-230012
+ # Python wheel files (.whl) use: NuGetSign with CP-401405
diff --git a/OneBranchPipelines/variables/symbol-variables.yml b/OneBranchPipelines/variables/symbol-variables.yml
new file mode 100644
index 000000000..8946e80e9
--- /dev/null
+++ b/OneBranchPipelines/variables/symbol-variables.yml
@@ -0,0 +1,18 @@
+# Symbol Publishing Variables
+# These variables configure where debug symbols (.pdb files) are published
+variables:
+ # Symbol paths for ApiScan
+ # Must use Build.SourcesDirectory (not ob_outputDirectory) so files persist for globalSdl
+ # Files are copied here during build stages, before ApiScan runs
+ # CRITICAL: Must use backslashes to match Build.SourcesDirectory's Windows path format
+ # When Build.SourcesDirectory resolves to D:\a\_work\1\s, we append \apiScan\dlls
+ - name: apiScanDllPath
+ value: '$(Build.SourcesDirectory)\apiScan\dlls'
+
+ - name: apiScanPdbPath
+ value: '$(Build.SourcesDirectory)\apiScan\pdbs'
+
+ # Symbol server variables come from 'Symbols Publishing' variable group:
+ # - SymbolServer: Symbol publishing server hostname
+ # - SymbolTokenUri: Token URI for symbol publishing service authentication
+
diff --git a/PyPI_Description.md b/PyPI_Description.md
index f52f0f9e3..bb0ebb2f9 100644
--- a/PyPI_Description.md
+++ b/PyPI_Description.md
@@ -1,26 +1,50 @@
-# mssql-python
-
-This is a new Python driver for Microsoft SQL Server currently in Alpha phase.
-
-## Public Preview Release
-
-We are making progress - The Public Preview of our driver is now available! This marks a significant milestone in our development journey. While we saw a few early adopters of our alpha release, we are introducing the following functionalities to support your applications in a more robust and reliable manner.
-
-### What's Included:
-
-- Everything from previous releases
-- **Azure Active Directory Authentication:** New authentication module supporting Azure AD login options (ActiveDirectoryInteractive, ActiveDirectoryDeviceCode, ActiveDirectoryDefault) for secure and flexible cloud integration.
-- **Batch Execution Performance:** Refactored `executemany` for efficient bulk operations and improved C++ bindings for performance.
-- **Robust Logging System:** Overhauled logging with a singleton manager, sensitive data sanitization, and better exception handling.
-- **Improved Row Representation:** Enhanced output and debugging via updated `Row` object string and representation methods.
-
+# General Availability Release
+
+mssql-python is now Generally Available (GA) as Microsoft’s official Python driver for SQL Server, Azure SQL, and SQL databases in Fabric. This release delivers a production-ready, high-performance, and developer-friendly experience.
+
+## What makes mssql-python different?
+
+### Powered by DDBC – Direct Database Connectivity
+
+Most Python SQL Server drivers, including pyodbc, route calls through the Driver Manager, which has slightly different implementations across Windows, macOS, and Linux. This results in inconsistent behavior and capabilities across platforms. Additionally, the Driver Manager must be installed separately, creating friction both for new developers and for deploying applications to servers.
+
+At the heart of the mssql-python driver is DDBC (Direct Database Connectivity) — a lightweight, high-performance C++ layer that replaces the platform’s Driver Manager.
+
+Key Advantages:
+
+- Provides a consistent, cross-platform backend that handles connections, statements, and memory directly.
+- Interfaces directly with the native SQL Server drivers.
+- Integrates with the same TDS core library that powers the ODBC driver.
+
+### Why is this architecture important?
+
+By simplifying the architecture, DDBC delivers:
+
+- Consistency across platforms
+- Lower function call overhead
+- Zero external dependencies on Windows (`pip install mssql-python` is all you need)
+- Full control over connections, memory, and statement handling
+
+### Built with PyBind11 + Modern C++ for Performance and Safety
+
+To expose the DDBC engine to Python, mssql-python uses PyBind11 – a modern C++ binding library.
+
+PyBind11 provides:
+
+- Native-speed execution with automatic type conversions
+- Memory-safe bindings
+- A clean, Pythonic API, while performance-critical logic remains in robust, maintainable C++
+
+## What's new in v1.3.0
+
+### Bug Fixes
+
+- **Segmentation Fault Fix** - Fixed segmentation fault in libmsodbcsql-18.5 during SQLFreeHandle() (#415).
+
For more information, please visit the project link on Github: https://github.com/microsoft/mssql-python
-
-### What's Next:
-
-As we continue to develop and refine the driver, you can expect regular updates that will introduce new features, optimizations, and bug fixes. We encourage you to contribute, provide feedback and report any issues you encounter, as this will help us improve the driver for the final release.
-
-### Stay Tuned:
-
-We appreciate your interest and support in this project. Stay tuned for more updates and enhancements as we work towards delivering a robust and fully-featured driver in coming months.
-Thank you for being a part of our journey!
\ No newline at end of file
+
+If you have any feedback or questions, or need support, please email us at mssql-python@microsoft.com.
+
+## What's Next
+
+As we continue to refine the driver and add new features, you can expect regular updates, optimizations, and bug fixes. We encourage you to contribute, provide feedback and report any issues you encounter, as this will help us improve the driver.
diff --git a/README.md b/README.md
index 0a66c599d..d73b6bc07 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ The driver is compatible with all the Python versions >= 3.10
[Documentation](https://github.com/microsoft/mssql-python/wiki) | [Release Notes](https://github.com/microsoft/mssql-python/releases) | [Roadmap](https://github.com/microsoft/mssql-python/blob/main/ROADMAP.md)
> **Note:**
-> This project is currently in Public Preview, meaning it is still under active development. We are working on core functionalities and gathering more feedback before GA. Please use with caution and avoid production environments.
+> This project is now Generally Available (GA) and ready for production use. We’ve completed core functionality and incorporated feedback from the preview phase.
>
## Installation
@@ -17,35 +17,38 @@ pip install mssql-python
```
**MacOS:** mssql-python can be installed with [pip](http://pypi.python.org/pypi/pip)
```bash
+# For Mac, OpenSSL is a prerequisite - skip if already installed
brew install openssl
pip install mssql-python
```
**Linux:** mssql-python can be installed with [pip](http://pypi.python.org/pypi/pip)
```bash
+# For Alpine
+apk add libtool krb5-libs krb5-dev
+
+# For Debian/Ubuntu
+apt-get install -y libltdl7 libkrb5-3 libgssapi-krb5-2
+
+# For RHEL
+dnf install -y libtool-ltdl krb5-libs
+
+# For SUSE/openSUSE
+zypper install -y libltdl7 libkrb5-3 libgssapi-krb5-2
+
pip install mssql-python
```
## Key Features
### Supported Platforms
-Windows, MacOS and Linux (manylinux2014 - Debian, Ubuntu & RHEL)
+Windows, MacOS and Linux (manylinux: Debian, Ubuntu, RHEL, SUSE (x64 only); musllinux: Alpine)
> **Note:**
-> Support for additional Linux OSs (Alpine, SUSE Linux) will come soon
->
-
-### DBAPI v2.0 Compliance
-
-The Microsoft **mssql-python** module is designed to be fully compliant with the DB API 2.0 specification. This ensures that the driver adheres to a standardized interface for database access in Python, providing consistency and reliability across different database systems. Key aspects of DBAPI v2.0 compliance include:
-
-- **Connection Objects**: Establishing and managing connections to the database.
-- **Cursor Objects**: Executing SQL commands and retrieving results.
-- **Transaction Management**: Supporting commit and rollback operations to ensure data integrity.
-- **Error Handling**: Providing a consistent set of exceptions for handling database errors.
-- **Parameter Substitution**: Allowing the use of placeholders in SQL queries to prevent SQL injection attacks.
-
-By adhering to the DB API 2.0 specification, the mssql-python module ensures compatibility with a wide range of Python applications and frameworks, making it a versatile choice for developers working with Microsoft SQL Server, Azure SQL Database, and Azure SQL Managed Instance.
-
+> SUSE Linux ARM64 is not supported. Please use x64 architecture for SUSE deployments.
+
### Support for Microsoft Entra ID Authentication
The Microsoft mssql-python driver enables Python applications to connect to Microsoft SQL Server, Azure SQL Database, or Azure SQL Managed Instance using Microsoft Entra ID identities. It supports a variety of authentication methods, including username and password, Microsoft Entra managed identity (system-assigned and user-assigned), Integrated Windows Authentication in a federated, domain-joined environment, interactive authentication via browser, device code flow for environments without browser access, and the default authentication method based on environment and configuration. This flexibility allows developers to choose the most suitable authentication approach for their deployment scenario.
@@ -58,42 +61,65 @@ EntraID authentication is now fully supported on MacOS and Linux but with certai
| ActiveDirectoryInteractive | ✅ Yes | ✅ Yes | Interactive login via browser; requires user interaction |
| ActiveDirectoryMSI (Managed Identity) | ✅ Yes | ✅ Yes | For Azure VMs/containers with managed identity |
| ActiveDirectoryServicePrincipal | ✅ Yes | ✅ Yes | Use client ID and secret or certificate |
-| ActiveDirectoryIntegrated | ✅ Yes | ❌ No | Only works on Windows (requires Kerberos/SSPI) |
+| ActiveDirectoryIntegrated | ✅ Yes | ✅ Yes | Now supported on Windows, macOS, and Linux (requires Kerberos/SSPI or equivalent configuration) |
| ActiveDirectoryDeviceCode | ✅ Yes | ✅ Yes | Device code flow for authentication; suitable for environments without browser access |
| ActiveDirectoryDefault | ✅ Yes | ✅ Yes | Uses default authentication method based on environment and configuration |
-**NOTE**:
- - **Access Token**: the connection string **must not** contain `UID`, `PWD`, `Authentication`, or `Trusted_Connection` keywords.
- - **Device Code**: make sure to specify a `Connect Timeout` that provides enough time to go through the device code flow authentication process.
- - **Default**: Ensure you're authenticated via az login, or running within a managed identity-enabled environment.
+> For more information on Entra ID, please refer to this [document](https://github.com/microsoft/mssql-python/wiki/Microsoft-Entra-ID-support)
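+
+A minimal connection sketch using one of the options above; the server and database names are placeholders, and `ActiveDirectoryDefault` assumes you are already signed in (for example via `az login`) or running with a managed identity:
+
+```python
+import mssql_python
+
+# No UID/PWD here; the token is acquired by the selected authentication mode.
+connection_string = (
+    "SERVER=<your-server>.database.windows.net;"
+    "DATABASE=<your-database>;"
+    "Authentication=ActiveDirectoryDefault;"
+    "Encrypt=yes;"
+)
+
+connection = mssql_python.connect(connection_string)
+cursor = connection.cursor()
+cursor.execute("SELECT SUSER_SNAME()")
+print(cursor.fetchone())
+connection.close()
+```
+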
-### Enhanced Pythonic Features
-
-The driver offers a suite of Pythonic enhancements that streamline database interactions, making it easier for developers to execute queries, manage connections, and handle data more efficiently.
-
### Connection Pooling
The Microsoft mssql_python driver provides built-in support for connection pooling, which helps improve performance and scalability by reusing active database connections instead of creating a new connection for every request. This feature is enabled by default. For more information, refer [Connection Pooling Wiki](https://github.com/microsoft/mssql-python/wiki/Connection#connection-pooling).
+
+### DBAPI v2.0 Compliance
+
+The Microsoft **mssql-python** module is designed to be fully compliant with the DB API 2.0 specification. This ensures that the driver adheres to a standardized interface for database access in Python, providing consistency and reliability across different database systems. Key aspects of DBAPI v2.0 compliance include:
+
+- **Connection Objects**: Establishing and managing connections to the database.
+- **Cursor Objects**: Executing SQL commands and retrieving results.
+- **Transaction Management**: Supporting commit and rollback operations to ensure data integrity.
+- **Error Handling**: Providing a consistent set of exceptions for handling database errors.
+- **Parameter Substitution**: Allowing the use of placeholders in SQL queries to prevent SQL injection attacks.
+
+By adhering to the DB API 2.0 specification, the mssql-python module ensures compatibility with a wide range of Python applications and frameworks, making it a versatile choice for developers working with Microsoft SQL Server, Azure SQL Database, and Azure SQL Managed Instance.
+
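+A short sketch of that surface in practice (the connection string, table, and column names are placeholders, and the example assumes the module exposes the standard DB-API `Error` exception):
+
+```python
+import mssql_python
+
+# Placeholder connection string; substitute your own server and credentials
+connection = mssql_python.connect("SERVER=<your-server>;DATABASE=<your-database>;Encrypt=yes;")
+cursor = connection.cursor()
+
+try:
+    # Parameter substitution: qmark placeholders keep values out of the SQL text
+    cursor.execute(
+        "UPDATE Inventory SET Quantity = ? WHERE ProductID = ?",
+        (42, 1001),
+    )
+    connection.commit()    # Transaction management: persist the change
+except mssql_python.Error:
+    connection.rollback()  # ...or undo it when a database error is raised
+finally:
+    connection.close()
+```
+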
+### Enhanced Pythonic Features
+
+The driver offers a suite of Pythonic enhancements that streamline database interactions, making it easier for developers to execute queries, manage connections, and handle data more efficiently.
## Getting Started Examples
Connect to SQL Server and execute a simple query:
```python
import mssql_python
-
+
# Establish a connection
-# Specify connection string
-connection_string = "SERVER=;DATABASE=;UID=;PWD=;Encrypt=yes;"
+# Specify connection string (semicolon-delimited key=value pairs)
+# Uses Microsoft Entra ID Interactive authentication; no password in the string.
+connection_string = "SERVER=tcp:mssql-python-driver-eastus01.database.windows.net,1433;DATABASE=AdventureWorksLT;Authentication=ActiveDirectoryInteractive;Encrypt=yes;"
connection = mssql_python.connect(connection_string)
-
-# Execute a query
+
+# Execute a realistic query against AdventureWorksLT:
+# Top 10 customers by number of orders, with their total spend
cursor = connection.cursor()
-cursor.execute("SELECT * from customer")
+cursor.execute("""
+ SELECT TOP 10
+ c.CustomerID,
+ c.FirstName,
+ c.LastName,
+ COUNT(h.SalesOrderID) AS OrderCount,
+ SUM(h.TotalDue) AS TotalSpend
+ FROM SalesLT.Customer AS c
+ INNER JOIN SalesLT.SalesOrderHeader AS h
+ ON c.CustomerID = h.CustomerID
+ GROUP BY c.CustomerID, c.FirstName, c.LastName
+ ORDER BY OrderCount DESC, TotalSpend DESC
+""")
rows = cursor.fetchall()
-
+
for row in rows:
print(row)
-
+
# Close the connection
connection.close()
diff --git a/ROADMAP.md b/ROADMAP.md
index 654696c5d..22f5e6e1e 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -1,51 +1,18 @@
# Roadmap for Python Driver for SQL Server
-We are thrilled to introduce Python driver for SQL Server (Public Preview) – a modern, high performant, and developer-friendly SDK designed to enhance your SQL Server database connectivity experience. This roadmap outlines the key structural improvements, new features and upcoming enhancements that will set our driver apart from existing solutions.
-
-Why a New Driver?
-
-Unlike existing Python SQL Server drivers, we are making substantial improvements to performance, maintainability, and usability by re-architecting the core internals. Our focus is on seamless integration between Python and C++, efficient memory management, better state handling, and advanced DBAPI enhancements.
-
-Here’s what’s coming:
-
-**1. Structural changes for abstraction of C++ and Python codebase**
-
-We are undertaking significant structural changes to provide a clear abstraction between C++ code and Python. This will ensure better maintainability, improved performance, and a cleaner codebase. By leveraging existing pybind11 module, we aim to create a seamless integration between the two languages, allowing for efficient execution and easier debugging.
-
-This will improve:
-- Maintainability via simplified modular architecture
-- Performance via optimized C++ code
-- Debugging, traceability and seamless interaction between C++ and Python via with PyBind11 module integration
-
-**2. Future DBAPI Enhancements**
-
-In future releases, we plan to add several DBAPI enhancements, including:
-- `Callproc()` : Support for calling stored procedures.
-- `setinputsize()` and `setoutputsize()`
-- `Output` and `InputOutput` Parameters: Handling of output and input-output parameters in stored procedures.
-- Optional DBAPIs: Additional optional DBAPI features to provide more flexibility and functionality for developers.
-
-**3. Cross-Platform Support: Additional Linux Distributions**
-
-We are committed to providing cross-platform support for our Python driver. In the next few weeks, we will release support for additional Linux distributions viz Alpine, SUSE Linux & Oracle Linux.
-
-**4. Bulk Copy (BCP)**
-
-Bulk Copy API (BCP) support is coming soon to the Python Driver for SQL Server. It enables high-speed data ingestion and offers fine-grained control over batch operations, making it ideal for large-scale ETL workflows.
-
-**5. Asynchronous Query Execution**
-
-We are also working on adding support for asynchronous query execution. This feature will allow developers to execute queries without blocking the main thread, enabling more responsive and efficient applications. Asynchronous query execution will be particularly beneficial for applications that require high concurrency and low latency.
-- No blocking of the main thread
-- Faster parallel processing – ideal for high-concurrency applications
-- Better integration with async frameworks like asyncio
-
-We are dedicated to continuously improving the Python driver for SQL Server and welcome feedback from the community. Stay tuned for updates and new features as we work towards delivering a high-quality driver that meets your needs.
-Join the Conversation!
-
-We are building this for developers, with developers. Your feedback will shape the future of the driver.
-- Follow our [Github Repo](https://github.com/microsoft/mssql-python)
-- Join Discussions – Share your ideas and suggestions
-- Try our alpha release – Help us refine and optimize the experience
-
-Stay tuned for more updates, and lets build something amazing together. Watch this space for announcements and release timelines.
+The following roadmap summarizes the features planned for the Python Driver for SQL Server.
+
+| Feature | Description | Status | Target Timeline |
+| ------------------------------ | ----------------------------------------------------------------- | ------------ | ------------------------ |
+| Parameter Dictionaries | Allow parameters to be supplied as Python dicts | Planned | Q4 2025 |
+| Return Rows as Dictionaries | Fetch rows as dictionaries for more Pythonic access | Planned | Q4 2025 |
+| Bulk Copy (BCP) | High-throughput ingestion API for ETL workloads | Under Design | Q1 2026 |
+| Asynchronous Query Execution | Non-blocking queries with asyncio support | Planned | Q1 2026 |
+| Vector Datatype Support | Native support for SQL Server vector datatype | Planned | Q1 2026 |
+| Table-Valued Parameters (TVPs) | Pass tabular data structures into stored procedures | Planned | Q1 2026 |
+| C++ Abstraction | Modular separation via pybind11 for performance & maintainability | In Progress | ETA will be updated soon |
+| JSON Datatype Support | Automatic mapping of JSON datatype to Python dicts/lists | Planned | ETA will be updated soon |
+| callproc() | Full DBAPI compliance & stored procedure enhancements | Planned | ETA will be updated soon |
+| setinputsize() | Full DBAPI compliance & stored procedure enhancements | Planned | ETA will be updated soon |
+| setoutputsize() | Full DBAPI compliance & stored procedure enhancements | Planned | ETA will be updated soon |
+| Output/InputOutput Params | Full DBAPI compliance & stored procedure enhancements | Planned | ETA will be updated soon |
diff --git a/benchmarks/README.md b/benchmarks/README.md
index bde6fb269..ce0480057 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -2,31 +2,73 @@
This directory contains benchmark scripts for testing the performance of various database operations using `pyodbc` and `mssql_python`. The goal is to evaluate and compare the performance of these libraries for common database operations.
+## Benchmark Scripts
+
+### 1. `bench_mssql.py` - Richbench Framework Benchmarks
+Comprehensive benchmarks using the richbench framework for detailed performance analysis.
+
+### 2. `perf-benchmarking.py` - Real-World Query Benchmarks
+Standalone script that tests real-world queries against AdventureWorks2022 database with statistical analysis.
+
## Why Benchmarks?
- To measure the efficiency of `pyodbc` and `mssql_python` in handling database operations.
- To identify performance bottlenecks and optimize database interactions.
- To ensure the reliability and scalability of the libraries under different workloads.
## How to Run Benchmarks
+
+### Running bench_mssql.py (Richbench Framework)
+
1. **Set Up the Environment Variable**:
- Ensure you have a running SQL Server instance.
- Set the `DB_CONNECTION_STRING` environment variable with the connection string to your database. For example:
- ```cmd
- set DB_CONNECTION_STRING=Server=your_server;Database=your_database;UID=your_user;PWD=your_password;
+ ```bash
+ export DB_CONNECTION_STRING="Server=your_server;Database=AdventureWorks2022;UID=your_user;PWD=your_password;"
```
2. **Install Richbench - Benchmarking Tool**:
- - Install richbench :
- ```cmd
- pip install richbench
- ```
+ ```bash
+ pip install richbench
+ ```
3. **Run the Benchmarks**:
- - Execute richbench from the parent folder (mssql-python) :
- ```cmd
+ - Execute richbench from the parent folder (mssql-python):
+ ```bash
richbench benchmarks
```
- Results will be displayed in the terminal with detailed performance metrics.
+ - Results will be displayed in the terminal with detailed performance metrics (see the sketch below for how benchmark pairs are declared).
+
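+richbench collects benchmarks from a module-level `__benchmarks__` list, where each entry pairs a baseline function with a comparison function under a label. A minimal sketch of that structure (the function names below are illustrative, not the exact ones used in `bench_mssql.py`):
+
+```python
+# bench_example.py - a file richbench would pick up from the benchmarks/ folder
+def select_pyodbc():
+    ...  # run the query with pyodbc
+
+
+def select_mssql_python():
+    ...  # run the same query with mssql_python
+
+
+__benchmarks__ = [
+    (select_pyodbc, select_mssql_python, "SELECT: pyodbc vs mssql_python"),
+]
+```
+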
+### Running perf-benchmarking.py (Real-World Queries)
+
+This script tests performance with real-world queries from the AdventureWorks2022 database.
+
+1. **Prerequisites**:
+ - AdventureWorks2022 database must be available
+ - Both `pyodbc` and `mssql-python` must be installed
+ - Update the connection string in the script if needed
+
+2. **Run from project root**:
+ ```bash
+ python benchmarks/perf-benchmarking.py
+ ```
+
+3. **Features**:
+ - Runs each query multiple times (default: 5 iterations)
+ - Calculates average, min, max, and standard deviation (see the timing sketch after this list)
+ - Provides speedup comparisons between libraries
+ - Tests various query patterns:
+ - Complex joins with aggregations
+ - Large dataset retrieval (10K+ rows)
+ - Very large dataset (1.2M rows)
+ - CTEs and subqueries
+ - Detailed summary tables and conclusions
+
+4. **Output**:
+ The script provides:
+ - Progress indicators during execution
+ - Detailed results for each benchmark
+ - Summary comparison table
+ - Overall performance conclusion with speedup factors
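+
+The statistics listed above boil down to timing each query over several iterations and summarising the samples. A minimal sketch of that idea (the helper name and default iteration count are illustrative, not the script's exact internals):
+
+```python
+import statistics
+import time
+
+
+def time_query(run_query, iterations=5):
+    """Run a query callable repeatedly and summarise the timings in seconds."""
+    samples = []
+    for _ in range(iterations):
+        start = time.perf_counter()
+        run_query()
+        samples.append(time.perf_counter() - start)
+    return {
+        "avg": statistics.mean(samples),
+        "min": min(samples),
+        "max": max(samples),
+        "stdev": statistics.stdev(samples) if len(samples) > 1 else 0.0,
+    }
+
+
+# Speedup is the ratio of average runtimes, e.g.
+# speedup = pyodbc_stats["avg"] / mssql_python_stats["avg"]
+```
+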
## Key Features of `bench_mssql.py`
- **Comprehensive Benchmarks**: Includes SELECT, INSERT, UPDATE, DELETE, complex queries, stored procedures, and transaction handling.
@@ -34,7 +76,15 @@ This directory contains benchmark scripts for testing the performance of various
- **Progress Messages**: Clear progress messages are printed during execution for better visibility.
- **Automated Setup and Cleanup**: The script automatically sets up and cleans up the database environment before and after the benchmarks.
+## Key Features of `perf-benchmarking.py`
+- **Statistical Analysis**: Multiple iterations with avg/min/max/stddev calculations
+- **Real-World Queries**: Tests against AdventureWorks2022 with production-like queries
+- **Automatic Import Resolution**: Correctly imports local `mssql_python` package
+- **Comprehensive Reporting**: Detailed comparison tables and performance summaries
+- **Speedup Calculations**: Clear indication of performance differences
+
## Notes
- Ensure the database user has the necessary permissions to create and drop tables and stored procedures.
-- The script uses permanent tables prefixed with `perfbenchmark_` for benchmarking purposes.
-- A stored procedure named `perfbenchmark_stored_procedure` is created and used during the benchmarks.
\ No newline at end of file
+- The `bench_mssql.py` script uses permanent tables prefixed with `perfbenchmark_` for benchmarking purposes.
+- A stored procedure named `perfbenchmark_stored_procedure` is created and used during the benchmarks.
+- The `perf-benchmarking.py` script connects to AdventureWorks2022 and requires read permissions only.
\ No newline at end of file
diff --git a/benchmarks/bench_mssql.py b/benchmarks/bench_mssql.py
index 9aae0e56a..d73a1c1c4 100644
--- a/benchmarks/bench_mssql.py
+++ b/benchmarks/bench_mssql.py
@@ -6,7 +6,11 @@
import time
import mssql_python
-CONNECTION_STRING = "Driver={ODBC Driver 18 for SQL Server};" + os.environ.get('DB_CONNECTION_STRING')
+
+CONNECTION_STRING = "Driver={ODBC Driver 18 for SQL Server};" + os.environ.get(
+ "DB_CONNECTION_STRING"
+)
+
def setup_database():
print("Setting up the database...")
@@ -15,48 +19,58 @@ def setup_database():
try:
# Drop permanent tables and stored procedure if they exist
print("Dropping existing tables and stored procedure if they exist...")
- cursor.execute("""
+ cursor.execute(
+ """
IF OBJECT_ID('perfbenchmark_child_table', 'U') IS NOT NULL DROP TABLE perfbenchmark_child_table;
IF OBJECT_ID('perfbenchmark_parent_table', 'U') IS NOT NULL DROP TABLE perfbenchmark_parent_table;
IF OBJECT_ID('perfbenchmark_table', 'U') IS NOT NULL DROP TABLE perfbenchmark_table;
IF OBJECT_ID('perfbenchmark_stored_procedure', 'P') IS NOT NULL DROP PROCEDURE perfbenchmark_stored_procedure;
- """)
+ """
+ )
# Create permanent tables with new names
print("Creating tables...")
- cursor.execute("""
+ cursor.execute(
+ """
CREATE TABLE perfbenchmark_table (
id INT,
name NVARCHAR(50),
age INT
)
- """)
+ """
+ )
- cursor.execute("""
+ cursor.execute(
+ """
CREATE TABLE perfbenchmark_parent_table (
id INT PRIMARY KEY,
name NVARCHAR(50)
)
- """)
+ """
+ )
- cursor.execute("""
+ cursor.execute(
+ """
CREATE TABLE perfbenchmark_child_table (
id INT PRIMARY KEY,
parent_id INT,
description NVARCHAR(100),
FOREIGN KEY (parent_id) REFERENCES perfbenchmark_parent_table(id)
)
- """)
+ """
+ )
# Create stored procedure
print("Creating stored procedure...")
- cursor.execute("""
+ cursor.execute(
+ """
CREATE PROCEDURE perfbenchmark_stored_procedure
AS
BEGIN
SELECT * FROM perfbenchmark_table;
END
- """)
+ """
+ )
conn.commit()
print("Database setup completed.")
@@ -64,9 +78,11 @@ def setup_database():
cursor.close()
conn.close()
+
# Call setup_database to ensure permanent tables and procedure are recreated
setup_database()
+
def cleanup_database():
print("Cleaning up the database...")
conn = pyodbc.connect(CONNECTION_STRING)
@@ -74,21 +90,25 @@ def cleanup_database():
try:
# Drop tables and stored procedure after benchmarks
print("Dropping tables and stored procedure...")
- cursor.execute("""
+ cursor.execute(
+ """
IF OBJECT_ID('perfbenchmark_child_table', 'U') IS NOT NULL DROP TABLE perfbenchmark_child_table;
IF OBJECT_ID('perfbenchmark_parent_table', 'U') IS NOT NULL DROP TABLE perfbenchmark_parent_table;
IF OBJECT_ID('perfbenchmark_table', 'U') IS NOT NULL DROP TABLE perfbenchmark_table;
IF OBJECT_ID('perfbenchmark_stored_procedure', 'P') IS NOT NULL DROP PROCEDURE perfbenchmark_stored_procedure;
- """)
+ """
+ )
conn.commit()
print("Database cleanup completed.")
finally:
cursor.close()
conn.close()
+
# Register cleanup function to run at exit
atexit.register(cleanup_database)
+
# Define benchmark functions for pyodbc
def bench_select_pyodbc():
print("Running SELECT benchmark with pyodbc...")
@@ -106,6 +126,7 @@ def bench_select_pyodbc():
conn.close()
print("SELECT benchmark with pyodbc completed.")
+
def bench_insert_pyodbc():
print("Running INSERT benchmark with pyodbc...")
try:
@@ -119,6 +140,7 @@ def bench_insert_pyodbc():
except Exception as e:
print(f"Error during INSERT benchmark: {e}")
+
def bench_update_pyodbc():
print("Running UPDATE benchmark with pyodbc...")
try:
@@ -132,6 +154,7 @@ def bench_update_pyodbc():
except Exception as e:
print(f"Error during UPDATE benchmark: {e}")
+
def bench_delete_pyodbc():
print("Running DELETE benchmark with pyodbc...")
try:
@@ -145,16 +168,19 @@ def bench_delete_pyodbc():
except Exception as e:
print(f"Error during DELETE benchmark: {e}")
+
def bench_complex_query_pyodbc():
print("Running COMPLEX QUERY benchmark with pyodbc...")
try:
conn = pyodbc.connect(CONNECTION_STRING)
cursor = conn.cursor()
- cursor.execute("""SELECT name, COUNT(*)
+ cursor.execute(
+ """SELECT name, COUNT(*)
FROM perfbenchmark_table
GROUP BY name
HAVING COUNT(*) > 1
- """)
+ """
+ )
cursor.fetchall()
cursor.close()
conn.close()
@@ -162,12 +188,13 @@ def bench_complex_query_pyodbc():
except Exception as e:
print(f"Error during COMPLEX QUERY benchmark: {e}")
+
def bench_100_inserts_pyodbc():
print("Running 100 INSERTS benchmark with pyodbc...")
try:
conn = pyodbc.connect(CONNECTION_STRING)
cursor = conn.cursor()
- data = [(i, 'John Doe', 30) for i in range(100)]
+ data = [(i, "John Doe", 30) for i in range(100)]
cursor.executemany("INSERT INTO perfbenchmark_table (id, name, age) VALUES (?, ?, ?)", data)
conn.commit()
cursor.close()
@@ -176,6 +203,7 @@ def bench_100_inserts_pyodbc():
except Exception as e:
print(f"Error during 100 INSERTS benchmark: {e}")
+
def bench_fetchone_pyodbc():
print("Running FETCHONE benchmark with pyodbc...")
try:
@@ -189,6 +217,7 @@ def bench_fetchone_pyodbc():
except Exception as e:
print(f"Error during FETCHONE benchmark: {e}")
+
def bench_fetchmany_pyodbc():
print("Running FETCHMANY benchmark with pyodbc...")
try:
@@ -202,13 +231,14 @@ def bench_fetchmany_pyodbc():
except Exception as e:
print(f"Error during FETCHMANY benchmark: {e}")
+
def bench_executemany_pyodbc():
print("Running EXECUTEMANY benchmark with pyodbc...")
try:
conn = pyodbc.connect(CONNECTION_STRING)
cursor = conn.cursor()
cursor.fast_executemany = True
- data = [(i, 'John Doe', 30) for i in range(100)]
+ data = [(i, "John Doe", 30) for i in range(100)]
cursor.executemany("INSERT INTO perfbenchmark_table (id, name, age) VALUES (?, ?, ?)", data)
conn.commit()
cursor.close()
@@ -217,6 +247,7 @@ def bench_executemany_pyodbc():
except Exception as e:
print(f"Error during EXECUTEMANY benchmark: {e}")
+
def bench_stored_procedure_pyodbc():
print("Running STORED PROCEDURE benchmark with pyodbc...")
try:
@@ -230,16 +261,19 @@ def bench_stored_procedure_pyodbc():
except Exception as e:
print(f"Error during STORED PROCEDURE benchmark: {e}")
+
def bench_nested_query_pyodbc():
print("Running NESTED QUERY benchmark with pyodbc...")
try:
conn = pyodbc.connect(CONNECTION_STRING)
cursor = conn.cursor()
- cursor.execute("""SELECT * FROM (
+ cursor.execute(
+ """SELECT * FROM (
SELECT name, age FROM perfbenchmark_table
) AS subquery
WHERE age > 25
- """)
+ """
+ )
cursor.fetchall()
cursor.close()
conn.close()
@@ -247,15 +281,18 @@ def bench_nested_query_pyodbc():
except Exception as e:
print(f"Error during NESTED QUERY benchmark: {e}")
+
def bench_join_query_pyodbc():
print("Running JOIN QUERY benchmark with pyodbc...")
try:
conn = pyodbc.connect(CONNECTION_STRING)
cursor = conn.cursor()
- cursor.execute("""SELECT a.name, b.age
+ cursor.execute(
+ """SELECT a.name, b.age
FROM perfbenchmark_table a
JOIN perfbenchmark_table b ON a.id = b.id
- """)
+ """
+ )
cursor.fetchall()
cursor.close()
conn.close()
@@ -263,6 +300,7 @@ def bench_join_query_pyodbc():
except Exception as e:
print(f"Error during JOIN QUERY benchmark: {e}")
+
def bench_transaction_pyodbc():
print("Running TRANSACTION benchmark with pyodbc...")
try:
@@ -270,7 +308,9 @@ def bench_transaction_pyodbc():
cursor = conn.cursor()
try:
cursor.execute("BEGIN TRANSACTION")
- cursor.execute("INSERT INTO perfbenchmark_table (id, name, age) VALUES (1, 'John Doe', 30)")
+ cursor.execute(
+ "INSERT INTO perfbenchmark_table (id, name, age) VALUES (1, 'John Doe', 30)"
+ )
cursor.execute("UPDATE perfbenchmark_table SET age = 31 WHERE id = 1")
cursor.execute("DELETE FROM perfbenchmark_table WHERE id = 1")
cursor.execute("COMMIT")
@@ -282,6 +322,7 @@ def bench_transaction_pyodbc():
except Exception as e:
print(f"Error during TRANSACTION benchmark: {e}")
+
def bench_large_data_set_pyodbc():
print("Running LARGE DATA SET benchmark with pyodbc...")
try:
@@ -296,17 +337,20 @@ def bench_large_data_set_pyodbc():
except Exception as e:
print(f"Error during LARGE DATA SET benchmark: {e}")
+
def bench_update_with_join_pyodbc():
print("Running UPDATE WITH JOIN benchmark with pyodbc...")
try:
conn = pyodbc.connect(CONNECTION_STRING)
cursor = conn.cursor()
- cursor.execute("""UPDATE perfbenchmark_child_table
+ cursor.execute(
+ """UPDATE perfbenchmark_child_table
SET description = 'Updated Child 1'
FROM perfbenchmark_child_table c
JOIN perfbenchmark_parent_table p ON c.parent_id = p.id
WHERE p.name = 'Parent 1'
- """)
+ """
+ )
conn.commit()
cursor.close()
conn.close()
@@ -314,16 +358,19 @@ def bench_update_with_join_pyodbc():
except Exception as e:
print(f"Error during UPDATE WITH JOIN benchmark: {e}")
+
def bench_delete_with_join_pyodbc():
print("Running DELETE WITH JOIN benchmark with pyodbc...")
try:
conn = pyodbc.connect(CONNECTION_STRING)
cursor = conn.cursor()
- cursor.execute("""DELETE c
+ cursor.execute(
+ """DELETE c
FROM perfbenchmark_child_table c
JOIN perfbenchmark_parent_table p ON c.parent_id = p.id
WHERE p.name = 'Parent 1'
- """)
+ """
+ )
conn.commit()
cursor.close()
conn.close()
@@ -331,6 +378,7 @@ def bench_delete_with_join_pyodbc():
except Exception as e:
print(f"Error during DELETE WITH JOIN benchmark: {e}")
+
def bench_multiple_connections_pyodbc():
print("Running MULTIPLE CONNECTIONS benchmark with pyodbc...")
try:
@@ -338,19 +386,20 @@ def bench_multiple_connections_pyodbc():
for _ in range(10):
conn = pyodbc.connect(CONNECTION_STRING)
connections.append(conn)
-
+
for conn in connections:
cursor = conn.cursor()
cursor.execute("SELECT * FROM perfbenchmark_table")
cursor.fetchall()
cursor.close()
-
+
for conn in connections:
conn.close()
print("MULTIPLE CONNECTIONS benchmark with pyodbc completed.")
except Exception as e:
print(f"Error during MULTIPLE CONNECTIONS benchmark: {e}")
+
def bench_1000_connections_pyodbc():
print("Running 1000 CONNECTIONS benchmark with pyodbc...")
try:
@@ -365,6 +414,7 @@ def bench_1000_connections_pyodbc():
except Exception as e:
print(f"Error during 1000 CONNECTIONS benchmark: {e}")
+
# Define benchmark functions for mssql_python
def bench_select_mssql_python():
print("Running SELECT benchmark with mssql_python...")
@@ -385,6 +435,7 @@ def bench_select_mssql_python():
except Exception as e:
print(f"Error during SELECT benchmark with mssql_python: {e}")
+
def bench_insert_mssql_python():
print("Running INSERT benchmark with mssql_python...")
try:
@@ -398,6 +449,7 @@ def bench_insert_mssql_python():
except Exception as e:
print(f"Error during INSERT benchmark with mssql_python: {e}")
+
def bench_update_mssql_python():
print("Running UPDATE benchmark with mssql_python...")
try:
@@ -411,6 +463,7 @@ def bench_update_mssql_python():
except Exception as e:
print(f"Error during UPDATE benchmark with mssql_python: {e}")
+
def bench_delete_mssql_python():
print("Running DELETE benchmark with mssql_python...")
try:
@@ -424,16 +477,19 @@ def bench_delete_mssql_python():
except Exception as e:
print(f"Error during DELETE benchmark with mssql_python: {e}")
+
def bench_complex_query_mssql_python():
print("Running COMPLEX QUERY benchmark with mssql_python...")
try:
conn = mssql_python.connect(CONNECTION_STRING)
cursor = conn.cursor()
- cursor.execute("""SELECT name, COUNT(*)
+ cursor.execute(
+ """SELECT name, COUNT(*)
FROM perfbenchmark_table
GROUP BY name
HAVING COUNT(*) > 1
- """)
+ """
+ )
cursor.fetchall()
cursor.close()
conn.close()
@@ -441,13 +497,16 @@ def bench_complex_query_mssql_python():
except Exception as e:
print(f"Error during COMPLEX QUERY benchmark with mssql_python: {e}")
+
def bench_100_inserts_mssql_python():
print("Running 100 INSERTS benchmark with mssql_python...")
try:
conn = mssql_python.connect(CONNECTION_STRING)
cursor = conn.cursor()
- data = [(i, 'John Doe', 30) for i in range(100)]
- cursor.executemany("INSERT INTO perfbenchmark_table (id, name, age) VALUES (?, 'John Doe', 30)", data)
+ data = [(i, "John Doe", 30) for i in range(100)]
+ cursor.executemany(
+ "INSERT INTO perfbenchmark_table (id, name, age) VALUES (?, 'John Doe', 30)", data
+ )
conn.commit()
cursor.close()
conn.close()
@@ -455,6 +514,7 @@ def bench_100_inserts_mssql_python():
except Exception as e:
print(f"Error during 100 INSERTS benchmark with mssql_python: {e}")
+
def bench_fetchone_mssql_python():
print("Running FETCHONE benchmark with mssql_python...")
try:
@@ -468,6 +528,7 @@ def bench_fetchone_mssql_python():
except Exception as e:
print(f"Error during FETCHONE benchmark with mssql_python: {e}")
+
def bench_fetchmany_mssql_python():
print("Running FETCHMANY benchmark with mssql_python...")
try:
@@ -481,12 +542,13 @@ def bench_fetchmany_mssql_python():
except Exception as e:
print(f"Error during FETCHMANY benchmark with mssql_python: {e}")
+
def bench_executemany_mssql_python():
print("Running EXECUTEMANY benchmark with mssql_python...")
try:
conn = mssql_python.connect(CONNECTION_STRING)
cursor = conn.cursor()
- data = [(i, 'John Doe', 30) for i in range(100)]
+ data = [(i, "John Doe", 30) for i in range(100)]
cursor.executemany("INSERT INTO perfbenchmark_table (id, name, age) VALUES (?, ?, ?)", data)
conn.commit()
cursor.close()
@@ -495,6 +557,7 @@ def bench_executemany_mssql_python():
except Exception as e:
print(f"Error during EXECUTEMANY benchmark with mssql_python: {e}")
+
def bench_stored_procedure_mssql_python():
print("Running STORED PROCEDURE benchmark with mssql_python...")
try:
@@ -508,16 +571,19 @@ def bench_stored_procedure_mssql_python():
except Exception as e:
print(f"Error during STORED PROCEDURE benchmark with mssql_python: {e}")
+
def bench_nested_query_mssql_python():
print("Running NESTED QUERY benchmark with mssql_python...")
try:
conn = mssql_python.connect(CONNECTION_STRING)
cursor = conn.cursor()
- cursor.execute("""SELECT * FROM (
+ cursor.execute(
+ """SELECT * FROM (
SELECT name, age FROM perfbenchmark_table
) AS subquery
WHERE age > 25
- """)
+ """
+ )
cursor.fetchall()
cursor.close()
conn.close()
@@ -525,15 +591,18 @@ def bench_nested_query_mssql_python():
except Exception as e:
print(f"Error during NESTED QUERY benchmark with mssql_python: {e}")
+
def bench_join_query_mssql_python():
print("Running JOIN QUERY benchmark with mssql_python...")
try:
conn = mssql_python.connect(CONNECTION_STRING)
cursor = conn.cursor()
- cursor.execute("""SELECT a.name, b.age
+ cursor.execute(
+ """SELECT a.name, b.age
FROM perfbenchmark_table a
JOIN perfbenchmark_table b ON a.id = b.id
- """)
+ """
+ )
cursor.fetchall()
cursor.close()
conn.close()
@@ -541,6 +610,7 @@ def bench_join_query_mssql_python():
except Exception as e:
print(f"Error during JOIN QUERY benchmark with mssql_python: {e}")
+
def bench_transaction_mssql_python():
print("Running TRANSACTION benchmark with mssql_python...")
try:
@@ -548,7 +618,9 @@ def bench_transaction_mssql_python():
cursor = conn.cursor()
try:
cursor.execute("BEGIN TRANSACTION")
- cursor.execute("INSERT INTO perfbenchmark_table (id, name, age) VALUES (1, 'John Doe', 30)")
+ cursor.execute(
+ "INSERT INTO perfbenchmark_table (id, name, age) VALUES (1, 'John Doe', 30)"
+ )
cursor.execute("UPDATE perfbenchmark_table SET age = 31 WHERE id = 1")
cursor.execute("DELETE FROM perfbenchmark_table WHERE id = 1")
cursor.execute("COMMIT")
@@ -560,6 +632,7 @@ def bench_transaction_mssql_python():
except Exception as e:
print(f"Error during TRANSACTION benchmark with mssql_python: {e}")
+
def bench_large_data_set_mssql_python():
print("Running LARGE DATA SET benchmark with mssql_python...")
try:
@@ -574,17 +647,20 @@ def bench_large_data_set_mssql_python():
except Exception as e:
print(f"Error during LARGE DATA SET benchmark with mssql_python: {e}")
+
def bench_update_with_join_mssql_python():
print("Running UPDATE WITH JOIN benchmark with mssql_python...")
try:
conn = mssql_python.connect(CONNECTION_STRING)
cursor = conn.cursor()
- cursor.execute("""UPDATE perfbenchmark_child_table
+ cursor.execute(
+ """UPDATE perfbenchmark_child_table
SET description = 'Updated Child 1'
FROM perfbenchmark_child_table c
JOIN perfbenchmark_parent_table p ON c.parent_id = p.id
WHERE p.name = 'Parent 1'
- """)
+ """
+ )
conn.commit()
cursor.close()
conn.close()
@@ -592,16 +668,19 @@ def bench_update_with_join_mssql_python():
except Exception as e:
print(f"Error during UPDATE WITH JOIN benchmark with mssql_python: {e}")
+
def bench_delete_with_join_mssql_python():
print("Running DELETE WITH JOIN benchmark with mssql_python...")
try:
conn = mssql_python.connect(CONNECTION_STRING)
cursor = conn.cursor()
- cursor.execute("""DELETE c
+ cursor.execute(
+ """DELETE c
FROM perfbenchmark_child_table c
JOIN perfbenchmark_parent_table p ON c.parent_id = p.id
WHERE p.name = 'Parent 1'
- """)
+ """
+ )
conn.commit()
cursor.close()
conn.close()
@@ -609,6 +688,7 @@ def bench_delete_with_join_mssql_python():
except Exception as e:
print(f"Error during DELETE WITH JOIN benchmark with mssql_python: {e}")
+
def bench_multiple_connections_mssql_python():
print("Running MULTIPLE CONNECTIONS benchmark with mssql_python...")
try:
@@ -616,25 +696,28 @@ def bench_multiple_connections_mssql_python():
for _ in range(10):
conn = mssql_python.connect(CONNECTION_STRING)
connections.append(conn)
-
+
for conn in connections:
cursor = conn.cursor()
cursor.execute("SELECT * FROM perfbenchmark_table")
cursor.fetchall()
cursor.close()
-
+
for conn in connections:
conn.close()
print("MULTIPLE CONNECTIONS benchmark with mssql_python completed.")
except Exception as e:
print(f"Error during MULTIPLE CONNECTIONS benchmark with mssql_python: {e}")
+
def bench_1000_connections_mssql_python():
print("Running 1000 CONNECTIONS benchmark with mssql_python...")
try:
threads = []
for _ in range(1000):
- thread = threading.Thread(target=lambda: mssql_python.connect(CONNECTION_STRING).close())
+ thread = threading.Thread(
+ target=lambda: mssql_python.connect(CONNECTION_STRING).close()
+ )
threads.append(thread)
thread.start()
for thread in threads:
@@ -643,6 +726,7 @@ def bench_1000_connections_mssql_python():
except Exception as e:
print(f"Error during 1000 CONNECTIONS benchmark with mssql_python: {e}")
+
# Define benchmarks
__benchmarks__ = [
(bench_select_pyodbc, bench_select_mssql_python, "SELECT operation"),
@@ -650,17 +734,37 @@ def bench_1000_connections_mssql_python():
(bench_update_pyodbc, bench_update_mssql_python, "UPDATE operation"),
(bench_delete_pyodbc, bench_delete_mssql_python, "DELETE operation"),
(bench_complex_query_pyodbc, bench_complex_query_mssql_python, "Complex query operation"),
- (bench_multiple_connections_pyodbc, bench_multiple_connections_mssql_python, "Multiple connections operation"),
+ (
+ bench_multiple_connections_pyodbc,
+ bench_multiple_connections_mssql_python,
+ "Multiple connections operation",
+ ),
(bench_fetchone_pyodbc, bench_fetchone_mssql_python, "Fetch one operation"),
(bench_fetchmany_pyodbc, bench_fetchmany_mssql_python, "Fetch many operation"),
- (bench_stored_procedure_pyodbc, bench_stored_procedure_mssql_python, "Stored procedure operation"),
- (bench_1000_connections_pyodbc, bench_1000_connections_mssql_python, "1000 connections operation"),
+ (
+ bench_stored_procedure_pyodbc,
+ bench_stored_procedure_mssql_python,
+ "Stored procedure operation",
+ ),
+ (
+ bench_1000_connections_pyodbc,
+ bench_1000_connections_mssql_python,
+ "1000 connections operation",
+ ),
(bench_nested_query_pyodbc, bench_nested_query_mssql_python, "Nested query operation"),
(bench_large_data_set_pyodbc, bench_large_data_set_mssql_python, "Large data set operation"),
(bench_join_query_pyodbc, bench_join_query_mssql_python, "Join query operation"),
(bench_executemany_pyodbc, bench_executemany_mssql_python, "Execute many operation"),
(bench_100_inserts_pyodbc, bench_100_inserts_mssql_python, "100 inserts operation"),
(bench_transaction_pyodbc, bench_transaction_mssql_python, "Transaction operation"),
- (bench_update_with_join_pyodbc, bench_update_with_join_mssql_python, "Update with join operation"),
- (bench_delete_with_join_pyodbc, bench_delete_with_join_mssql_python, "Delete with join operation"),
-]
\ No newline at end of file
+ (
+ bench_update_with_join_pyodbc,
+ bench_update_with_join_mssql_python,
+ "Update with join operation",
+ ),
+ (
+ bench_delete_with_join_pyodbc,
+ bench_delete_with_join_mssql_python,
+ "Delete with join operation",
+ ),
+]
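Each `__benchmarks__` entry pairs a pyodbc benchmark with its mssql_python counterpart plus a label, the convention consumed by comparison harnesses such as richbench (the harness itself is not shown in this diff). As a purely illustrative driver, not the project's actual runner, the pairs could be timed like this (note that importing `bench_mssql` also triggers its `setup_database()` side effect):

```python
import time

from bench_mssql import __benchmarks__  # illustrative import; runs setup_database() on import

# Time each (pyodbc_fn, mssql_python_fn, label) pair once; a real harness
# repeats runs and reports aggregate statistics instead of a single sample.
for pyodbc_fn, mssql_fn, label in __benchmarks__:
    start = time.perf_counter()
    pyodbc_fn()
    pyodbc_elapsed = time.perf_counter() - start

    start = time.perf_counter()
    mssql_fn()
    mssql_elapsed = time.perf_counter() - start

    ratio = pyodbc_elapsed / mssql_elapsed if mssql_elapsed else float("nan")
    print(f"{label}: pyodbc {pyodbc_elapsed:.4f}s vs mssql_python {mssql_elapsed:.4f}s ({ratio:.2f}x)")
```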
diff --git a/benchmarks/perf-benchmarking.py b/benchmarks/perf-benchmarking.py
new file mode 100644
index 000000000..a00a3f6fe
--- /dev/null
+++ b/benchmarks/perf-benchmarking.py
@@ -0,0 +1,377 @@
+"""
+Performance Benchmarking Script for mssql-python vs pyodbc
+
+This script runs comprehensive performance tests comparing mssql-python with pyodbc
+across multiple query types and scenarios. Each test is run multiple times to calculate
+average execution times, minimum, maximum, and standard deviation.
+
+Usage:
+ python benchmarks/perf-benchmarking.py
+
+Requirements:
+ - pyodbc
+ - mssql_python
+ - Valid SQL Server connection
+"""
+
+import os
+import sys
+import time
+import statistics
+from typing import List, Tuple
+
+# Add parent directory to path to import local mssql_python
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
+import pyodbc
+from mssql_python import connect
+
+# Configuration
+CONN_STR = os.getenv("DB_CONNECTION_STRING")
+
+if not CONN_STR:
+ print(
+ "Error: The environment variable DB_CONNECTION_STRING is not set. Please set it to a valid SQL Server connection string and try again."
+ )
+ sys.exit(1)
+
+# Ensure pyodbc connection string has ODBC driver specified
+if CONN_STR and "Driver=" not in CONN_STR:
+ CONN_STR_PYODBC = f"Driver={{ODBC Driver 18 for SQL Server}};{CONN_STR}"
+else:
+ CONN_STR_PYODBC = CONN_STR
+
+NUM_ITERATIONS = 10 # Number of times to run each test for averaging
+
+# SQL Queries
+COMPLEX_JOIN_AGGREGATION = """
+ SELECT
+ p.ProductID,
+ p.Name AS ProductName,
+ pc.Name AS Category,
+ psc.Name AS Subcategory,
+ COUNT(sod.SalesOrderDetailID) AS TotalOrders,
+ SUM(sod.OrderQty) AS TotalQuantity,
+ SUM(sod.LineTotal) AS TotalRevenue,
+ AVG(sod.UnitPrice) AS AvgPrice
+ FROM Sales.SalesOrderDetail sod
+ INNER JOIN Production.Product p ON sod.ProductID = p.ProductID
+ INNER JOIN Production.ProductSubcategory psc ON p.ProductSubcategoryID = psc.ProductSubcategoryID
+ INNER JOIN Production.ProductCategory pc ON psc.ProductCategoryID = pc.ProductCategoryID
+ GROUP BY p.ProductID, p.Name, pc.Name, psc.Name
+ HAVING SUM(sod.LineTotal) > 10000
+ ORDER BY TotalRevenue DESC;
+"""
+
+LARGE_DATASET = """
+ SELECT
+ soh.SalesOrderID,
+ soh.OrderDate,
+ soh.DueDate,
+ soh.ShipDate,
+ soh.Status,
+ soh.SubTotal,
+ soh.TaxAmt,
+ soh.Freight,
+ soh.TotalDue,
+ c.CustomerID,
+ p.FirstName,
+ p.LastName,
+ a.AddressLine1,
+ a.City,
+ sp.Name AS StateProvince,
+ cr.Name AS Country
+ FROM Sales.SalesOrderHeader soh
+ INNER JOIN Sales.Customer c ON soh.CustomerID = c.CustomerID
+ INNER JOIN Person.Person p ON c.PersonID = p.BusinessEntityID
+ INNER JOIN Person.BusinessEntityAddress bea ON p.BusinessEntityID = bea.BusinessEntityID
+ INNER JOIN Person.Address a ON bea.AddressID = a.AddressID
+ INNER JOIN Person.StateProvince sp ON a.StateProvinceID = sp.StateProvinceID
+ INNER JOIN Person.CountryRegion cr ON sp.CountryRegionCode = cr.CountryRegionCode
+ WHERE soh.OrderDate >= '2013-01-01';
+"""
+
+VERY_LARGE_DATASET = """
+SELECT
+ sod.SalesOrderID,
+ sod.SalesOrderDetailID,
+ sod.ProductID,
+ sod.OrderQty,
+ sod.UnitPrice,
+ sod.LineTotal,
+ p.Name AS ProductName,
+ p.ProductNumber,
+ p.Color,
+ p.ListPrice,
+ n1.number AS RowMultiplier1
+FROM Sales.SalesOrderDetail sod
+CROSS JOIN (SELECT TOP 10 ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS number
+ FROM Sales.SalesOrderDetail) n1
+INNER JOIN Production.Product p ON sod.ProductID = p.ProductID;
+"""
+
+SUBQUERY_WITH_CTE = """
+ WITH SalesSummary AS (
+ SELECT
+ soh.SalesPersonID,
+ YEAR(soh.OrderDate) AS OrderYear,
+ SUM(soh.TotalDue) AS YearlyTotal
+ FROM Sales.SalesOrderHeader soh
+ WHERE soh.SalesPersonID IS NOT NULL
+ GROUP BY soh.SalesPersonID, YEAR(soh.OrderDate)
+ ),
+ RankedSales AS (
+ SELECT
+ SalesPersonID,
+ OrderYear,
+ YearlyTotal,
+ RANK() OVER (PARTITION BY OrderYear ORDER BY YearlyTotal DESC) AS SalesRank
+ FROM SalesSummary
+ )
+ SELECT
+ rs.SalesPersonID,
+ p.FirstName,
+ p.LastName,
+ rs.OrderYear,
+ rs.YearlyTotal,
+ rs.SalesRank
+ FROM RankedSales rs
+ INNER JOIN Person.Person p ON rs.SalesPersonID = p.BusinessEntityID
+ WHERE rs.SalesRank <= 10
+ ORDER BY rs.OrderYear DESC, rs.SalesRank;
+"""
+
+
+class BenchmarkResult:
+ """Class to store and calculate benchmark statistics"""
+
+ def __init__(self, name: str):
+ self.name = name
+ self.times: List[float] = []
+ self.row_count: int = 0
+
+ def add_time(self, elapsed: float, rows: int = 0):
+ """Add a timing result"""
+ self.times.append(elapsed)
+ if rows > 0:
+ self.row_count = rows
+
+ @property
+ def avg_time(self) -> float:
+ """Calculate average time"""
+ return statistics.mean(self.times) if self.times else 0.0
+
+ @property
+ def min_time(self) -> float:
+ """Get minimum time"""
+ return min(self.times) if self.times else 0.0
+
+ @property
+ def max_time(self) -> float:
+ """Get maximum time"""
+ return max(self.times) if self.times else 0.0
+
+ @property
+ def std_dev(self) -> float:
+ """Calculate standard deviation"""
+ return statistics.stdev(self.times) if len(self.times) > 1 else 0.0
+
+ def __str__(self) -> str:
+ """Format results as string"""
+ return (
+ f"{self.name}:\n"
+ f" Avg: {self.avg_time:.4f}s | Min: {self.min_time:.4f}s | "
+ f"Max: {self.max_time:.4f}s | StdDev: {self.std_dev:.4f}s | "
+ f"Rows: {self.row_count}"
+ )
+
+
+def run_benchmark_pyodbc(query: str, name: str, iterations: int) -> BenchmarkResult:
+ """Run a benchmark using pyodbc"""
+ result = BenchmarkResult(f"{name} (pyodbc)")
+
+ for i in range(iterations):
+ try:
+ start_time = time.time()
+ conn = pyodbc.connect(CONN_STR_PYODBC)
+ cursor = conn.cursor()
+ cursor.execute(query)
+ rows = cursor.fetchall()
+ elapsed = time.time() - start_time
+
+ result.add_time(elapsed, len(rows))
+
+ cursor.close()
+ conn.close()
+ except Exception as e:
+ print(f" Error in iteration {i+1}: {e}")
+ continue
+
+ return result
+
+
+def run_benchmark_mssql_python(query: str, name: str, iterations: int) -> BenchmarkResult:
+ """Run a benchmark using mssql-python"""
+ result = BenchmarkResult(f"{name} (mssql-python)")
+
+ for i in range(iterations):
+ try:
+ start_time = time.time()
+ conn = connect(CONN_STR)
+ cursor = conn.cursor()
+ cursor.execute(query)
+ rows = cursor.fetchall()
+ elapsed = time.time() - start_time
+
+ result.add_time(elapsed, len(rows))
+
+ cursor.close()
+ conn.close()
+ except Exception as e:
+ print(f" Error in iteration {i+1}: {e}")
+ continue
+
+ return result
+
+
+def calculate_speedup(
+ pyodbc_result: BenchmarkResult, mssql_python_result: BenchmarkResult
+) -> float:
+ """Calculate speedup factor"""
+ if mssql_python_result.avg_time == 0:
+ return 0.0
+ return pyodbc_result.avg_time / mssql_python_result.avg_time
+
+
+def print_comparison(pyodbc_result: BenchmarkResult, mssql_python_result: BenchmarkResult):
+ """Print detailed comparison of results"""
+ speedup = calculate_speedup(pyodbc_result, mssql_python_result)
+
+ print(f"\n{'='*80}")
+ print(f"BENCHMARK: {pyodbc_result.name.split(' (')[0]}")
+ print(f"{'='*80}")
+ print(f"\npyodbc:")
+ print(f" Avg: {pyodbc_result.avg_time:.4f}s")
+ print(f" Min: {pyodbc_result.min_time:.4f}s")
+ print(f" Max: {pyodbc_result.max_time:.4f}s")
+ print(f" StdDev: {pyodbc_result.std_dev:.4f}s")
+ print(f" Rows: {pyodbc_result.row_count}")
+
+ print(f"\nmssql-python:")
+ print(f" Avg: {mssql_python_result.avg_time:.4f}s")
+ print(f" Min: {mssql_python_result.min_time:.4f}s")
+ print(f" Max: {mssql_python_result.max_time:.4f}s")
+ print(f" StdDev: {mssql_python_result.std_dev:.4f}s")
+ print(f" Rows: {mssql_python_result.row_count}")
+
+ print(f"\nPerformance:")
+ if speedup > 1:
+ print(f" mssql-python is {speedup:.2f}x FASTER than pyodbc")
+ elif speedup < 1 and speedup > 0:
+ print(f" mssql-python is {1/speedup:.2f}x SLOWER than pyodbc")
+ else:
+ print(f" Unable to calculate speedup")
+
+ print(f" Time difference: {(pyodbc_result.avg_time - mssql_python_result.avg_time):.4f}s")
+
+
+def main():
+ """Main benchmark runner"""
+ print("=" * 80)
+ print("PERFORMANCE BENCHMARKING: mssql-python vs pyodbc")
+ print("=" * 80)
+ print(f"\nConfiguration:")
+ print(f" Iterations per test: {NUM_ITERATIONS}")
+ print(f" Database: AdventureWorks2022")
+ print(f"\n")
+
+ # Define benchmarks
+ benchmarks = [
+ (COMPLEX_JOIN_AGGREGATION, "Complex Join Aggregation"),
+ (LARGE_DATASET, "Large Dataset Retrieval"),
+ (VERY_LARGE_DATASET, "Very Large Dataset (1.2M rows)"),
+ (SUBQUERY_WITH_CTE, "Subquery with CTE"),
+ ]
+
+ # Store all results for summary
+ all_results: List[Tuple[BenchmarkResult, BenchmarkResult]] = []
+
+ # Run each benchmark
+ for query, name in benchmarks:
+ print(f"\nRunning: {name}")
+ print(f" Testing with pyodbc... ", end="", flush=True)
+ pyodbc_result = run_benchmark_pyodbc(query, name, NUM_ITERATIONS)
+ print(f"OK (avg: {pyodbc_result.avg_time:.4f}s)")
+
+ print(f" Testing with mssql-python... ", end="", flush=True)
+ mssql_python_result = run_benchmark_mssql_python(query, name, NUM_ITERATIONS)
+ print(f"OK (avg: {mssql_python_result.avg_time:.4f}s)")
+
+ all_results.append((pyodbc_result, mssql_python_result))
+
+ # Print detailed comparisons
+ print("\n\n" + "=" * 80)
+ print("DETAILED RESULTS")
+ print("=" * 80)
+
+ for pyodbc_result, mssql_python_result in all_results:
+ print_comparison(pyodbc_result, mssql_python_result)
+
+ # Print summary table
+ print("\n\n" + "=" * 80)
+ print("SUMMARY TABLE")
+ print("=" * 80)
+ print(f"\n{'Benchmark':<35} {'pyodbc (s)':<15} {'mssql-python (s)':<20} {'Speedup'}")
+ print("-" * 80)
+
+ total_pyodbc = 0.0
+ total_mssql_python = 0.0
+
+ for pyodbc_result, mssql_python_result in all_results:
+ name = pyodbc_result.name.split(" (")[0]
+ speedup = calculate_speedup(pyodbc_result, mssql_python_result)
+
+ total_pyodbc += pyodbc_result.avg_time
+ total_mssql_python += mssql_python_result.avg_time
+
+ print(
+ f"{name:<35} {pyodbc_result.avg_time:<15.4f} {mssql_python_result.avg_time:<20.4f} {speedup:.2f}x"
+ )
+
+ print("-" * 80)
+ print(
+ f"{'TOTAL':<35} {total_pyodbc:<15.4f} {total_mssql_python:<20.4f} "
+ f"{total_pyodbc/total_mssql_python if total_mssql_python > 0 else 0:.2f}x"
+ )
+
+ # Overall conclusion
+ overall_speedup = total_pyodbc / total_mssql_python if total_mssql_python > 0 else 0
+ print(f"\n{'='*80}")
+ print("OVERALL CONCLUSION")
+ print("=" * 80)
+ if overall_speedup > 1:
+ print(f"\nmssql-python is {overall_speedup:.2f}x FASTER than pyodbc on average")
+ print(
+ f"Total time saved: {total_pyodbc - total_mssql_python:.4f}s ({((total_pyodbc - total_mssql_python)/total_pyodbc*100):.1f}%)"
+ )
+ elif overall_speedup < 1 and overall_speedup > 0:
+ print(f"\nmssql-python is {1/overall_speedup:.2f}x SLOWER than pyodbc on average")
+ print(
+ f"Total time difference: {total_mssql_python - total_pyodbc:.4f}s ({((total_mssql_python - total_pyodbc)/total_mssql_python*100):.1f}%)"
+ )
+
+ print(f"\n{'='*80}\n")
+
+
+if __name__ == "__main__":
+ try:
+ main()
+ except KeyboardInterrupt:
+ print("\n\nBenchmark interrupted by user.")
+ sys.exit(1)
+ except Exception as e:
+ print(f"\n\nFatal error: {e}")
+ import traceback
+
+ traceback.print_exc()
+ sys.exit(1)
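For reading the summary table that `perf-benchmarking.py` prints: `calculate_speedup` divides the pyodbc average by the mssql-python average, so values above 1.0 mean mssql-python finished faster. A tiny self-contained check with made-up timings (not measured results):

```python
import statistics

# Hypothetical per-iteration timings in seconds, purely for illustration.
pyodbc_times = [0.52, 0.50, 0.54]
mssql_python_times = [0.40, 0.41, 0.39]

speedup = statistics.mean(pyodbc_times) / statistics.mean(mssql_python_times)
print(f"mssql-python is {speedup:.2f}x faster")  # -> 1.30x for these sample numbers
```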
diff --git a/eng/pipelines/build-whl-pipeline.yml b/eng/pipelines/build-whl-pipeline.yml
index 365f26efe..a6540c8aa 100644
--- a/eng/pipelines/build-whl-pipeline.yml
+++ b/eng/pipelines/build-whl-pipeline.yml
@@ -7,6 +7,11 @@ trigger:
include:
- main
+pr:
+ branches:
+ include:
+ - main
+
# Schedule the pipeline to run on main branch daily at 07:00 AM IST
schedules:
- cron: "30 1 * * *"
@@ -14,6 +19,7 @@ schedules:
branches:
include:
- main
+ always: true # Always run even if there are no changes
jobs:
- job: BuildWindowsWheels
@@ -252,6 +258,9 @@ jobs:
# Install CMake on macOS
- script: |
brew update
+ # Uninstall existing CMake to avoid tap conflicts
+ brew uninstall cmake --ignore-dependencies || echo "CMake not installed or already removed"
+ # Install CMake from homebrew/core
brew install cmake
displayName: 'Install CMake'
@@ -285,8 +294,13 @@ jobs:
brew update
brew install docker colima
- # Start Colima with extra resources
- colima start --cpu 4 --memory 8 --disk 50
+ # Try VZ first, fall back to QEMU if it fails
+ # Use more conservative resource allocation for Azure DevOps runners
+ colima start --cpu 3 --memory 10 --disk 30 --vm-type=vz || \
+ colima start --cpu 3 --memory 10 --disk 30 --vm-type=qemu
+
+ # Give Colima time to finish starting before continuing
+ sleep 30
# Optional: set Docker context (usually automatic)
docker context use colima >/dev/null || true
@@ -295,6 +309,7 @@ jobs:
docker version
docker ps
displayName: 'Install and start Colima-based Docker'
+ timeoutInMinutes: 15
- script: |
# Pull and run SQL Server container
@@ -325,7 +340,7 @@ jobs:
python -m pytest -v
displayName: 'Run Pytest to validate bindings'
env:
- DB_CONNECTION_STRING: 'Driver=ODBC Driver 18 for SQL Server;Server=localhost;Database=master;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes'
+ DB_CONNECTION_STRING: 'Server=tcp:127.0.0.1,1433;Database=master;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes'
# Build wheel package for universal2
- script: |
@@ -361,746 +376,492 @@ jobs:
displayName: 'Publish all wheels as artifacts'
- job: BuildLinuxWheels
- pool:
- vmImage: 'ubuntu-latest'
displayName: 'Build Linux -'
+ pool: { vmImage: 'ubuntu-latest' }
+ timeoutInMinutes: 120
strategy:
matrix:
- # Python 3.10 (x86_64 and ARM64)
- py310_x86_64_ubuntu:
- pythonVersion: '3.10'
- shortPyVer: '310'
- targetArch: 'x86_64'
- dockerPlatform: 'linux/amd64'
- dockerImage: 'ubuntu:22.04'
- distroName: 'Ubuntu'
- packageManager: 'apt'
- py310_arm64_ubuntu:
- pythonVersion: '3.10'
- shortPyVer: '310'
- targetArch: 'arm64'
- dockerPlatform: 'linux/arm64'
- dockerImage: 'ubuntu:22.04'
- distroName: 'Ubuntu'
- packageManager: 'apt'
- py310_x86_64_debian:
- pythonVersion: '3.10'
- shortPyVer: '310'
- targetArch: 'x86_64'
- dockerPlatform: 'linux/amd64'
- dockerImage: 'debian:12'
- distroName: 'Debian'
- packageManager: 'apt'
- py310_arm64_debian:
- pythonVersion: '3.10'
- shortPyVer: '310'
- targetArch: 'arm64'
- dockerPlatform: 'linux/arm64'
- dockerImage: 'debian:12'
- distroName: 'Debian'
- packageManager: 'apt'
- py310_x86_64_rhel:
- pythonVersion: '3.10'
- shortPyVer: '310'
- targetArch: 'x86_64'
- dockerPlatform: 'linux/amd64'
- dockerImage: 'registry.access.redhat.com/ubi9/ubi:latest'
- distroName: 'RHEL'
- packageManager: 'dnf'
- buildFromSource: 'true'
- py310_arm64_rhel:
- pythonVersion: '3.10'
- shortPyVer: '310'
- targetArch: 'arm64'
- dockerPlatform: 'linux/arm64'
- dockerImage: 'registry.access.redhat.com/ubi9/ubi:latest'
- distroName: 'RHEL'
- packageManager: 'dnf'
- buildFromSource: 'true'
-
- # Python 3.11 (x86_64 and ARM64)
- py311_x86_64_ubuntu:
- pythonVersion: '3.11'
- shortPyVer: '311'
- targetArch: 'x86_64'
- dockerPlatform: 'linux/amd64'
- dockerImage: 'ubuntu:22.04'
- distroName: 'Ubuntu'
- packageManager: 'apt'
- py311_arm64_ubuntu:
- pythonVersion: '3.11'
- shortPyVer: '311'
- targetArch: 'arm64'
- dockerPlatform: 'linux/arm64'
- dockerImage: 'ubuntu:22.04'
- distroName: 'Ubuntu'
- packageManager: 'apt'
- py311_x86_64_debian:
- pythonVersion: '3.11'
- shortPyVer: '311'
- targetArch: 'x86_64'
- dockerPlatform: 'linux/amd64'
- dockerImage: 'debian:12'
- distroName: 'Debian'
- packageManager: 'apt'
- py311_arm64_debian:
- pythonVersion: '3.11'
- shortPyVer: '311'
- targetArch: 'arm64'
- dockerPlatform: 'linux/arm64'
- dockerImage: 'debian:12'
- distroName: 'Debian'
- packageManager: 'apt'
- py311_x86_64_rhel:
- pythonVersion: '3.11'
- shortPyVer: '311'
- targetArch: 'x86_64'
- dockerPlatform: 'linux/amd64'
- dockerImage: 'registry.access.redhat.com/ubi9/ubi:latest'
- distroName: 'RHEL'
- packageManager: 'dnf'
- py311_arm64_rhel:
- pythonVersion: '3.11'
- shortPyVer: '311'
- targetArch: 'arm64'
- dockerPlatform: 'linux/arm64'
- dockerImage: 'registry.access.redhat.com/ubi9/ubi:latest'
- distroName: 'RHEL'
- packageManager: 'dnf'
-
- # Python 3.12 (x86_64 and ARM64) - Note: Not available for Ubuntu 22.04 via deadsnakes PPA
- # Only build for Debian and RHEL where Python 3.12 is available
- py312_x86_64_debian:
- pythonVersion: '3.12'
- shortPyVer: '312'
- targetArch: 'x86_64'
- dockerPlatform: 'linux/amd64'
- dockerImage: 'debian:12'
- distroName: 'Debian'
- packageManager: 'apt'
- py312_arm64_debian:
- pythonVersion: '3.12'
- shortPyVer: '312'
- targetArch: 'arm64'
- dockerPlatform: 'linux/arm64'
- dockerImage: 'debian:12'
- distroName: 'Debian'
- packageManager: 'apt'
- py312_x86_64_rhel:
- pythonVersion: '3.12'
- shortPyVer: '312'
- targetArch: 'x86_64'
- dockerPlatform: 'linux/amd64'
- dockerImage: 'registry.access.redhat.com/ubi9/ubi:latest'
- distroName: 'RHEL'
- packageManager: 'dnf'
- py312_arm64_rhel:
- pythonVersion: '3.12'
- shortPyVer: '312'
- targetArch: 'arm64'
- dockerPlatform: 'linux/arm64'
- dockerImage: 'registry.access.redhat.com/ubi9/ubi:latest'
- distroName: 'RHEL'
- packageManager: 'dnf'
-
- # Python 3.13 (x86_64 and ARM64)
- py313_x86_64_ubuntu:
- pythonVersion: '3.13'
- shortPyVer: '313'
- targetArch: 'x86_64'
- dockerPlatform: 'linux/amd64'
- dockerImage: 'ubuntu:22.04'
- distroName: 'Ubuntu'
- packageManager: 'apt'
- py313_arm64_ubuntu:
- pythonVersion: '3.13'
- shortPyVer: '313'
- targetArch: 'arm64'
- dockerPlatform: 'linux/arm64'
- dockerImage: 'ubuntu:22.04'
- distroName: 'Ubuntu'
- packageManager: 'apt'
- py313_x86_64_debian:
- pythonVersion: '3.13'
- shortPyVer: '313'
- targetArch: 'x86_64'
- dockerPlatform: 'linux/amd64'
- dockerImage: 'debian:12'
- distroName: 'Debian'
- packageManager: 'apt'
- py313_arm64_debian:
- pythonVersion: '3.13'
- shortPyVer: '313'
- targetArch: 'arm64'
- dockerPlatform: 'linux/arm64'
- dockerImage: 'debian:12'
- distroName: 'Debian'
- packageManager: 'apt'
- py313_x86_64_rhel:
- pythonVersion: '3.13'
- shortPyVer: '313'
- targetArch: 'x86_64'
- dockerPlatform: 'linux/amd64'
- dockerImage: 'registry.access.redhat.com/ubi9/ubi:latest'
- distroName: 'RHEL'
- packageManager: 'dnf'
- buildFromSource: 'true'
- py313_arm64_rhel:
- pythonVersion: '3.13'
- shortPyVer: '313'
- targetArch: 'arm64'
- dockerPlatform: 'linux/arm64'
- dockerImage: 'registry.access.redhat.com/ubi9/ubi:latest'
- distroName: 'RHEL'
- packageManager: 'dnf'
- buildFromSource: 'true'
+ manylinux_x86_64:
+ LINUX_TAG: 'manylinux'
+ ARCH: 'x86_64'
+ DOCKER_PLATFORM: 'linux/amd64'
+ IMAGE: 'quay.io/pypa/manylinux_2_28_x86_64'
+ manylinux_aarch64:
+ LINUX_TAG: 'manylinux'
+ ARCH: 'aarch64'
+ DOCKER_PLATFORM: 'linux/arm64'
+ IMAGE: 'quay.io/pypa/manylinux_2_28_aarch64'
+ musllinux_x86_64:
+ LINUX_TAG: 'musllinux'
+ ARCH: 'x86_64'
+ DOCKER_PLATFORM: 'linux/amd64'
+ IMAGE: 'quay.io/pypa/musllinux_1_2_x86_64'
+ musllinux_aarch64:
+ LINUX_TAG: 'musllinux'
+ ARCH: 'aarch64'
+ DOCKER_PLATFORM: 'linux/arm64'
+ IMAGE: 'quay.io/pypa/musllinux_1_2_aarch64'
steps:
- # Set up Docker buildx for multi-architecture support
- - script: |
- docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
- docker buildx create --name multiarch --driver docker-container --use || true
- docker buildx inspect --bootstrap
- displayName: 'Setup Docker buildx for multi-architecture support'
-
- - script: |
- # Create a Docker container for building
- docker run -d --name build-container-$(distroName)-$(targetArch) \
- --platform $(dockerPlatform) \
- -v $(Build.SourcesDirectory):/workspace \
- -w /workspace \
- --network bridge \
- $(dockerImage) \
- tail -f /dev/null
- displayName: 'Create $(distroName) $(targetArch) container'
-
- - script: |
- # Start SQL Server container (always x86_64 since SQL Server doesn't support ARM64)
- docker run -d --name sqlserver-$(distroName)-$(targetArch) \
- --platform linux/amd64 \
- -e ACCEPT_EULA=Y \
- -e MSSQL_SA_PASSWORD="$(DB_PASSWORD)" \
- -p 1433:1433 \
- mcr.microsoft.com/mssql/server:2022-latest
-
- # Wait for SQL Server to be ready
- echo "Waiting for SQL Server to start..."
- for i in {1..60}; do
- if docker exec sqlserver-$(distroName)-$(targetArch) \
- /opt/mssql-tools18/bin/sqlcmd \
- -S localhost \
- -U SA \
- -P "$(DB_PASSWORD)" \
- -C -Q "SELECT 1" >/dev/null 2>&1; then
- echo "SQL Server is ready!"
- break
- fi
- echo "Waiting... ($i/60)"
- sleep 2
- done
-
- # Create test database
- docker exec sqlserver-$(distroName)-$(targetArch) \
- /opt/mssql-tools18/bin/sqlcmd \
- -S localhost \
- -U SA \
- -P "$(DB_PASSWORD)" \
- -C -Q "CREATE DATABASE TestDB"
- displayName: 'Start SQL Server container for $(distroName) $(targetArch)'
- env:
- DB_PASSWORD: $(DB_PASSWORD)
-
- - script: |
- # Install dependencies in the container
- if [ "$(packageManager)" = "apt" ]; then
- # Ubuntu/Debian
- docker exec build-container-$(distroName)-$(targetArch) bash -c "
- export DEBIAN_FRONTEND=noninteractive
- export TZ=UTC
- ln -snf /usr/share/zoneinfo/\$TZ /etc/localtime && echo \$TZ > /etc/timezone
-
- # Update package lists
- apt-get update
-
- # Install basic tools first
- apt-get install -y software-properties-common curl wget gnupg build-essential cmake
-
- # Add deadsnakes PPA for newer Python versions (Ubuntu only)
- if [ '$(distroName)' = 'Ubuntu' ]; then
- add-apt-repository -y ppa:deadsnakes/ppa
- apt-get update
- fi
-
- # Install Python and development packages
- # Handle different Python version availability per distribution
- if [ '$(distroName)' = 'Debian' ]; then
- # Debian 12 has Python 3.11 by default, some older/newer versions may not be available
- case '$(pythonVersion)' in
- '3.11')
- # Python 3.11 is the default in Debian 12
- apt-get install -y python$(pythonVersion) python$(pythonVersion)-dev python$(pythonVersion)-venv python$(pythonVersion)-distutils
- PYTHON_CMD=python$(pythonVersion)
- ;;
- '3.10'|'3.12'|'3.13')
- # These versions may not be available in Debian 12, use python3 and create symlinks
- echo 'Python $(pythonVersion) may not be available in Debian 12, using available python3'
- apt-get install -y python3 python3-dev python3-venv
- # Note: distutils is not available for Python 3.12+
- if [ '$(pythonVersion)' != '3.12' ] && [ '$(pythonVersion)' != '3.13' ]; then
- apt-get install -y python3-distutils || echo 'distutils not available for this Python version'
- fi
- # Create symlinks to make the desired version available
- # Find the actual python3 version and create proper symlinks
- ACTUAL_PYTHON=\$(python3 --version | grep -o '[0-9]\+\.[0-9]\+')
- echo 'Detected Python version:' \$ACTUAL_PYTHON
- ln -sf /usr/bin/python3 /usr/local/bin/python$(pythonVersion)
- ln -sf /usr/bin/python3 /usr/local/bin/python
- PYTHON_CMD=/usr/local/bin/python$(pythonVersion)
- ;;
- *)
- echo 'Unsupported Python version $(pythonVersion) for Debian, using python3'
- apt-get install -y python3 python3-dev python3-venv
- ln -sf /usr/bin/python3 /usr/local/bin/python$(pythonVersion)
- ln -sf /usr/bin/python3 /usr/local/bin/python
- PYTHON_CMD=/usr/local/bin/python$(pythonVersion)
- ;;
- esac
- else
- # Ubuntu has deadsnakes PPA, so more versions are available
- # Note: distutils is not available for newer Python versions (3.12+)
- if [ '$(pythonVersion)' = '3.12' ] || [ '$(pythonVersion)' = '3.13' ]; then
- apt-get install -y python$(pythonVersion) python$(pythonVersion)-dev python$(pythonVersion)-venv
+ - checkout: self
+ fetchDepth: 0
+
+ # Enable QEMU so we can run aarch64 containers on the x86_64 agent
+ - script: |
+ sudo docker run --rm --privileged tonistiigi/binfmt --install all
+ displayName: 'Enable QEMU (for aarch64)'
+
+ # Prep artifact dirs
+ - script: |
+ rm -rf $(Build.ArtifactStagingDirectory)/dist $(Build.ArtifactStagingDirectory)/ddbc-bindings
+ mkdir -p $(Build.ArtifactStagingDirectory)/dist
+ mkdir -p $(Build.ArtifactStagingDirectory)/ddbc-bindings/$(LINUX_TAG)-$(ARCH)
+ displayName: 'Prepare artifact directories'
+
+ # Start a long-lived container for this lane
+ - script: |
+ docker run -d --name build-$(LINUX_TAG)-$(ARCH) \
+ --platform $(DOCKER_PLATFORM) \
+ -v $(Build.SourcesDirectory):/workspace \
+ -w /workspace \
+ $(IMAGE) \
+ tail -f /dev/null
+ displayName: 'Start $(LINUX_TAG) $(ARCH) container'
+
+ # Install system build dependencies
+ # - Installs compiler toolchain, CMake, unixODBC headers, and Kerberos/keyutils runtimes
+ # - manylinux (glibc) uses dnf/yum; musllinux (Alpine/musl) uses apk
+ # - Kerberos/keyutils are needed because msodbcsql pulls in libgssapi_krb5.so.* and libkeyutils*.so.*
+ # - ccache is optional but speeds rebuilds inside the container
+ - script: |
+ set -euxo pipefail
+ if [[ "$(LINUX_TAG)" == "manylinux" ]]; then
+ # ===== manylinux (glibc) containers =====
+ docker exec build-$(LINUX_TAG)-$(ARCH) bash -lc '
+ set -euxo pipefail
+ # Prefer dnf (Alma/Rocky base), fall back to yum if present
+ if command -v dnf >/dev/null 2>&1; then
+ dnf -y update || true
+ # Toolchain + CMake + unixODBC headers + Kerberos + keyutils + ccache
+ dnf -y install gcc gcc-c++ make cmake unixODBC-devel krb5-libs keyutils-libs ccache || true
+ elif command -v yum >/dev/null 2>&1; then
+ yum -y update || true
+ yum -y install gcc gcc-c++ make cmake unixODBC-devel krb5-libs keyutils-libs ccache || true
else
- apt-get install -y python$(pythonVersion) python$(pythonVersion)-dev python$(pythonVersion)-venv python$(pythonVersion)-distutils
+ echo "No dnf/yum found in manylinux image" >&2
fi
- # For Ubuntu, create symlinks for consistency
- ln -sf /usr/bin/python$(pythonVersion) /usr/local/bin/python$(pythonVersion)
- ln -sf /usr/bin/python$(pythonVersion) /usr/local/bin/python
- PYTHON_CMD=/usr/local/bin/python$(pythonVersion)
- fi
-
- # Install pip for the specific Python version
- curl -sS https://bootstrap.pypa.io/get-pip.py | \$PYTHON_CMD
-
- # Install remaining packages
- apt-get install -y pybind11-dev || echo 'pybind11-dev not available, will install via pip'
-
- # Verify Python installation
- echo 'Python installation verification:'
- echo 'Using PYTHON_CMD:' \$PYTHON_CMD
- \$PYTHON_CMD --version
- if [ -f /usr/local/bin/python ]; then
- /usr/local/bin/python --version
- fi
- "
- else
- # RHEL/DNF
- docker exec build-container-$(distroName)-$(targetArch) bash -c "
- # Enable CodeReady Builder repository for additional packages (skip if not available)
- dnf install -y dnf-plugins-core || true
- dnf install -y epel-release || echo 'EPEL not available in UBI9, continuing without it'
- dnf config-manager --set-enabled crb || dnf config-manager --set-enabled powertools || echo 'No additional repos to enable'
-
- # Install dependencies
- dnf update -y
- dnf groupinstall -y 'Development Tools' || echo 'Development Tools group not available, installing individual packages'
-
- # Install development tools and cmake separately to ensure they work
- # Note: Handle curl conflicts by replacing curl-minimal with curl
- dnf install -y wget gnupg2 glibc-devel kernel-headers
- dnf install -y --allowerasing curl || dnf install -y curl || echo 'curl installation failed, continuing'
- dnf install -y gcc gcc-c++ make binutils
- dnf install -y cmake
-
- # Install additional dependencies needed for Python source compilation
- # Some packages may not be available in UBI9, so install what we can
- dnf install -y openssl-devel bzip2-devel libffi-devel zlib-devel || echo 'Some core devel packages failed'
- dnf install -y ncurses-devel sqlite-devel xz-devel || echo 'Some optional devel packages not available'
- # These are often missing in UBI9, install if available
- dnf install -y readline-devel tk-devel gdbm-devel libnsl2-devel libuuid-devel || echo 'Some Python build dependencies not available in UBI9'
-
- # If that doesn't work, try installing from different repositories
- if ! which gcc; then
- echo 'Trying alternative gcc installation...'
- dnf --enablerepo=ubi-9-codeready-builder install -y gcc gcc-c++
- fi
-
- # For RHEL, we need to handle Python versions more carefully
- # RHEL 9 UBI has python3.9 by default, but we don't support 3.9
- # We need to install specific versions or build from source
-
- # First, try to install the specific Python version
- PYTHON_INSTALLED=false
- echo 'Trying to install Python $(pythonVersion) from available repositories'
- # Try from default repos first
- if dnf install -y python$(pythonVersion) python$(pythonVersion)-devel python$(pythonVersion)-pip; then
- echo 'Successfully installed Python $(pythonVersion) from default repos'
- PYTHON_INSTALLED=true
- # Create symlinks for the specific version
- ln -sf /usr/bin/python$(pythonVersion) /usr/local/bin/python$(pythonVersion)
- ln -sf /usr/bin/python$(pythonVersion) /usr/local/bin/python
+
+ # Quick visibility for logs
+ echo "---- tool versions ----"
+ gcc --version || true
+ cmake --version || true
+ '
+ else
+ # ===== musllinux (Alpine/musl) containers =====
+ docker exec build-$(LINUX_TAG)-$(ARCH) sh -lc '
+ set -euxo pipefail
+ apk update || true
+ # Toolchain + CMake + unixODBC headers + Kerberos + keyutils + ccache
+ apk add --no-cache bash build-base cmake unixodbc-dev krb5-libs keyutils-libs ccache || true
+
+ # Quick visibility for logs
+ echo "---- tool versions ----"
+ gcc --version || true
+ cmake --version || true
+ '
+ fi
+ displayName: 'Install system build dependencies'
+
+ # Build wheels for cp310..cp313 using the prebuilt /opt/python interpreters
+ - script: |
+ set -euxo pipefail
+ if [[ "$(LINUX_TAG)" == "manylinux" ]]; then SHELL_EXE=bash; else SHELL_EXE=sh; fi
+
+ # Ensure dist exists inside the container
+ docker exec build-$(LINUX_TAG)-$(ARCH) $SHELL_EXE -lc 'mkdir -p /workspace/dist'
+
+ # Loop through CPython versions present in the image
+ for PYBIN in cp310 cp311 cp312 cp313; do
+ echo "=== Building for $PYBIN on $(LINUX_TAG)/$(ARCH) ==="
+ if [[ "$(LINUX_TAG)" == "manylinux" ]]; then
+ docker exec build-$(LINUX_TAG)-$(ARCH) bash -lc "
+ set -euxo pipefail;
+ PY=/opt/python/${PYBIN}-${PYBIN}/bin/python;
+ test -x \$PY || { echo 'Python \$PY missing'; exit 0; } # skip if not present
+ ln -sf \$PY /usr/local/bin/python;
+ python -m pip install -U pip setuptools wheel pybind11;
+ echo 'python:' \$(python -V); which python;
+ # 👉 run from the directory that has CMakeLists.txt
+ cd /workspace/mssql_python/pybind;
+ bash build.sh;
+
+ # back to repo root to build the wheel
+ cd /workspace;
+ python setup.py bdist_wheel;
+
+ # TODO: re-enable wheel repair/tagging. auditwheel is skipped for now because it uses ldd for cross-verification and tries to bundle libraries we intentionally do not package (e.g. libk5crypto, libkeyutils).
+ # We assume the OS provides these libraries rather than bundling them in the wheel.
+ # for W in /workspace/dist/*.whl; do auditwheel repair -w /workspace/dist \"\$W\" || true; done
+ "
else
- echo 'Python $(pythonVersion) not available in default RHEL repos'
- # For Python 3.11+ which might be available in newer RHEL versions
- if [ '$(pythonVersion)' = '3.11' ] || [ '$(pythonVersion)' = '3.12' ]; then
- echo 'Trying alternative installation for Python $(pythonVersion)'
- # Try installing from additional repos
- dnf install -y python$(pythonVersion) python$(pythonVersion)-devel python$(pythonVersion)-pip || true
- if command -v python$(pythonVersion) >/dev/null 2>&1; then
- echo 'Found Python $(pythonVersion) after alternative installation'
- PYTHON_INSTALLED=true
- ln -sf /usr/bin/python$(pythonVersion) /usr/local/bin/python$(pythonVersion)
- ln -sf /usr/bin/python$(pythonVersion) /usr/local/bin/python
- fi
- elif [ '$(pythonVersion)' = '3.10' ] || [ '$(pythonVersion)' = '3.13' ]; then
- echo 'Python $(pythonVersion) requires building from source'
-
- # Download Python source
- cd /tmp
- if [ '$(pythonVersion)' = '3.10' ]; then
- PYTHON_URL='https://www.python.org/ftp/python/3.10.15/Python-3.10.15.tgz'
- elif [ '$(pythonVersion)' = '3.13' ]; then
- PYTHON_URL='https://www.python.org/ftp/python/3.13.1/Python-3.13.1.tgz'
- fi
-
- echo \"Downloading Python from \$PYTHON_URL\"
- wget \$PYTHON_URL -O python-$(pythonVersion).tgz
- tar -xzf python-$(pythonVersion).tgz
- cd Python-$(pythonVersion)*
-
- # Configure and compile Python with optimizations disabled for missing deps
- echo 'Configuring Python build (optimizations may be disabled due to missing dependencies)'
- ./configure --prefix=/usr/local --with-ensurepip=install --enable-loadable-sqlite-extensions
-
- echo 'Compiling Python (this may take several minutes)'
- make -j\$(nproc)
-
- echo 'Installing Python'
- make altinstall
-
- # Create symlinks
- ln -sf /usr/local/bin/python$(pythonVersion) /usr/local/bin/python$(pythonVersion)
- ln -sf /usr/local/bin/python$(pythonVersion) /usr/local/bin/python
-
- # Verify installation
- /usr/local/bin/python$(pythonVersion) --version
- PYTHON_INSTALLED=true
-
- # Clean up
- cd /
- rm -rf /tmp/Python-$(pythonVersion)* /tmp/python-$(pythonVersion).tgz
-
- echo 'Successfully built and installed Python $(pythonVersion) from source'
- fi
+ docker exec build-$(LINUX_TAG)-$(ARCH) sh -lc "
+ set -euxo pipefail;
+ PY=/opt/python/${PYBIN}-${PYBIN}/bin/python;
+ test -x \$PY || { echo 'Python \$PY missing'; exit 0; } # skip if not present
+ ln -sf \$PY /usr/local/bin/python;
+ python -m pip install -U pip setuptools wheel pybind11;
+ echo 'python:' \$(python -V); which python;
+ # 👉 run from the directory that has CMakeLists.txt
+ cd /workspace/mssql_python/pybind;
+ bash build.sh;
+
+ # back to repo root to build the wheel
+ cd /workspace;
+ python setup.py bdist_wheel;
+
+ # repair/tag wheel (currently disabled)
+ # TODO: re-enable wheel repair/tagging. auditwheel is skipped for now because it uses ldd for cross-verification and tries to bundle libraries we intentionally do not package (e.g. libk5crypto, libkeyutils).
+ # We assume the OS provides these libraries rather than bundling them in the wheel.
+ # for W in /workspace/dist/*.whl; do auditwheel repair -w /workspace/dist \"\$W\" || true; done
+ "
fi
-
- # If we couldn't install the specific version, fail the build
- if [ \"\$PYTHON_INSTALLED\" = \"false\" ]; then
- echo 'ERROR: Could not install Python $(pythonVersion) - unsupported version'
- echo 'Supported versions for RHEL: 3.11, 3.12 (and 3.10, 3.13 via source compilation)'
- exit 1
+ done
+ displayName: 'Run build.sh and build wheels for cp310–cp313'
+
+ # Copy artifacts back to host
+ - script: |
+ set -euxo pipefail
+ # ---- Wheels ----
+ docker cp build-$(LINUX_TAG)-$(ARCH):/workspace/dist/. "$(Build.ArtifactStagingDirectory)/dist/" || echo "No wheels to copy"
+
+ # ---- .so files: only top-level under mssql_python (exclude subdirs like pybind) ----
+ # Prepare host dest
+ mkdir -p "$(Build.ArtifactStagingDirectory)/ddbc-bindings/$(LINUX_TAG)-$(ARCH)"
+
+ # Prepare a temp out dir inside the container
+ docker exec build-$(LINUX_TAG)-$(ARCH) $([[ "$(LINUX_TAG)" == "manylinux" ]] && echo bash -lc || echo sh -lc) '
+ set -euxo pipefail;
+ echo "Listing package dirs for sanity:";
+ ls -la /workspace/mssql_python || true;
+ ls -la /workspace/mssql_python/pybind || true;
+
+ OUT="/tmp/ddbc-out-$(LINUX_TAG)-$(ARCH)";
+ rm -rf "$OUT"; mkdir -p "$OUT";
+
+ # Copy ONLY top-level .so files from mssql_python (no recursion)
+ find /workspace/mssql_python -maxdepth 1 -type f -name "*.so" -exec cp -v {} "$OUT"/ \; || true
+
+ echo "Top-level .so collected in $OUT:";
+ ls -la "$OUT" || true
+ '
+
+ # Copy those .so files from container to host
+ docker cp "build-$(LINUX_TAG)-$(ARCH):/tmp/ddbc-out-$(LINUX_TAG)-$(ARCH)/." \
+ "$(Build.ArtifactStagingDirectory)/ddbc-bindings/$(LINUX_TAG)-$(ARCH)/" \
+ || echo "No top-level .so files to copy"
+
+ # (Optional) prune non-.so just in case
+ find "$(Build.ArtifactStagingDirectory)/ddbc-bindings/$(LINUX_TAG)-$(ARCH)" -maxdepth 1 -type f ! -name "*.so" -delete || true
+ displayName: 'Copy wheels and .so back to host'
+
+ # Cleanup container
+ - script: |
+ docker stop build-$(LINUX_TAG)-$(ARCH) || true
+ docker rm build-$(LINUX_TAG)-$(ARCH) || true
+ displayName: 'Clean up container'
+ condition: always()
+
+ # Publish wheels under the fixed artifact name (mssql-python-wheels-dist)
+ - task: PublishBuildArtifacts@1
+ condition: succeededOrFailed()
+ inputs:
+ PathtoPublish: '$(Build.ArtifactStagingDirectory)/dist'
+ ArtifactName: 'mssql-python-wheels-dist'
+ publishLocation: 'Container'
+ displayName: 'Publish wheels as artifacts'
+
+ # Publish compiled .so files under the fixed artifact name (mssql-python-ddbc-bindings)
+ - task: PublishBuildArtifacts@1
+ condition: succeededOrFailed()
+ inputs:
+ PathtoPublish: '$(Build.ArtifactStagingDirectory)/ddbc-bindings'
+ ArtifactName: 'mssql-python-ddbc-bindings'
+ publishLocation: 'Container'
+ displayName: 'Publish .so files as artifacts'
+
+# Job to test the built wheels on different Linux distributions with SQL Server
+- job: TestWheelsOnLinux
+ displayName: 'Pytests on Linux -'
+ dependsOn: BuildLinuxWheels
+ condition: succeeded('BuildLinuxWheels') # Only run if BuildLinuxWheels succeeded
+ pool: { vmImage: 'ubuntu-latest' }
+ timeoutInMinutes: 60
+
+ strategy:
+ matrix:
+ # x86_64
+ debian12:
+ BASE_IMAGE: 'debian:12-slim'
+ ARCH: 'x86_64'
+ DOCKER_PLATFORM: 'linux/amd64'
+ rhel_ubi9:
+ BASE_IMAGE: 'registry.access.redhat.com/ubi9/ubi:latest'
+ ARCH: 'x86_64'
+ DOCKER_PLATFORM: 'linux/amd64'
+ alpine320:
+ BASE_IMAGE: 'alpine:3.20'
+ ARCH: 'x86_64'
+ DOCKER_PLATFORM: 'linux/amd64'
+ # arm64
+ debian12_arm64:
+ BASE_IMAGE: 'debian:12-slim'
+ ARCH: 'arm64'
+ DOCKER_PLATFORM: 'linux/arm64'
+ rhel_ubi9_arm64:
+ BASE_IMAGE: 'registry.access.redhat.com/ubi9/ubi:latest'
+ ARCH: 'arm64'
+ DOCKER_PLATFORM: 'linux/arm64'
+ alpine320_arm64:
+ BASE_IMAGE: 'alpine:3.20'
+ ARCH: 'arm64'
+ DOCKER_PLATFORM: 'linux/arm64'
+
+ steps:
+ - checkout: self
+
+ - task: DownloadBuildArtifacts@0
+ inputs:
+ buildType: 'current'
+ downloadType: 'single'
+ artifactName: 'mssql-python-wheels-dist'
+ downloadPath: '$(System.ArtifactsDirectory)'
+ displayName: 'Download wheel artifacts from current build'
+
+ # Verify we actually have wheels before proceeding
+ - script: |
+ set -euxo pipefail
+ WHEEL_DIR="$(System.ArtifactsDirectory)/mssql-python-wheels-dist"
+ if [ ! -d "$WHEEL_DIR" ] || [ -z "$(ls -A $WHEEL_DIR/*.whl 2>/dev/null)" ]; then
+ echo "ERROR: No wheel files found in $WHEEL_DIR"
+ echo "Contents of artifacts directory:"
+ find "$(System.ArtifactsDirectory)" -type f -name "*.whl" || echo "No .whl files found anywhere"
+ exit 1
+ fi
+ echo "Found wheel files:"
+ ls -la "$WHEEL_DIR"/*.whl
+ displayName: 'Verify wheel artifacts exist'
+
+ # Start SQL Server container for testing
+ - script: |
+ set -euxo pipefail
+ docker run -d --name sqlserver \
+ --network bridge \
+ -e ACCEPT_EULA=Y \
+ -e MSSQL_SA_PASSWORD="$(DB_PASSWORD)" \
+ -p 1433:1433 \
+ mcr.microsoft.com/mssql/server:2022-latest
+
+ # Wait for SQL Server to be ready
+ echo "Waiting for SQL Server to start..."
+ for i in {1..30}; do
+ if docker exec sqlserver /opt/mssql-tools18/bin/sqlcmd \
+ -S localhost -U SA -P "$(DB_PASSWORD)" -C -Q "SELECT 1" >/dev/null 2>&1; then
+ echo "SQL Server is ready!"
+ break
fi
-
- # Install pybind11 development headers
- dnf install -y python3-pybind11-devel || echo 'pybind11-devel not available, will install via pip'
-
- # Verify installations
- echo 'Verifying installations:'
- python3 --version
- which gcc && which g++
- gcc --version
- g++ --version
- cmake --version || echo 'cmake not found in PATH'
- which cmake || echo 'cmake binary not found'
- "
- fi
- displayName: 'Install basic dependencies in $(distroName) $(targetArch) container'
-
- - script: |
- # Install ODBC driver in the container
- if [ "$(packageManager)" = "apt" ]; then
- # Ubuntu/Debian
- docker exec build-container-$(distroName)-$(targetArch) bash -c "
- export DEBIAN_FRONTEND=noninteractive
-
- # Download the package to configure the Microsoft repo
- if [ '$(distroName)' = 'Ubuntu' ]; then
- curl -sSL -O https://packages.microsoft.com/config/ubuntu/22.04/packages-microsoft-prod.deb
+ echo "Attempt $i/30: SQL Server not ready yet..."
+ sleep 3
+ done
+
+ # Create test database
+ docker exec sqlserver /opt/mssql-tools18/bin/sqlcmd \
+ -S localhost -U SA -P "$(DB_PASSWORD)" -C \
+ -Q "CREATE DATABASE TestDB"
+ displayName: 'Start SQL Server and create test database'
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ # Test wheels on target OS
+ - script: |
+ set -euxo pipefail
+
+ # Enable QEMU for ARM64 architectures
+ if [[ "$(ARCH)" == "arm64" ]] || [[ "$(ARCH)" == "aarch64" ]]; then
+ sudo docker run --rm --privileged tonistiigi/binfmt --install all
+ fi
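+ # tonistiigi/binfmt registers QEMU user-mode emulators via binfmt_misc, which is what
+ # lets this amd64 agent run the arm64 containers in this matrix.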
+
+ # Start test container with retry logic
+ for i in {1..3}; do
+ if docker run -d --name test-$(ARCH) \
+ --platform $(DOCKER_PLATFORM) \
+ --network bridge \
+ -v $(System.ArtifactsDirectory):/artifacts:ro \
+ $(BASE_IMAGE) \
+ tail -f /dev/null; then
+ echo "Container started successfully on attempt $i"
+ break
else
- # Debian 12
- curl -sSL -O https://packages.microsoft.com/config/debian/12/packages-microsoft-prod.deb
+ echo "Failed to start container on attempt $i, retrying..."
+ docker rm test-$(ARCH) 2>/dev/null || true
+ sleep 5
fi
-
- # Install the package
- dpkg -i packages-microsoft-prod.deb || true
- rm packages-microsoft-prod.deb
-
- # Update package list
- apt-get update
-
- # Install the driver
- ACCEPT_EULA=Y apt-get install -y msodbcsql18
- # optional: for bcp and sqlcmd
- ACCEPT_EULA=Y apt-get install -y mssql-tools18
- # optional: for unixODBC development headers
- apt-get install -y unixodbc-dev
- "
- else
- # RHEL/DNF
- docker exec build-container-$(distroName)-$(targetArch) bash -c "
- # Add Microsoft repository for RHEL 9
- curl -sSL -O https://packages.microsoft.com/config/rhel/9/packages-microsoft-prod.rpm
- rpm -Uvh packages-microsoft-prod.rpm
- rm packages-microsoft-prod.rpm
-
- # Update package list
- dnf update -y
-
- # Install the driver
- ACCEPT_EULA=Y dnf install -y msodbcsql18
- # optional: for bcp and sqlcmd
- ACCEPT_EULA=Y dnf install -y mssql-tools18
- # optional: for unixODBC development headers
- dnf install -y unixODBC-devel
- "
- fi
- displayName: 'Install ODBC Driver in $(distroName) $(targetArch) container'
-
- - script: |
- # Install Python dependencies in the container using virtual environment
- docker exec build-container-$(distroName)-$(targetArch) bash -c "
- # Debug: Check what Python versions are available
- echo 'Available Python interpreters:'
- ls -la /usr/bin/python* || echo 'No python in /usr/bin'
- ls -la /usr/local/bin/python* || echo 'No python in /usr/local/bin'
-
- # Determine which Python command to use
- if command -v /usr/local/bin/python$(pythonVersion) >/dev/null 2>&1; then
- PYTHON_CMD=/usr/local/bin/python$(pythonVersion)
- echo 'Using specific versioned Python from /usr/local/bin'
- elif command -v python$(pythonVersion) >/dev/null 2>&1; then
- PYTHON_CMD=python$(pythonVersion)
- echo 'Using python$(pythonVersion) from PATH'
- elif command -v python3 >/dev/null 2>&1; then
- PYTHON_CMD=python3
- echo 'Falling back to python3 instead of python$(pythonVersion)'
- else
- echo 'No Python interpreter found'
+ done
+
+ # Verify container is running
+ if ! docker ps | grep -q test-$(ARCH); then
+ echo "ERROR: Container test-$(ARCH) is not running"
+ docker logs test-$(ARCH) || true
exit 1
fi
-
- echo 'Selected Python command:' \$PYTHON_CMD
- echo 'Python version:' \$(\$PYTHON_CMD --version)
- echo 'Python executable path:' \$(which \$PYTHON_CMD)
-
- # Verify the symlink is pointing to the right version
- if [ '\$PYTHON_CMD' = '/usr/local/bin/python$(pythonVersion)' ]; then
- echo 'Symlink details:'
- ls -la /usr/local/bin/python$(pythonVersion)
- echo 'Target Python version:'
- /usr/local/bin/python$(pythonVersion) --version
+
+ # Install Python and dependencies based on OS
+ if [[ "$(BASE_IMAGE)" == alpine* ]]; then
+ echo "Setting up Alpine Linux..."
+ docker exec test-$(ARCH) sh -c "
+ apk update && apk add --no-cache python3 py3-pip python3-dev unixodbc-dev curl libtool libltdl krb5-libs
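+ # libltdl and krb5-libs cover the usual runtime dependencies of the Microsoft ODBC driver on Alpine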
+ python3 -m venv /venv
+ /venv/bin/pip install pytest
+ "
+ PY_CMD="/venv/bin/python"
+ elif [[ "$(BASE_IMAGE)" == *ubi* ]] || [[ "$(BASE_IMAGE)" == *rocky* ]] || [[ "$(BASE_IMAGE)" == *alma* ]]; then
+ echo "Setting up RHEL-based system..."
+ docker exec test-$(ARCH) bash -c "
+ set -euo pipefail
+ echo 'Installing Python on UBI/RHEL...'
+ if command -v dnf >/dev/null; then
+ dnf clean all
+ rm -rf /var/cache/dnf
+ dnf -y makecache
+
+ dnf list --showduplicates python3.11 python3.12 || true
+
+ # NOTE: do NOT install 'curl' to avoid curl-minimal conflict
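+ # (UBI 9 images ship curl-minimal, and installing the full curl package conflicts with it)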
+ if dnf -y install python3.12 python3.12-pip unixODBC-devel; then
+ PY=python3.12
+ echo 'Installed Python 3.12'
+ elif dnf -y install python3.11 python3.11-pip unixODBC-devel; then
+ PY=python3.11
+ echo 'Installed Python 3.11'
+ else
+ dnf -y install python3 python3-pip unixODBC-devel
+ PY=python3
+ echo 'Falling back to default Python'
+ fi
+
+ \$PY -m venv /venv
+ /venv/bin/python -m pip install -U 'pip>=25' pytest
+ /venv/bin/python --version
+ /venv/bin/pip --version
+ else
+ echo 'ERROR: dnf not found'
+ exit 1
+ fi
+ "
+ PY_CMD="/venv/bin/python"
+ else
+ echo "Setting up Debian/Ubuntu..."
+ docker exec test-$(ARCH) bash -c "
+ export DEBIAN_FRONTEND=noninteractive
+ apt-get update
+ apt-get install -y python3 python3-pip python3-venv python3-full unixodbc-dev curl
+ python3 -m venv /venv
+ /venv/bin/pip install pytest
+ "
+ PY_CMD="/venv/bin/python"
fi
-
- # Ensure we have pip available for this Python version
- if ! \$PYTHON_CMD -m pip --version >/dev/null 2>&1; then
- echo 'Installing pip for' \$PYTHON_CMD
- curl -sS https://bootstrap.pypa.io/get-pip.py | \$PYTHON_CMD
+
+ # Install the wheel (find the appropriate one for this architecture)
+ if [[ "$(BASE_IMAGE)" == alpine* ]]; then
+ SHELL_CMD="sh -c"
+ WHEEL_PATTERN="*musllinux*$(ARCH)*.whl"
+ else
+ SHELL_CMD="bash -c"
+ WHEEL_PATTERN="*manylinux*$(ARCH)*.whl"
fi
+
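+ # Background: pip picks the wheel whose platform tag matches the target interpreter,
+ # e.g. manylinux_* tags on glibc distros and musllinux_* tags on Alpine. WHEEL_PATTERN
+ # above is informational only; the install below lets pip resolve the tag itself.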
+ # Install the appropriate wheel in isolated directory
+ docker exec test-$(ARCH) $SHELL_CMD "
+ # Create isolated directory for wheel testing
+ mkdir -p /test_whl
+ cd /test_whl
+
+ echo 'Available wheels:'
+ ls -la /artifacts/mssql-python-wheels-dist/*.whl
+ echo 'Installing package (letting pip auto-select in isolated environment):'
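+ # --no-index with --find-links restricts pip to the downloaded artifacts, and --no-deps
+ # skips dependency resolution so only the wheel's own contents are exercised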
+ $PY_CMD -m pip install mssql_python --find-links /artifacts/mssql-python-wheels-dist --no-index --no-deps
+
+ # Verify package installation location
+ echo 'Installed package location:'
+ $PY_CMD -c 'import mssql_python; print(\"Package location:\", mssql_python.__file__)'
+
+ # Test basic import
+ $PY_CMD -c 'import mssql_python; print(\"Package imported successfully\")'
+ "
+
+ displayName: 'Test wheel installation and basic functionality on $(BASE_IMAGE)'
+ env:
+ DB_CONNECTION_STRING: 'Server=localhost;Database=TestDB;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes'
+
+ # Run pytest with source code while testing installed wheel
+ - script: |
+ set -euxo pipefail
- # Create a virtual environment with the available Python version
- \$PYTHON_CMD -m venv /opt/venv
- source /opt/venv/bin/activate
-
- # Verify virtual environment Python version
- echo 'Python version in venv after creation:' \$(python --version)
- echo 'Python executable in venv:' \$(which python)
-
- # Upgrade pip in virtual environment
- python -m pip install --upgrade pip
-
- # Install pybind11 if not available from system packages
- python -m pip install pybind11
-
- # Install dependencies in the virtual environment
- python -m pip install -r requirements.txt
- python -m pip install wheel setuptools
-
- # Make the virtual environment globally available
- echo 'source /opt/venv/bin/activate' >> ~/.bashrc
-
- # Final verification
- echo 'Final verification:'
- echo 'Python version in venv:' \$(python --version)
- echo 'Pip version in venv:' \$(pip --version)
- echo 'Python sys.executable:' \$(python -c 'import sys; print(sys.executable)')
- "
- displayName: 'Install Python dependencies in $(distroName) $(targetArch) container'
-
- - script: |
- # Build pybind bindings in the container
- docker exec build-container-$(distroName)-$(targetArch) bash -c "
- source /opt/venv/bin/activate
+ # Copy source code to container for pytest
+ echo "Copying source code to container for pytest..."
+ docker cp $(Build.SourcesDirectory)/. test-$(ARCH):/workspace/
- # Verify build tools are available
- echo 'Verifying build tools before starting build:'
- echo 'Python version:' \$(python --version)
- echo 'CMake status:'
- if command -v cmake >/dev/null 2>&1; then
- cmake --version
+ # Set shell command based on OS and define Python command
+ if [[ "$(BASE_IMAGE)" == alpine* ]]; then
+ SHELL_CMD="sh -c"
+ PY_CMD="/venv/bin/python"
else
- echo 'ERROR: cmake not found in PATH'
- echo 'PATH:' \$PATH
- echo 'Available binaries in /usr/bin/:'
- ls -la /usr/bin/ | grep cmake || echo 'No cmake in /usr/bin'
- echo 'Trying to find cmake:'
- find /usr -name cmake 2>/dev/null || echo 'cmake not found anywhere'
-
- # Try to install cmake if missing (RHEL specific)
- if [ '$(packageManager)' = 'dnf' ]; then
- echo 'Attempting to reinstall cmake for RHEL...'
- dnf install -y cmake
- echo 'After reinstall:'
- cmake --version || echo 'cmake still not available'
- fi
+ SHELL_CMD="bash -c"
+ PY_CMD="/venv/bin/python"
fi
- echo 'GCC status:'
- gcc --version || echo 'gcc not found'
- echo 'Make status:'
- make --version || echo 'make not found'
-
- cd mssql_python/pybind
- chmod +x build.sh
- ./build.sh
- "
- displayName: 'Build pybind bindings (.so) in $(distroName) $(targetArch) container'
-
- - script: |
- # Uninstall ODBC Driver before running tests
- if [ "$(packageManager)" = "apt" ]; then
- # Ubuntu/Debian
- docker exec build-container-$(distroName)-$(targetArch) bash -c "
- export DEBIAN_FRONTEND=noninteractive
- apt-get remove --purge -y msodbcsql18 mssql-tools18 unixodbc-dev
- rm -f /usr/bin/sqlcmd
- rm -f /usr/bin/bcp
- rm -rf /opt/microsoft/msodbcsql
- rm -f /lib/x86_64-linux-gnu/libodbcinst.so.2
- rm -f /lib/aarch64-linux-gnu/libodbcinst.so.2
- odbcinst -u -d -n 'ODBC Driver 18 for SQL Server' || true
- echo 'Uninstalled ODBC Driver and cleaned up libraries'
- echo 'Verifying $(targetArch) debian_ubuntu driver library signatures:'
- if [ '$(targetArch)' = 'x86_64' ]; then
- ldd mssql_python/libs/linux/debian_ubuntu/x86_64/lib/libmsodbcsql-18.5.so.1.1
- else
- ldd mssql_python/libs/linux/debian_ubuntu/arm64/lib/libmsodbcsql-18.5.so.1.1
+ docker exec test-$(ARCH) $SHELL_CMD "
+ # Go to workspace root where source code is
+ cd /workspace
+
+ echo 'Running pytest suite with installed wheel...'
+ echo 'Current directory:' \$(pwd)
+ echo 'Python version:'
+ $PY_CMD --version
+
+ # Verify we're importing the installed wheel, not local source
+ echo 'Package import verification:'
+ $PY_CMD -c 'import mssql_python; print(\"Testing installed wheel from:\", mssql_python.__file__)'
+
+ # Install test requirements
+ if [ -f requirements.txt ]; then
+ echo 'Installing test requirements...'
+ $PY_CMD -m pip install -r requirements.txt || echo 'Failed to install some requirements'
fi
- "
- else
- # RHEL/DNF
- docker exec build-container-$(distroName)-$(targetArch) bash -c "
- dnf remove -y msodbcsql18 mssql-tools18 unixODBC-devel
- rm -f /usr/bin/sqlcmd
- rm -f /usr/bin/bcp
- rm -rf /opt/microsoft/msodbcsql
- rm -f /lib64/libodbcinst.so.2
- odbcinst -u -d -n 'ODBC Driver 18 for SQL Server' || true
- echo 'Uninstalled ODBC Driver and cleaned up libraries'
- echo 'Verifying $(targetArch) rhel driver library signatures:'
- if [ '$(targetArch)' = 'x86_64' ]; then
- ldd mssql_python/libs/linux/rhel/x86_64/lib/libmsodbcsql-18.5.so.1.1
+
+ # Ensure pytest is available
+ $PY_CMD -m pip install pytest || echo 'pytest installation failed'
+
+ # List available test files
+ echo 'Available test files:'
+ find tests/ -name 'test_*.py' 2>/dev/null || echo 'No test files found in tests/'
+
+ # Run pytest
+ if [ -d tests/ ]; then
+ echo 'Starting pytest...'
+ $PY_CMD -m pytest -v || echo 'Some tests failed - this may be expected in a containerized environment'
else
- ldd mssql_python/libs/linux/rhel/arm64/lib/libmsodbcsql-18.5.so.1.1
+ echo 'No tests directory found, skipping pytest'
fi
"
- fi
- displayName: 'Uninstall ODBC Driver before running tests in $(distroName) $(targetArch) container'
-
- - script: |
- # Run tests in the container
- # Get SQL Server container IP
- SQLSERVER_IP=$(docker inspect sqlserver-$(distroName)-$(targetArch) --format='{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}')
- echo "SQL Server IP: $SQLSERVER_IP"
-
- docker exec \
- -e DB_CONNECTION_STRING="Driver=ODBC Driver 18 for SQL Server;Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes" \
- -e DB_PASSWORD="$(DB_PASSWORD)" \
- build-container-$(distroName)-$(targetArch) bash -c "
- source /opt/venv/bin/activate
- echo 'Build successful, running tests now on $(distroName) $(targetArch)'
- echo 'Python version:' \$(python --version)
- echo 'Architecture:' \$(uname -m)
- echo 'Using connection string: Driver=ODBC Driver 18 for SQL Server;Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=***;TrustServerCertificate=yes'
- python -m pytest -v --junitxml=test-results-$(distroName)-$(targetArch).xml --cov=. --cov-report=xml:coverage-$(distroName)-$(targetArch).xml --capture=tee-sys --cache-clear
- "
- displayName: 'Run pytest with coverage in $(distroName) $(targetArch) container'
- env:
- DB_PASSWORD: $(DB_PASSWORD)
-
- - script: |
- # Build wheel package in the container
- docker exec build-container-$(distroName)-$(targetArch) bash -c "
- source /opt/venv/bin/activate
- echo 'Building wheel for $(distroName) $(targetArch) Python $(pythonVersion)'
- echo 'Python version:' \$(python --version)
- echo 'Architecture:' \$(uname -m)
- python -m pip install --upgrade pip wheel setuptools
- python setup.py bdist_wheel
-
- # Verify the wheel was created
- ls -la dist/
- "
- displayName: 'Build wheel package in $(distroName) $(targetArch) container'
-
- - script: |
- # Copy test results from container to host
- docker cp build-container-$(distroName)-$(targetArch):/workspace/test-results-$(distroName)-$(targetArch).xml $(Build.SourcesDirectory)/
- docker cp build-container-$(distroName)-$(targetArch):/workspace/coverage-$(distroName)-$(targetArch).xml $(Build.SourcesDirectory)/
-
- # Copy wheel files from container to host
- mkdir -p $(Build.ArtifactStagingDirectory)/dist
- docker cp build-container-$(distroName)-$(targetArch):/workspace/dist/. $(Build.ArtifactStagingDirectory)/dist/ || echo "Failed to copy dist directory"
-
- # Copy .so files from container to host
- mkdir -p $(Build.ArtifactStagingDirectory)/ddbc-bindings/linux/$(distroName)-$(targetArch)
- docker cp build-container-$(distroName)-$(targetArch):/workspace/mssql_python/ddbc_bindings.cp$(shortPyVer)-$(targetArch).so $(Build.ArtifactStagingDirectory)/ddbc-bindings/linux/$(distroName)-$(targetArch)/ || echo "Failed to copy .so files"
- displayName: 'Copy results and artifacts from $(distroName) $(targetArch) container'
- condition: always()
-
- - script: |
- # Clean up containers
- docker stop build-container-$(distroName)-$(targetArch) || true
- docker rm build-container-$(distroName)-$(targetArch) || true
- docker stop sqlserver-$(distroName)-$(targetArch) || true
- docker rm sqlserver-$(distroName)-$(targetArch) || true
- displayName: 'Clean up $(distroName) $(targetArch) containers'
- condition: always()
-
- - task: PublishTestResults@2
- condition: succeededOrFailed()
- inputs:
- testResultsFiles: '**/test-results-$(distroName)-$(targetArch).xml'
- testRunTitle: 'Publish pytest results on $(distroName) $(targetArch)'
-
- - task: PublishCodeCoverageResults@1
- inputs:
- codeCoverageTool: 'Cobertura'
- summaryFileLocation: 'coverage-$(distroName)-$(targetArch).xml'
- displayName: 'Publish code coverage results for $(distroName) $(targetArch)'
-
- - task: PublishBuildArtifacts@1
- condition: succeededOrFailed()
- inputs:
- PathtoPublish: '$(Build.ArtifactStagingDirectory)/ddbc-bindings'
- ArtifactName: 'mssql-python-ddbc-bindings'
- publishLocation: 'Container'
- displayName: 'Publish .so files as artifacts'
-
- - task: PublishBuildArtifacts@1
- condition: succeededOrFailed()
- inputs:
- PathtoPublish: '$(Build.ArtifactStagingDirectory)/dist'
- ArtifactName: 'mssql-python-wheels-dist'
- publishLocation: 'Container'
- displayName: 'Publish wheels as artifacts'
+ displayName: 'Run pytest suite on $(BASE_IMAGE) $(ARCH)'
+ env:
+ DB_CONNECTION_STRING: 'Server=localhost;Database=TestDB;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes'
+ continueOnError: true # Don't fail pipeline if tests fail
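+ # Note: DB_CONNECTION_STRING above is set on the host step and is not forwarded into the
+ # test container (the docker exec has no -e flag); localhost inside that container also
+ # does not reach the published SQL Server port, so DB-backed tests may be skipped or fail here.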
+
+ # Cleanup
+ - script: |
+ docker stop test-$(ARCH) sqlserver || true
+ docker rm test-$(ARCH) sqlserver || true
+ displayName: 'Cleanup containers'
+ condition: always()
diff --git a/eng/pipelines/dummy-release-pipeline.yml b/eng/pipelines/dummy-release-pipeline.yml
index 728ce88b5..9fcf985c0 100644
--- a/eng/pipelines/dummy-release-pipeline.yml
+++ b/eng/pipelines/dummy-release-pipeline.yml
@@ -1,4 +1,4 @@
-name: mssql-python-official-release-pipeline
+name: mssql-python-dummy-release-pipeline
variables:
- group: 'ESRP Federated Creds (AME)'
@@ -27,8 +27,10 @@ jobs:
dir "$(Build.SourcesDirectory)\dist"
displayName: 'List contents of dist directory'
+ # The ESRP task should fail since Maven is not a valid content type
- task: EsrpRelease@9
displayName: 'ESRP Release'
+ continueOnError: true
inputs:
connectedservicename: '$(ESRPConnectedServiceName)'
usemanagedidentity: true
@@ -49,3 +51,14 @@ jobs:
ServiceEndpointUrl: 'https://api.esrp.microsoft.com'
MainPublisher: 'ESRPRELPACMAN'
DomainTenantId: '$(DomainTenantId)'
+
+ - script: |
+ echo "ESRP task completed. Checking if it failed as expected..."
+ if "%AGENT_JOBSTATUS%" == "Failed" (
+ echo "✅ ESRP task failed as expected for dummy release testing"
+ exit 0
+ ) else (
+ echo "⚠️ ESRP task unexpectedly succeeded"
+ exit 0
+ )
+ displayName: 'Validate ESRP Task Failed as Expected'
\ No newline at end of file
diff --git a/eng/pipelines/pr-validation-pipeline.yml b/eng/pipelines/pr-validation-pipeline.yml
index 5b8083ae2..15dfdb21c 100644
--- a/eng/pipelines/pr-validation-pipeline.yml
+++ b/eng/pipelines/pr-validation-pipeline.yml
@@ -7,80 +7,452 @@ trigger:
- main
jobs:
-- job: PytestOnWindows
+- job: CodeQLAnalysis
+ displayName: 'CodeQL Security Analysis'
+ pool:
+ vmImage: 'ubuntu-latest'
+
+ steps:
+ - script: |
+ sudo apt-get update
+ sudo apt-get install -y build-essential cmake curl git python3 python3-pip python3-dev python3-venv unixodbc-dev
+ displayName: 'Install build dependencies for CodeQL'
+
+ - task: UsePythonVersion@0
+ inputs:
+ versionSpec: '3.13'
+ addToPath: true
+ displayName: 'Use Python 3.13 for CodeQL'
+
+ - script: |
+ python -m pip install --upgrade pip
+ pip install -r requirements.txt
+ displayName: 'Install Python dependencies for CodeQL'
+
+ - task: CodeQL3000Init@0
+ inputs:
+ Enabled: true
+ displayName: 'Initialize CodeQL'
+
+ # Build the C++ extension for CodeQL analysis
+ - script: |
+ cd mssql_python/pybind
+ chmod +x build.sh
+ ./build.sh
+ displayName: 'Build C++ extension for CodeQL analysis'
+
+ - task: CodeQL3000Finalize@0
+ condition: always()
+ displayName: 'Finalize CodeQL'
+
+- job: pytestonwindows
+ displayName: 'Windows x64'
pool:
vmImage: 'windows-latest'
+ variables:
+ # Enable CodeQL for this job to update the old stale snapshot (build_jobname=pytestonwindows)
+ # This can be removed once the old CodeQL issue SM02986 is cleared
+ Codeql.Enabled: true
+ strategy:
+ matrix:
+ pytestonwindows:
+ # Temporary entry to clear stale CodeQL snapshot SM02986
+ # Remove this once the issue is resolved
+ sqlVersion: 'SQL2022'
+ pythonVersion: '3.13'
+ SQLServer2022:
+ sqlVersion: 'SQL2022'
+ pythonVersion: '3.13'
+ SQLServer2025:
+ sqlVersion: 'SQL2025'
+ pythonVersion: '3.14'
+ LocalDB_Python314:
+ sqlVersion: 'LocalDB'
+ pythonVersion: '3.14'
+
steps:
- task: UsePythonVersion@0
inputs:
- versionSpec: '3.13'
+ versionSpec: '$(pythonVersion)'
addToPath: true
githubToken: $(GITHUB_TOKEN)
- displayName: 'Use Python 3.13'
+ displayName: 'Use Python $(pythonVersion)'
- script: |
python -m pip install --upgrade pip
pip install -r requirements.txt
displayName: 'Install dependencies'
- # Start LocalDB instance
+ # Start LocalDB instance (for LocalDB matrix)
- powershell: |
sqllocaldb create MSSQLLocalDB
sqllocaldb start MSSQLLocalDB
displayName: 'Start LocalDB instance'
+ condition: eq(variables['sqlVersion'], 'LocalDB')
- # Create database and user
+ # Create database and user for LocalDB
- powershell: |
sqlcmd -S "(localdb)\MSSQLLocalDB" -Q "CREATE DATABASE TestDB"
sqlcmd -S "(localdb)\MSSQLLocalDB" -Q "CREATE LOGIN testuser WITH PASSWORD = '$(DB_PASSWORD)'"
sqlcmd -S "(localdb)\MSSQLLocalDB" -d TestDB -Q "CREATE USER testuser FOR LOGIN testuser"
sqlcmd -S "(localdb)\MSSQLLocalDB" -d TestDB -Q "ALTER ROLE db_owner ADD MEMBER testuser"
- displayName: 'Setup database and user'
+ displayName: 'Setup database and user for LocalDB'
+ condition: eq(variables['sqlVersion'], 'LocalDB')
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ # Install SQL Server 2022 (for SQL2022 matrix)
+ - powershell: |
+ Write-Host "Downloading SQL Server 2022 Express..."
+ # Download SQL Server 2022 Express installer
+ $ProgressPreference = 'SilentlyContinue'
+ Invoke-WebRequest -Uri "https://download.microsoft.com/download/5/1/4/5145fe04-4d30-4b85-b0d1-39533663a2f1/SQL2022-SSEI-Expr.exe" -OutFile "SQL2022-SSEI-Expr.exe"
+
+ Write-Host "Installing SQL Server 2022 Express..."
+ # Install SQL Server 2022 Express with basic features
+ Start-Process -FilePath "SQL2022-SSEI-Expr.exe" -ArgumentList "/Action=Download","/MediaPath=$env:TEMP","/MediaType=Core","/Quiet" -Wait
+
+ # Find the downloaded setup file
+ $setupFile = Get-ChildItem -Path $env:TEMP -Filter "SQLEXPR_x64_ENU.exe" -Recurse | Select-Object -First 1
+
+ if ($setupFile) {
+ Write-Host "Extracting SQL Server setup files..."
+ Start-Process -FilePath $setupFile.FullName -ArgumentList "/x:$env:TEMP\SQLSetup","/u" -Wait
+
+ Write-Host "Running SQL Server setup..."
+ Start-Process -FilePath "$env:TEMP\SQLSetup\setup.exe" -ArgumentList "/Q","/ACTION=Install","/FEATURES=SQLEngine","/INSTANCENAME=MSSQLSERVER","/SQLSVCACCOUNT=`"NT AUTHORITY\SYSTEM`"","/SQLSYSADMINACCOUNTS=`"BUILTIN\Administrators`"","/TCPENABLED=1","/SECURITYMODE=SQL","/SAPWD=$(DB_PASSWORD)","/IACCEPTSQLSERVERLICENSETERMS" -Wait
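+ # /SECURITYMODE=SQL enables mixed-mode authentication so the sa login used in later steps can connect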
+ } else {
+ Write-Error "Failed to download SQL Server setup file"
+ exit 1
+ }
+
+ Write-Host "SQL Server 2022 installation completed"
+ displayName: 'Install SQL Server 2022 Express'
+ condition: eq(variables['sqlVersion'], 'SQL2022')
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ # Create database for SQL Server 2022
+ - powershell: |
+ # Wait for SQL Server to start
+ $maxAttempts = 30
+ $attempt = 0
+ $connected = $false
+
+ Write-Host "Waiting for SQL Server 2022 to start..."
+ while (-not $connected -and $attempt -lt $maxAttempts) {
+ try {
+ sqlcmd -S "localhost" -U "sa" -P "$(DB_PASSWORD)" -Q "SELECT 1" -C
+ $connected = $true
+ Write-Host "SQL Server is ready!"
+ } catch {
+ $attempt++
+ Write-Host "Waiting... ($attempt/$maxAttempts)"
+ Start-Sleep -Seconds 2
+ }
+ }
+
+ if (-not $connected) {
+ Write-Error "Failed to connect to SQL Server after $maxAttempts attempts"
+ exit 1
+ }
+
+ # Create database and user
+ sqlcmd -S "localhost" -U "sa" -P "$(DB_PASSWORD)" -Q "CREATE DATABASE TestDB" -C
+ sqlcmd -S "localhost" -U "sa" -P "$(DB_PASSWORD)" -Q "CREATE LOGIN testuser WITH PASSWORD = '$(DB_PASSWORD)'" -C
+ sqlcmd -S "localhost" -U "sa" -P "$(DB_PASSWORD)" -d TestDB -Q "CREATE USER testuser FOR LOGIN testuser" -C
+ sqlcmd -S "localhost" -U "sa" -P "$(DB_PASSWORD)" -d TestDB -Q "ALTER ROLE db_owner ADD MEMBER testuser" -C
+ displayName: 'Setup database and user for SQL Server 2022'
+ condition: eq(variables['sqlVersion'], 'SQL2022')
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ # Install SQL Server 2025 (for SQL2025 matrix)
+ - powershell: |
+ Write-Host "Downloading SQL Server 2025 Express..."
+ # Download SQL Server 2025 Express installer
+ $ProgressPreference = 'SilentlyContinue'
+ Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?linkid=2216019&clcid=0x409&culture=en-us&country=us" -OutFile "SQL2025-SSEI-Expr.exe"
+
+ Write-Host "Installing SQL Server 2025 Express..."
+ # Install SQL Server 2025 Express with basic features
+ Start-Process -FilePath "SQL2025-SSEI-Expr.exe" -ArgumentList "/Action=Download","/MediaPath=$env:TEMP","/MediaType=Core","/Quiet" -Wait
+
+ # Find the downloaded setup file
+ $setupFile = Get-ChildItem -Path $env:TEMP -Filter "SQLEXPR_x64_ENU.exe" -Recurse | Select-Object -First 1
+
+ if ($setupFile) {
+ Write-Host "Extracting SQL Server setup files..."
+ Start-Process -FilePath $setupFile.FullName -ArgumentList "/x:$env:TEMP\SQL2025Setup","/u" -Wait
+
+ Write-Host "Running SQL Server setup..."
+ Start-Process -FilePath "$env:TEMP\SQL2025Setup\setup.exe" -ArgumentList "/Q","/ACTION=Install","/FEATURES=SQLEngine","/INSTANCENAME=MSSQLSERVER","/SQLSVCACCOUNT=`"NT AUTHORITY\SYSTEM`"","/SQLSYSADMINACCOUNTS=`"BUILTIN\Administrators`"","/TCPENABLED=1","/SECURITYMODE=SQL","/SAPWD=$(DB_PASSWORD)","/IACCEPTSQLSERVERLICENSETERMS" -Wait
+ } else {
+ Write-Error "Failed to download SQL Server setup file"
+ exit 1
+ }
+
+ Write-Host "SQL Server 2025 installation completed"
+ displayName: 'Install SQL Server 2025 Express'
+ condition: eq(variables['sqlVersion'], 'SQL2025')
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ # Create database for SQL Server 2025
+ - powershell: |
+ # Wait for SQL Server to start
+ $maxAttempts = 30
+ $attempt = 0
+ $connected = $false
+
+ Write-Host "Waiting for SQL Server 2025 to start..."
+ while (-not $connected -and $attempt -lt $maxAttempts) {
+ try {
+ sqlcmd -S "localhost" -U "sa" -P "$(DB_PASSWORD)" -Q "SELECT 1" -C
+ $connected = $true
+ Write-Host "SQL Server is ready!"
+ } catch {
+ $attempt++
+ Write-Host "Waiting... ($attempt/$maxAttempts)"
+ Start-Sleep -Seconds 2
+ }
+ }
+
+ if (-not $connected) {
+ Write-Error "Failed to connect to SQL Server after $maxAttempts attempts"
+ exit 1
+ }
+
+ # Create database and user
+ sqlcmd -S "localhost" -U "sa" -P "$(DB_PASSWORD)" -Q "CREATE DATABASE TestDB" -C
+ sqlcmd -S "localhost" -U "sa" -P "$(DB_PASSWORD)" -Q "CREATE LOGIN testuser WITH PASSWORD = '$(DB_PASSWORD)'" -C
+ sqlcmd -S "localhost" -U "sa" -P "$(DB_PASSWORD)" -d TestDB -Q "CREATE USER testuser FOR LOGIN testuser" -C
+ sqlcmd -S "localhost" -U "sa" -P "$(DB_PASSWORD)" -d TestDB -Q "ALTER ROLE db_owner ADD MEMBER testuser" -C
+ displayName: 'Setup database and user for SQL Server 2025'
+ condition: eq(variables['sqlVersion'], 'SQL2025')
env:
DB_PASSWORD: $(DB_PASSWORD)
+ # ============== CodeQL Init (temporary - remove after SM02986 is cleared) ==============
+ - task: CodeQL3000Init@0
+ inputs:
+ Enabled: true
+ displayName: 'Initialize CodeQL (temporary)'
+
- script: |
cd mssql_python\pybind
build.bat x64
displayName: 'Build .pyd file'
+ # ============== CodeQL Finalize (temporary - remove after SM02986 is cleared) ==============
+ - task: CodeQL3000Finalize@0
+ condition: always()
+ displayName: 'Finalize CodeQL (temporary)'
+
+ # Run tests for LocalDB
- script: |
- python -m pytest -v --junitxml=test-results.xml --cov=. --cov-report=xml --capture=tee-sys --cache-clear
- displayName: 'Run tests with coverage'
+ python -m pytest -v --junitxml=test-results-localdb.xml --cov=. --cov-report=xml:coverage-localdb.xml --capture=tee-sys --cache-clear
+ displayName: 'Run tests with coverage on LocalDB'
+ condition: eq(variables['sqlVersion'], 'LocalDB')
env:
DB_CONNECTION_STRING: 'Server=(localdb)\MSSQLLocalDB;Database=TestDB;Uid=testuser;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes'
- - task: PublishBuildArtifacts@1
+ # Run tests for SQL Server 2022
+ - script: |
+ python -m pytest -v --junitxml=test-results-sql2022.xml --cov=. --cov-report=xml:coverage-sql2022.xml --capture=tee-sys --cache-clear
+ displayName: 'Run tests with coverage on SQL Server 2022'
+ condition: eq(variables['sqlVersion'], 'SQL2022')
+ env:
+ DB_CONNECTION_STRING: 'Server=localhost;Database=TestDB;Uid=testuser;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes'
+
+ # Run tests for SQL Server 2025
+ - script: |
+ python -m pytest -v --junitxml=test-results-sql2025.xml --cov=. --cov-report=xml:coverage-sql2025.xml --capture=tee-sys --cache-clear
+ displayName: 'Run tests with coverage on SQL Server 2025'
+ condition: eq(variables['sqlVersion'], 'SQL2025')
+ env:
+ DB_CONNECTION_STRING: 'Server=localhost;Database=TestDB;Uid=testuser;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes'
+
+ # Download and restore AdventureWorks2022 database for benchmarking
+ - powershell: |
+ Write-Host "Downloading AdventureWorks2022.bak..."
+ $ProgressPreference = 'SilentlyContinue'
+ Invoke-WebRequest -Uri "https://github.com/Microsoft/sql-server-samples/releases/download/adventureworks/AdventureWorks2022.bak" -OutFile "$env:TEMP\AdventureWorks2022.bak"
+
+ Write-Host "Restoring AdventureWorks2022 database..."
+ # Get the default data and log paths
+ $dataPath = sqlcmd -S "localhost" -U "sa" -P "$(DB_PASSWORD)" -Q "SET NOCOUNT ON; SELECT SERVERPROPERTY('InstanceDefaultDataPath') AS DataPath" -h -1 -C | Out-String
+ $logPath = sqlcmd -S "localhost" -U "sa" -P "$(DB_PASSWORD)" -Q "SET NOCOUNT ON; SELECT SERVERPROPERTY('InstanceDefaultLogPath') AS LogPath" -h -1 -C | Out-String
+
+ $dataPath = $dataPath.Trim()
+ $logPath = $logPath.Trim()
+
+ Write-Host "Data path: $dataPath"
+ Write-Host "Log path: $logPath"
+
+ # Restore the database
+ sqlcmd -S "localhost" -U "sa" -P "$(DB_PASSWORD)" -C -Q @"
+ RESTORE DATABASE AdventureWorks2022
+ FROM DISK = '$env:TEMP\AdventureWorks2022.bak'
+ WITH
+ MOVE 'AdventureWorks2022' TO '${dataPath}AdventureWorks2022.mdf',
+ MOVE 'AdventureWorks2022_log' TO '${logPath}AdventureWorks2022_log.ldf',
+ REPLACE
+ "@
+
+ if ($LASTEXITCODE -eq 0) {
+ Write-Host "AdventureWorks2022 database restored successfully"
+ } else {
+ Write-Error "Failed to restore AdventureWorks2022 database"
+ exit 1
+ }
+ displayName: 'Download and restore AdventureWorks2022 database'
+ condition: or(eq(variables['sqlVersion'], 'SQL2022'), eq(variables['sqlVersion'], 'SQL2025'))
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ # Run performance benchmarks on SQL Server 2022
+ - powershell: |
+ Write-Host "Checking and installing ODBC Driver 18 for SQL Server..."
+
+ # Check if ODBC Driver 18 is registered in Windows registry
+ $odbcDriverKey = "HKLM:\SOFTWARE\ODBC\ODBCINST.INI\ODBC Driver 18 for SQL Server"
+ $driverExists = Test-Path $odbcDriverKey
+
+ if ($driverExists) {
+ Write-Host "✓ ODBC Driver 18 for SQL Server is already installed and registered"
+ $driverPath = (Get-ItemProperty -Path $odbcDriverKey -Name "Driver" -ErrorAction SilentlyContinue).Driver
+ if ($driverPath) {
+ Write-Host " Driver location: $driverPath"
+ }
+ } else {
+ Write-Host "ODBC Driver 18 for SQL Server not found, installing..."
+
+ # Download ODBC Driver 18.5.2.1 (x64) from official Microsoft link
+ $ProgressPreference = 'SilentlyContinue'
+ $installerUrl = "https://go.microsoft.com/fwlink/?linkid=2335671"
+ $installerPath = "$env:TEMP\msodbcsql_18.5.2.1_x64.msi"
+
+ Write-Host "Downloading ODBC Driver 18 (x64) from Microsoft..."
+ Write-Host " URL: $installerUrl"
+ try {
+ Invoke-WebRequest -Uri $installerUrl -OutFile $installerPath -UseBasicParsing
+ Write-Host "✓ Download completed: $installerPath"
+ } catch {
+ Write-Error "Failed to download ODBC driver: $_"
+ exit 1
+ }
+
+ Write-Host "Installing ODBC Driver 18..."
+ $installArgs = @(
+ "/i"
+ "`"$installerPath`""
+ "/quiet"
+ "/qn"
+ "/norestart"
+ "IACCEPTMSODBCSQLLICENSETERMS=YES"
+ "/l*v"
+ "`"$env:TEMP\odbc_install.log`""
+ )
+
+ $installCmd = "msiexec.exe $($installArgs -join ' ')"
+ Write-Host " Command: $installCmd"
+
+ $process = Start-Process msiexec.exe -ArgumentList $installArgs -Wait -PassThru -NoNewWindow
+
+ if ($process.ExitCode -eq 0) {
+ Write-Host "✓ ODBC Driver 18 installation completed successfully"
+ } elseif ($process.ExitCode -eq 3010) {
+ Write-Host "✓ ODBC Driver 18 installed (reboot recommended but not required)"
+ } else {
+ Write-Error "ODBC Driver 18 installation failed with exit code: $($process.ExitCode)"
+ Write-Host "Check installation log: $env:TEMP\odbc_install.log"
+ Get-Content "$env:TEMP\odbc_install.log" -Tail 50 -ErrorAction SilentlyContinue
+ exit 1
+ }
+
+ # Wait for registry update
+ Start-Sleep -Seconds 2
+
+ # Clean up installer
+ Remove-Item $installerPath -ErrorAction SilentlyContinue
+ }
+
+ # Final verification using registry
+ Write-Host "`nVerifying ODBC Driver 18 installation..."
+ $verifyKey = Test-Path "HKLM:\SOFTWARE\ODBC\ODBCINST.INI\ODBC Driver 18 for SQL Server"
+
+ if ($verifyKey) {
+ $driverInfo = Get-ItemProperty -Path "HKLM:\SOFTWARE\ODBC\ODBCINST.INI\ODBC Driver 18 for SQL Server" -ErrorAction SilentlyContinue
+ Write-Host "✓ SUCCESS: ODBC Driver 18 for SQL Server is registered"
+ Write-Host " Driver: $($driverInfo.Driver)"
+ Write-Host " Setup: $($driverInfo.Setup)"
+ } else {
+ Write-Error "ODBC Driver 18 for SQL Server is not registered in ODBC"
+ Write-Host "`nListing all installed ODBC drivers from registry:"
+ Get-ChildItem "HKLM:\SOFTWARE\ODBC\ODBCINST.INI" -ErrorAction SilentlyContinue | ForEach-Object { Write-Host " - $($_.PSChildName)" }
+ exit 1
+ }
+
+ Write-Host "`nInstalling pyodbc..."
+ pip install pyodbc
+
+ Write-Host "`nRunning performance benchmarks..."
+ python benchmarks/perf-benchmarking.py
+ displayName: 'Run performance benchmarks on SQL Server 2022/2025'
+ condition: or(eq(variables['sqlVersion'], 'SQL2022'), eq(variables['sqlVersion'], 'SQL2025'))
+ continueOnError: true
+ env:
+ DB_CONNECTION_STRING: 'Server=localhost;Database=AdventureWorks2022;Uid=sa;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes'
+
+ - task: CopyFiles@2
inputs:
- PathtoPublish: 'mssql_python/ddbc_bindings.cp313-amd64.pyd'
- ArtifactName: 'ddbc_bindings'
- publishLocation: 'Container'
- displayName: 'Publish pyd file as artifact'
+ SourceFolder: 'mssql_python'
+ Contents: 'ddbc_bindings.cp*-amd64.pyd'
+ TargetFolder: '$(Build.ArtifactStagingDirectory)'
+ displayName: 'Copy pyd file to staging'
+
+ - task: CopyFiles@2
+ inputs:
+ SourceFolder: 'mssql_python'
+ Contents: 'ddbc_bindings.cp*-amd64.pdb'
+ TargetFolder: '$(Build.ArtifactStagingDirectory)'
+ displayName: 'Copy pdb file to staging'
- task: PublishBuildArtifacts@1
inputs:
- PathtoPublish: 'mssql_python/ddbc_bindings.cp313-amd64.pdb'
+ PathtoPublish: '$(Build.ArtifactStagingDirectory)'
ArtifactName: 'ddbc_bindings'
publishLocation: 'Container'
- displayName: 'Publish pdb file as artifact'
+ displayName: 'Publish build artifacts'
- task: PublishTestResults@2
condition: succeededOrFailed()
inputs:
- testResultsFiles: '**/test-results.xml'
- testRunTitle: 'Publish test results'
+ testResultsFiles: '**/test-results-*.xml'
+ testRunTitle: 'Publish test results for Windows $(sqlVersion)'
- - task: PublishCodeCoverageResults@1
- inputs:
- codeCoverageTool: 'Cobertura'
- summaryFileLocation: 'coverage.xml'
- displayName: 'Publish code coverage results'
+ # - task: PublishCodeCoverageResults@1
+ # inputs:
+ # codeCoverageTool: 'Cobertura'
+ # summaryFileLocation: 'coverage.xml'
+ # displayName: 'Publish code coverage results'
- job: PytestOnMacOS
+ displayName: 'macOS x86_64'
pool:
vmImage: 'macos-latest'
+ strategy:
+ matrix:
+ SQL2022:
+ sqlServerImage: 'mcr.microsoft.com/mssql/server:2022-latest'
+ sqlVersion: 'SQL2022'
+ SQL2025:
+ sqlServerImage: 'mcr.microsoft.com/mssql/server:2025-latest'
+ sqlVersion: 'SQL2025'
+
steps:
- task: UsePythonVersion@0
inputs:
@@ -90,6 +462,9 @@ jobs:
- script: |
brew update
+ # Uninstall existing CMake to avoid tap conflicts
+ brew uninstall cmake --ignore-dependencies || echo "CMake not installed or already removed"
+ # Install CMake from homebrew/core
brew install cmake
displayName: 'Install CMake'
@@ -110,13 +485,13 @@ jobs:
- script: |
# Pull and run SQL Server container
- docker pull mcr.microsoft.com/mssql/server:2022-latest
+ docker pull $(sqlServerImage)
docker run \
--name sqlserver \
-e ACCEPT_EULA=Y \
-e MSSQL_SA_PASSWORD="${DB_PASSWORD}" \
-p 1433:1433 \
- -d mcr.microsoft.com/mssql/server:2022-latest
+ -d $(sqlServerImage)
# Starting SQL Server container…
for i in {1..30}; do
@@ -147,22 +522,17 @@ jobs:
python -m pytest -v --junitxml=test-results.xml --cov=. --cov-report=xml --capture=tee-sys --cache-clear
displayName: 'Run pytest with coverage'
env:
- DB_CONNECTION_STRING: 'Driver=ODBC Driver 18 for SQL Server;Server=localhost;Database=master;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes'
+ DB_CONNECTION_STRING: 'Server=tcp:127.0.0.1,1433;Database=master;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes'
DB_PASSWORD: $(DB_PASSWORD)
- task: PublishTestResults@2
condition: succeededOrFailed()
inputs:
testResultsFiles: '**/test-results.xml'
- testRunTitle: 'Publish pytest results on macOS'
-
- - task: PublishCodeCoverageResults@1
- inputs:
- codeCoverageTool: 'Cobertura'
- summaryFileLocation: 'coverage.xml'
- displayName: 'Publish code coverage results'
+ testRunTitle: 'Publish pytest results on macOS $(sqlVersion)'
- job: PytestOnLinux
+ displayName: 'Linux x86_64'
pool:
vmImage: 'ubuntu-latest'
@@ -171,9 +541,29 @@ jobs:
Ubuntu:
dockerImage: 'ubuntu:22.04'
distroName: 'Ubuntu'
+ sqlServerImage: 'mcr.microsoft.com/mssql/server:2022-latest'
+ useAzureSQL: 'false'
+ Ubuntu_SQL2025:
+ dockerImage: 'ubuntu:22.04'
+ distroName: 'Ubuntu-SQL2025'
+ sqlServerImage: 'mcr.microsoft.com/mssql/server:2025-latest'
+ useAzureSQL: 'false'
+ ${{ if ne(variables['AZURE_CONNECTION_STRING'], '') }}:
+ Ubuntu_AzureSQL:
+ dockerImage: 'ubuntu:22.04'
+ distroName: 'Ubuntu-AzureSQL'
+ sqlServerImage: ''
+ useAzureSQL: 'true'
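+ # ${{ if }} is evaluated at template-expansion time, so this matrix leg only exists when
+ # AZURE_CONNECTION_STRING is already defined as a pipeline variable; values that resolve
+ # only at runtime are not visible to this check.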
Debian:
dockerImage: 'debian:12'
distroName: 'Debian'
+ sqlServerImage: 'mcr.microsoft.com/mssql/server:2022-latest'
+ useAzureSQL: 'false'
+ Debian_SQL2025:
+ dockerImage: 'debian:12'
+ distroName: 'Debian-SQL2025'
+ sqlServerImage: 'mcr.microsoft.com/mssql/server:2025-latest'
+ useAzureSQL: 'false'
steps:
- script: |
@@ -192,7 +582,7 @@ jobs:
-e ACCEPT_EULA=Y \
-e MSSQL_SA_PASSWORD="$(DB_PASSWORD)" \
-p 1433:1433 \
- mcr.microsoft.com/mssql/server:2022-latest
+ $(sqlServerImage)
# Wait for SQL Server to be ready
echo "Waiting for SQL Server to start..."
@@ -218,6 +608,7 @@ jobs:
-P "$(DB_PASSWORD)" \
-C -Q "CREATE DATABASE TestDB"
displayName: 'Start SQL Server container for $(distroName)'
+ condition: eq(variables['useAzureSQL'], 'false')
env:
DB_PASSWORD: $(DB_PASSWORD)
@@ -316,20 +707,121 @@ jobs:
- script: |
# Run tests in the container
- # Get SQL Server container IP
- SQLSERVER_IP=$(docker inspect sqlserver-$(distroName) --format='{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}')
- echo "SQL Server IP: $SQLSERVER_IP"
-
- docker exec \
- -e DB_CONNECTION_STRING="Driver=ODBC Driver 18 for SQL Server;Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes" \
- -e DB_PASSWORD="$(DB_PASSWORD)" \
- test-container-$(distroName) bash -c "
- source /opt/venv/bin/activate
- echo 'Build successful, running tests now on $(distroName)'
- echo 'Using connection string: Driver=ODBC Driver 18 for SQL Server;Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=***;TrustServerCertificate=yes'
- python -m pytest -v --junitxml=test-results-$(distroName).xml --cov=. --cov-report=xml:coverage-$(distroName).xml --capture=tee-sys --cache-clear
- "
+ if [ "$(useAzureSQL)" = "true" ]; then
+ # Azure SQL Database testing
+ echo "Testing against Azure SQL Database"
+
+ docker exec \
+ -e DB_CONNECTION_STRING="$(AZURE_CONNECTION_STRING)" \
+ test-container-$(distroName) bash -c "
+ source /opt/venv/bin/activate
+ echo 'Build successful, running tests now on $(distroName) with Azure SQL'
+ echo 'Using Azure SQL connection string'
+ python -m pytest -v --junitxml=test-results-$(distroName).xml --cov=. --cov-report=xml:coverage-$(distroName).xml --capture=tee-sys --cache-clear
+ "
+ else
+ # Local SQL Server testing
+ SQLSERVER_IP=$(docker inspect sqlserver-$(distroName) --format='{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}')
+ echo "SQL Server IP: $SQLSERVER_IP"
+
+ docker exec \
+ -e DB_CONNECTION_STRING="Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes" \
+ -e DB_PASSWORD="$(DB_PASSWORD)" \
+ test-container-$(distroName) bash -c "
+ source /opt/venv/bin/activate
+ echo 'Build successful, running tests now on $(distroName)'
+ echo 'Using connection string: Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=***;TrustServerCertificate=yes'
+ python -m pytest -v --junitxml=test-results-$(distroName).xml --cov=. --cov-report=xml:coverage-$(distroName).xml --capture=tee-sys --cache-clear
+ "
+ fi
displayName: 'Run pytest with coverage in $(distroName) container'
+ condition: or(eq(variables['useAzureSQL'], 'false'), and(eq(variables['useAzureSQL'], 'true'), ne(variables['AZURE_CONNECTION_STRING'], '')))
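+ # Runtime guard: local SQL Server legs always run; the Azure SQL leg runs only when a connection string is provided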
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ - script: |
+ # Download and restore AdventureWorks2022 database for benchmarking on Ubuntu only
+ if [ "$(distroName)" = "Ubuntu" ] && [ "$(useAzureSQL)" = "false" ]; then
+ echo "Downloading AdventureWorks2022.bak..."
+ wget -q https://github.com/Microsoft/sql-server-samples/releases/download/adventureworks/AdventureWorks2022.bak -O /tmp/AdventureWorks2022.bak
+
+ echo "Copying backup file into SQL Server container..."
+ docker cp /tmp/AdventureWorks2022.bak sqlserver-$(distroName):/tmp/AdventureWorks2022.bak
+
+ echo "Restoring AdventureWorks2022 database..."
+ docker exec sqlserver-$(distroName) /opt/mssql-tools18/bin/sqlcmd \
+ -S localhost \
+ -U SA \
+ -P "$(DB_PASSWORD)" \
+ -C \
+ -Q "RESTORE DATABASE AdventureWorks2022 FROM DISK = '/tmp/AdventureWorks2022.bak' WITH MOVE 'AdventureWorks2022' TO '/var/opt/mssql/data/AdventureWorks2022.mdf', MOVE 'AdventureWorks2022_log' TO '/var/opt/mssql/data/AdventureWorks2022_log.ldf', REPLACE"
+
+ if [ $? -eq 0 ]; then
+ echo "AdventureWorks2022 database restored successfully"
+ else
+ echo "Failed to restore AdventureWorks2022 database"
+ fi
+
+ # Clean up (ignore errors if files are locked)
+ rm -f /tmp/AdventureWorks2022.bak || true
+ docker exec sqlserver-$(distroName) rm -f /tmp/AdventureWorks2022.bak || true
+ fi
+ displayName: 'Download and restore AdventureWorks2022 database in $(distroName)'
+ condition: and(eq(variables['distroName'], 'Ubuntu'), eq(variables['useAzureSQL'], 'false'))
+ continueOnError: true
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ - script: |
+ # Run performance benchmarks on Ubuntu with SQL Server 2022 only
+ if [ "$(distroName)" = "Ubuntu" ] && [ "$(useAzureSQL)" = "false" ]; then
+ SQLSERVER_IP=$(docker inspect sqlserver-$(distroName) --format='{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}')
+ echo "Running performance benchmarks on Ubuntu with SQL Server IP: $SQLSERVER_IP"
+
+ docker exec \
+ -e DB_CONNECTION_STRING="Server=$SQLSERVER_IP;Database=AdventureWorks2022;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes" \
+ test-container-$(distroName) bash -c "
+ source /opt/venv/bin/activate
+
+ echo 'Reinstalling ODBC Driver for benchmarking...'
+ export DEBIAN_FRONTEND=noninteractive
+
+ # Remove duplicate repository sources if they exist
+ rm -f /etc/apt/sources.list.d/microsoft-prod.list
+
+ # Add Microsoft repository
+ curl -sSL https://packages.microsoft.com/keys/microsoft.asc | apt-key add -
+ curl -sSL https://packages.microsoft.com/config/ubuntu/22.04/prod.list > /etc/apt/sources.list.d/mssql-release.list
+
+ # Update package lists
+ apt-get update -qq
+
+ # Install unixodbc and its dependencies first (provides libodbcinst.so.2 needed by msodbcsql18)
+ echo 'Installing unixODBC dependencies...'
+ apt-get install -y --no-install-recommends unixodbc unixodbc-dev libodbc1 odbcinst odbcinst1debian2
+
+ # Verify libodbcinst.so.2 is available
+ ldconfig
+ ls -la /usr/lib/x86_64-linux-gnu/libodbcinst.so.2 || echo 'Warning: libodbcinst.so.2 not found'
+
+ # Install ODBC Driver 18
+ echo 'Installing msodbcsql18...'
+ ACCEPT_EULA=Y apt-get install -y msodbcsql18
+
+ # Verify ODBC driver installation
+ odbcinst -q -d -n 'ODBC Driver 18 for SQL Server' || echo 'Warning: ODBC Driver 18 not registered'
+
+ echo 'Installing pyodbc for benchmarking...'
+ pip install pyodbc
+ echo 'Running performance benchmarks on $(distroName)'
+ python benchmarks/perf-benchmarking.py || echo 'Performance benchmark failed or database not available'
+ "
+ else
+ echo "Skipping performance benchmarks on $(distroName) (only runs on Ubuntu with local SQL Server)"
+ fi
+ displayName: 'Run performance benchmarks in $(distroName) container'
+ condition: and(eq(variables['distroName'], 'Ubuntu'), eq(variables['useAzureSQL'], 'false'))
+ continueOnError: true
env:
DB_PASSWORD: $(DB_PASSWORD)
@@ -344,8 +836,10 @@ jobs:
# Clean up containers
docker stop test-container-$(distroName) || true
docker rm test-container-$(distroName) || true
- docker stop sqlserver-$(distroName) || true
- docker rm sqlserver-$(distroName) || true
+ if [ "$(useAzureSQL)" = "false" ]; then
+ docker stop sqlserver-$(distroName) || true
+ docker rm sqlserver-$(distroName) || true
+ fi
displayName: 'Clean up $(distroName) containers'
condition: always()
@@ -355,13 +849,8 @@ jobs:
testResultsFiles: '**/test-results-$(distroName).xml'
testRunTitle: 'Publish pytest results on $(distroName)'
- - task: PublishCodeCoverageResults@1
- inputs:
- codeCoverageTool: 'Cobertura'
- summaryFileLocation: 'coverage-$(distroName).xml'
- displayName: 'Publish code coverage results for $(distroName)'
-
- job: PytestOnLinux_ARM64
+ displayName: 'Linux ARM64'
pool:
vmImage: 'ubuntu-latest'
@@ -513,6 +1002,7 @@ jobs:
./build.sh
"
displayName: 'Build pybind bindings (.so) in $(distroName) ARM64 container'
+ retryCountOnTaskFailure: 2
- script: |
# Uninstall ODBC Driver before running tests
@@ -537,13 +1027,13 @@ jobs:
echo "SQL Server IP: $SQLSERVER_IP"
docker exec \
- -e DB_CONNECTION_STRING="Driver=ODBC Driver 18 for SQL Server;Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes" \
+ -e DB_CONNECTION_STRING="Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes" \
-e DB_PASSWORD="$(DB_PASSWORD)" \
test-container-$(distroName)-$(archName) bash -c "
source /opt/venv/bin/activate
echo 'Build successful, running tests now on $(distroName) ARM64'
echo 'Architecture:' \$(uname -m)
- echo 'Using connection string: Driver=ODBC Driver 18 for SQL Server;Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=***;TrustServerCertificate=yes'
+ echo 'Using connection string: Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=***;TrustServerCertificate=yes'
python main.py
python -m pytest -v --junitxml=test-results-$(distroName)-$(archName).xml --cov=. --cov-report=xml:coverage-$(distroName)-$(archName).xml --capture=tee-sys --cache-clear
"
@@ -573,13 +1063,8 @@ jobs:
testResultsFiles: '**/test-results-$(distroName)-$(archName).xml'
testRunTitle: 'Publish pytest results on $(distroName) ARM64'
- - task: PublishCodeCoverageResults@1
- inputs:
- codeCoverageTool: 'Cobertura'
- summaryFileLocation: 'coverage-$(distroName)-$(archName).xml'
- displayName: 'Publish code coverage results for $(distroName) ARM64'
-
- job: PytestOnLinux_RHEL9
+ displayName: 'Linux RedHat x86_64'
pool:
vmImage: 'ubuntu-latest'
@@ -637,18 +1122,17 @@ jobs:
dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
subscription-manager repos --enable codeready-builder-for-rhel-9-$(arch)-rpms || dnf config-manager --set-enabled ubi-9-codeready-builder
- # Install Python 3.9 (available in RHEL 9 UBI) and development tools
- dnf install -y python3 python3-pip python3-devel cmake curl wget gnupg2 glibc-devel kernel-headers
- dnf install -y python3-libs python3-debug
- dnf install -y gcc gcc-c++ make binutils
- dnf install -y cmake
+ # Install Python 3.12 (available in RHEL 9.4+) and development tools
+ # Note: curl and wget omitted to avoid conflicts with curl-minimal
+ dnf install -y python3.12 python3.12-pip python3.12-devel python3.12-libs gnupg2 glibc-devel kernel-headers
+ dnf install -y gcc gcc-c++ make binutils cmake
# If that doesn't work, try installing from different repositories
if ! which gcc; then
echo 'Trying alternative gcc installation...'
dnf --enablerepo=ubi-9-codeready-builder install -y gcc gcc-c++
fi
# Verify installation
- python3 --version
+ python3.12 --version
which gcc && which g++
gcc --version
g++ --version
@@ -700,8 +1184,8 @@ jobs:
- script: |
# Install Python dependencies in the container using virtual environment
docker exec test-container-rhel9 bash -c "
- # Create a virtual environment with Python 3.9
- python3 -m venv myvenv
+ # Create a virtual environment with Python 3.12
+ python3.12 -m venv myvenv
source myvenv/bin/activate
# Install dependencies in the virtual environment
@@ -717,7 +1201,7 @@ jobs:
# Build pybind bindings in the container
docker exec test-container-rhel9 bash -c "
source myvenv/bin/activate
- ls /usr/include/python3.9
+ ls /usr/include/python3.12
# Set compiler environment variables
export CC=/usr/bin/gcc
export CXX=/usr/bin/g++
@@ -750,12 +1234,12 @@ jobs:
echo "SQL Server IP: $SQLSERVER_IP"
docker exec \
- -e DB_CONNECTION_STRING="Driver=ODBC Driver 18 for SQL Server;Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes" \
+ -e DB_CONNECTION_STRING="Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes" \
-e DB_PASSWORD="$(DB_PASSWORD)" \
test-container-rhel9 bash -c "
source myvenv/bin/activate
echo 'Build successful, running tests now on RHEL 9'
- echo 'Using connection string: Driver=ODBC Driver 18 for SQL Server;Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=***;TrustServerCertificate=yes'
+ echo 'Using connection string: Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=***;TrustServerCertificate=yes'
python main.py
python -m pytest -v --junitxml=test-results-rhel9.xml --cov=. --cov-report=xml:coverage-rhel9.xml --capture=tee-sys --cache-clear
"
@@ -785,13 +1269,8 @@ jobs:
testResultsFiles: '**/test-results-rhel9.xml'
testRunTitle: 'Publish pytest results on RHEL 9'
- - task: PublishCodeCoverageResults@1
- inputs:
- codeCoverageTool: 'Cobertura'
- summaryFileLocation: 'coverage-rhel9.xml'
- displayName: 'Publish code coverage results for RHEL 9'
-
- job: PytestOnLinux_RHEL9_ARM64
+ displayName: 'Linux RedHat ARM64'
pool:
vmImage: 'ubuntu-latest'
@@ -858,18 +1337,17 @@ jobs:
dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
subscription-manager repos --enable codeready-builder-for-rhel-9-$(arch)-rpms || dnf config-manager --set-enabled ubi-9-codeready-builder
- # Install Python 3.9 (available in RHEL 9 UBI) and development tools
- dnf install -y python3 python3-pip python3-devel cmake curl wget gnupg2 glibc-devel kernel-headers
- dnf install -y python3-libs python3-debug
- dnf install -y gcc gcc-c++ make binutils
- dnf install -y cmake
+ # Install Python 3.12 (available in RHEL 9.4+) and development tools
+ # Note: curl and wget omitted to avoid conflicts with curl-minimal
+ dnf install -y python3.12 python3.12-pip python3.12-devel python3.12-libs gnupg2 glibc-devel kernel-headers
+ dnf install -y gcc gcc-c++ make binutils cmake
# If that doesn't work, try installing from different repositories
if ! which gcc; then
echo 'Trying alternative gcc installation...'
dnf --enablerepo=ubi-9-codeready-builder install -y gcc gcc-c++
fi
# Verify installation and architecture
- python3 --version
+ python3.12 --version
which gcc && which g++
gcc --version
g++ --version
@@ -924,8 +1402,8 @@ jobs:
- script: |
# Install Python dependencies in the container using virtual environment
docker exec test-container-rhel9-arm64 bash -c "
- # Create a virtual environment with Python 3.9
- python3 -m venv myvenv
+ # Create a virtual environment with Python 3.12
+ python3.12 -m venv myvenv
source myvenv/bin/activate
# Install dependencies in the virtual environment
@@ -941,7 +1419,7 @@ jobs:
# Build pybind bindings in the ARM64 container
docker exec test-container-rhel9-arm64 bash -c "
source myvenv/bin/activate
- ls /usr/include/python3.9
+ ls /usr/include/python3.12
# Set compiler environment variables
export CC=/usr/bin/gcc
export CXX=/usr/bin/g++
@@ -951,6 +1429,7 @@ jobs:
./build.sh
"
displayName: 'Build pybind bindings (.so) in RHEL 9 ARM64 container'
+ retryCountOnTaskFailure: 2
- script: |
# Uninstall ODBC Driver before running tests
@@ -974,13 +1453,13 @@ jobs:
echo "SQL Server IP: $SQLSERVER_IP"
docker exec \
- -e DB_CONNECTION_STRING="Driver=ODBC Driver 18 for SQL Server;Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes" \
+ -e DB_CONNECTION_STRING="Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes" \
-e DB_PASSWORD="$(DB_PASSWORD)" \
test-container-rhel9-arm64 bash -c "
source myvenv/bin/activate
echo 'Build successful, running tests now on RHEL 9 ARM64'
echo 'Architecture:' \$(uname -m)
- echo 'Using connection string: Driver=ODBC Driver 18 for SQL Server;Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=***;TrustServerCertificate=yes'
+ echo 'Using connection string: Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=***;TrustServerCertificate=yes'
python -m pytest -v --junitxml=test-results-rhel9-arm64.xml --cov=. --cov-report=xml:coverage-rhel9-arm64.xml --capture=tee-sys --cache-clear
"
displayName: 'Run pytest with coverage in RHEL 9 ARM64 container'
@@ -1009,8 +1488,565 @@ jobs:
testResultsFiles: '**/test-results-rhel9-arm64.xml'
testRunTitle: 'Publish pytest results on RHEL 9 ARM64'
- - task: PublishCodeCoverageResults@1
+- job: PytestOnLinux_Alpine
+ displayName: 'Linux Alpine x86_64'
+ pool:
+ vmImage: 'ubuntu-latest'
+
+ steps:
+ - script: |
+ # Set up Docker buildx for multi-architecture support
+ docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
+ docker buildx create --name multiarch --driver docker-container --use
+ docker buildx inspect --bootstrap
+ displayName: 'Setup Docker buildx for multi-architecture support'
+
+ - script: |
+ # Create a Docker container for testing on x86_64
+ docker run -d --name test-container-alpine \
+ --platform linux/amd64 \
+ -v $(Build.SourcesDirectory):/workspace \
+ -w /workspace \
+ --network bridge \
+ alpine:latest \
+ tail -f /dev/null
+ displayName: 'Create Alpine x86_64 container'
+
+ - script: |
+ # Start SQL Server container (x86_64)
+ docker run -d --name sqlserver-alpine \
+ --platform linux/amd64 \
+ -e ACCEPT_EULA=Y \
+ -e MSSQL_SA_PASSWORD="$(DB_PASSWORD)" \
+ -p 1433:1433 \
+ mcr.microsoft.com/mssql/server:2022-latest
+
+ # Wait for SQL Server to be ready
+ echo "Waiting for SQL Server to start..."
+ for i in {1..60}; do
+ if docker exec sqlserver-alpine \
+ /opt/mssql-tools18/bin/sqlcmd \
+ -S localhost \
+ -U SA \
+ -P "$(DB_PASSWORD)" \
+ -C -Q "SELECT 1" >/dev/null 2>&1; then
+ echo "SQL Server is ready!"
+ break
+ fi
+ echo "Waiting... ($i/60)"
+ sleep 2
+ done
+
+ # Create test database
+ docker exec sqlserver-alpine \
+ /opt/mssql-tools18/bin/sqlcmd \
+ -S localhost \
+ -U SA \
+ -P "$(DB_PASSWORD)" \
+ -C -Q "CREATE DATABASE TestDB"
+ displayName: 'Start SQL Server container for Alpine x86_64'
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ - script: |
+ # Install dependencies in the Alpine x86_64 container
+ docker exec test-container-alpine sh -c "
+ # Update package index
+ apk update
+
+ # Install build tools and system dependencies
+ apk add --no-cache \
+ build-base \
+ cmake \
+ clang \
+ git \
+ bash \
+ wget \
+ curl \
+ gnupg \
+ unixodbc \
+ unixodbc-dev \
+ libffi-dev \
+ openssl-dev \
+ zlib-dev \
+ py3-pip \
+ python3-dev \
+ patchelf
+
+ # Create symlinks for Python compatibility
+ ln -sf python3 /usr/bin/python || true
+ ln -sf pip3 /usr/bin/pip || true
+
+ # Verify installation and architecture
+ uname -m
+ python --version
+ which cmake
+ "
+ displayName: 'Install basic dependencies in Alpine x86_64 container'
+
+ - script: |
+ # Install ODBC driver in the Alpine x86_64 container
+ docker exec test-container-alpine bash -c "
+ # Detect architecture for ODBC driver download
+ case \$(uname -m) in
+ x86_64) architecture='amd64' ;;
+ arm64|aarch64) architecture='arm64' ;;
+ *) architecture='unsupported' ;;
+ esac
+
+ if [[ \$architecture == 'unsupported' ]]; then
+ echo \"Alpine architecture \$(uname -m) is not currently supported.\"
+ exit 1
+ fi
+
+ echo 'Detected architecture: '\$architecture
+
+ # Download the packages
+ curl -O https://download.microsoft.com/download/fae28b9a-d880-42fd-9b98-d779f0fdd77f/msodbcsql18_18.5.1.1-1_\$architecture.apk
+ curl -O https://download.microsoft.com/download/7/6d/76de322a-d860-4894-9945-f0cc5d6a45f8/mssql-tools18_18.4.1.1-1_\$architecture.apk
+
+ # Download signatures for verification
+ curl -O https://download.microsoft.com/download/fae28b9a-d880-42fd-9b98-d779f0fdd77f/msodbcsql18_18.5.1.1-1_\$architecture.sig
+ curl -O https://download.microsoft.com/download/7/6d/76de322a-d860-4894-9945-f0cc5d6a45f8/mssql-tools18_18.4.1.1-1_\$architecture.sig
+
+ # Import Microsoft GPG key and verify packages
+ curl https://packages.microsoft.com/keys/microsoft.asc | gpg --import -
+ gpg --verify msodbcsql18_18.5.1.1-1_\$architecture.sig msodbcsql18_18.5.1.1-1_\$architecture.apk
+ gpg --verify mssql-tools18_18.4.1.1-1_\$architecture.sig mssql-tools18_18.4.1.1-1_\$architecture.apk
+
+ # Install the packages
+ apk add --allow-untrusted msodbcsql18_18.5.1.1-1_\$architecture.apk
+ apk add --allow-untrusted mssql-tools18_18.4.1.1-1_\$architecture.apk
+
+ # Cleanup
+ rm -f msodbcsql18_18.5.1.1-1_\$architecture.* mssql-tools18_18.4.1.1-1_\$architecture.*
+
+ # Add mssql-tools to PATH
+ export PATH=\"\$PATH:/opt/mssql-tools18/bin\"
+ echo 'export PATH=\"\$PATH:/opt/mssql-tools18/bin\"' >> ~/.bashrc
+ "
+ displayName: 'Install ODBC Driver in Alpine x86_64 container'
+
+ - script: |
+ # Install Python dependencies in the Alpine x86_64 container using virtual environment
+ docker exec test-container-alpine bash -c "
+ # Create virtual environment
+ python -m venv /workspace/venv
+
+ # Activate virtual environment and install dependencies
+ source /workspace/venv/bin/activate
+
+ # Upgrade pip and install dependencies
+ python -m pip install --upgrade pip
+ python -m pip install -r requirements.txt
+
+ # Verify virtual environment is active
+ which python
+ which pip
+ "
+ displayName: 'Install Python dependencies in Alpine x86_64 container'
+
+ - script: |
+ # Build pybind bindings in the Alpine x86_64 container
+ docker exec test-container-alpine bash -c "
+ # Activate virtual environment
+ source /workspace/venv/bin/activate
+
+ cd mssql_python/pybind
+ chmod +x build.sh
+ ./build.sh
+ "
+ displayName: 'Build pybind bindings (.so) in Alpine x86_64 container'
+
+ - script: |
+ # Uninstall ODBC Driver before running tests to use bundled libraries
+ docker exec test-container-alpine bash -c "
+ # Remove system ODBC installation
+ apk del msodbcsql18 mssql-tools18 unixodbc-dev || echo 'ODBC packages not installed via apk'
+ rm -f /usr/bin/sqlcmd
+ rm -f /usr/bin/bcp
+ rm -rf /opt/microsoft/msodbcsql18
+ rm -f /usr/lib/libodbcinst.so.2
+ odbcinst -u -d -n 'ODBC Driver 18 for SQL Server' || true
+ echo 'Uninstalled system ODBC Driver and cleaned up libraries'
+ echo 'Verifying x86_64 alpine driver library signatures:'
+ ldd mssql_python/libs/linux/alpine/x86_64/lib/libmsodbcsql-18.5.so.1.1 || echo 'Driver library not found'
+ "
+ displayName: 'Uninstall system ODBC Driver before running tests in Alpine x86_64 container'
+
+ - script: |
+ # Run tests in the Alpine x86_64 container
+ # Get SQL Server container IP
+ SQLSERVER_IP=$(docker inspect sqlserver-alpine --format='{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}')
+ echo "SQL Server IP: $SQLSERVER_IP"
+
+ docker exec \
+ -e DB_CONNECTION_STRING="Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes" \
+ -e DB_PASSWORD="$(DB_PASSWORD)" \
+ test-container-alpine bash -c "
+ echo 'Build successful, running tests now on Alpine x86_64'
+ echo 'Architecture:' \$(uname -m)
+ echo 'Alpine version:' \$(cat /etc/alpine-release)
+ echo 'Using connection string: Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=***;TrustServerCertificate=yes'
+
+ # Activate virtual environment
+ source /workspace/venv/bin/activate
+
+ # Test basic Python import first
+ python -c 'import mssql_python; print(\"mssql_python imported successfully\")'
+
+ # Run main.py if it exists
+ if [ -f main.py ]; then
+ echo 'Running main.py...'
+ python main.py
+ fi
+
+ # Run pytest
+ python -m pytest -v --junitxml=test-results-alpine.xml --cov=. --cov-report=xml:coverage-alpine.xml --capture=tee-sys --cache-clear
+ "
+ displayName: 'Run pytest with coverage in Alpine x86_64 container'
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ - script: |
+ # Copy test results from container to host
+ docker cp test-container-alpine:/workspace/test-results-alpine.xml $(Build.SourcesDirectory)/ || echo 'Failed to copy test results'
+ docker cp test-container-alpine:/workspace/coverage-alpine.xml $(Build.SourcesDirectory)/ || echo 'Failed to copy coverage results'
+ displayName: 'Copy test results from Alpine x86_64 container'
+ condition: always()
+
+ - script: |
+ # Clean up containers
+ docker stop test-container-alpine || true
+ docker rm test-container-alpine || true
+ docker stop sqlserver-alpine || true
+ docker rm sqlserver-alpine || true
+ displayName: 'Clean up Alpine x86_64 containers'
+ condition: always()
+
+ - task: PublishTestResults@2
+ condition: succeededOrFailed()
+ inputs:
+ testResultsFiles: '**/test-results-alpine.xml'
+ testRunTitle: 'Publish pytest results on Alpine x86_64'
+
+- job: PytestOnLinux_Alpine_ARM64
+ displayName: 'Linux Alpine ARM64'
+ pool:
+ vmImage: 'ubuntu-latest'
+
+ steps:
+ - script: |
+ # Set up Docker buildx for multi-architecture support
+ docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
+ docker buildx create --name multiarch --driver docker-container --use
+ docker buildx inspect --bootstrap
+ displayName: 'Setup Docker buildx for ARM64 emulation'
+
+ - script: |
+ # Create a Docker container for testing on ARM64
+ # TODO(AB#40901): Temporary pin to 3.22 due to msodbcsql ARM64 package arch mismatch
+ # Revert to alpine:latest once ODBC team releases fixed ARM64 package
+ docker run -d --name test-container-alpine-arm64 \
+ --platform linux/arm64 \
+ -v $(Build.SourcesDirectory):/workspace \
+ -w /workspace \
+ --network bridge \
+ alpine:3.22 \
+ tail -f /dev/null
+ displayName: 'Create Alpine ARM64 container'
+
+ - script: |
+ # Start SQL Server container (x86_64 - SQL Server doesn't support ARM64)
+ docker run -d --name sqlserver-alpine-arm64 \
+ --platform linux/amd64 \
+ -e ACCEPT_EULA=Y \
+ -e MSSQL_SA_PASSWORD="$(DB_PASSWORD)" \
+ -p 1433:1433 \
+ mcr.microsoft.com/mssql/server:2022-latest
+
+ # Wait for SQL Server to be ready
+ echo "Waiting for SQL Server to start..."
+ for i in {1..60}; do
+ if docker exec sqlserver-alpine-arm64 \
+ /opt/mssql-tools18/bin/sqlcmd \
+ -S localhost \
+ -U SA \
+ -P "$(DB_PASSWORD)" \
+ -C -Q "SELECT 1" >/dev/null 2>&1; then
+ echo "SQL Server is ready!"
+ break
+ fi
+ echo "Waiting... ($i/60)"
+ sleep 2
+ done
+
+ # Create test database
+ docker exec sqlserver-alpine-arm64 \
+ /opt/mssql-tools18/bin/sqlcmd \
+ -S localhost \
+ -U SA \
+ -P "$(DB_PASSWORD)" \
+ -C -Q "CREATE DATABASE TestDB"
+ displayName: 'Start SQL Server container for Alpine ARM64'
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ - script: |
+ # Install dependencies in the Alpine ARM64 container
+ docker exec test-container-alpine-arm64 sh -c "
+ # Update package index
+ apk update
+
+ # Install build tools and system dependencies
+ apk add --no-cache \
+ build-base \
+ cmake \
+ clang \
+ git \
+ bash \
+ wget \
+ curl \
+ gnupg \
+ unixodbc \
+ unixodbc-dev \
+ libffi-dev \
+ openssl-dev \
+ zlib-dev \
+ py3-pip \
+ python3-dev \
+ patchelf
+
+ # Create symlinks for Python compatibility
+ ln -sf python3 /usr/bin/python || true
+ ln -sf pip3 /usr/bin/pip || true
+
+ # Verify installation and architecture
+ uname -m
+ python --version
+ which cmake
+ "
+ displayName: 'Install basic dependencies in Alpine ARM64 container'
+
+ - script: |
+ # Install ODBC driver in the Alpine ARM64 container
+ docker exec test-container-alpine-arm64 bash -c "
+ # Detect architecture for ODBC driver download
+ case \$(uname -m) in
+ x86_64) architecture='amd64' ;;
+ arm64|aarch64) architecture='arm64' ;;
+ *) architecture='unsupported' ;;
+ esac
+
+ if [[ \"\$architecture\" == 'unsupported' ]]; then
+ echo \"Alpine architecture \$(uname -m) is not currently supported.\"
+ exit 1
+ fi
+
+ echo 'Detected architecture: '\$architecture
+
+ # Download the packages
+ curl -O https://download.microsoft.com/download/fae28b9a-d880-42fd-9b98-d779f0fdd77f/msodbcsql18_18.5.1.1-1_\$architecture.apk
+ curl -O https://download.microsoft.com/download/7/6d/76de322a-d860-4894-9945-f0cc5d6a45f8/mssql-tools18_18.4.1.1-1_\$architecture.apk
+
+ # Download signatures for verification
+ curl -O https://download.microsoft.com/download/fae28b9a-d880-42fd-9b98-d779f0fdd77f/msodbcsql18_18.5.1.1-1_\$architecture.sig
+ curl -O https://download.microsoft.com/download/7/6d/76de322a-d860-4894-9945-f0cc5d6a45f8/mssql-tools18_18.4.1.1-1_\$architecture.sig
+
+ # Import Microsoft GPG key and verify packages
+ curl https://packages.microsoft.com/keys/microsoft.asc | gpg --import -
+ gpg --verify msodbcsql18_18.5.1.1-1_\$architecture.sig msodbcsql18_18.5.1.1-1_\$architecture.apk
+ gpg --verify mssql-tools18_18.4.1.1-1_\$architecture.sig mssql-tools18_18.4.1.1-1_\$architecture.apk
+
+ # Install the packages
+ apk add --allow-untrusted msodbcsql18_18.5.1.1-1_\$architecture.apk
+ apk add --allow-untrusted mssql-tools18_18.4.1.1-1_\$architecture.apk
+
+ # Cleanup
+ rm -f msodbcsql18_18.5.1.1-1_\$architecture.* mssql-tools18_18.4.1.1-1_\$architecture.*
+
+ # Add mssql-tools to PATH
+ export PATH=\"\$PATH:/opt/mssql-tools18/bin\"
+ echo 'export PATH=\"\$PATH:/opt/mssql-tools18/bin\"' >> ~/.bashrc
+ "
+ displayName: 'Install ODBC Driver in Alpine ARM64 container'
+
+ - script: |
+ # Install Python dependencies in the Alpine ARM64 container using virtual environment
+ docker exec test-container-alpine-arm64 bash -c "
+ # Create virtual environment
+ python -m venv /workspace/venv
+
+ # Activate virtual environment and install dependencies
+ source /workspace/venv/bin/activate
+
+ # Upgrade pip and install dependencies
+ python -m pip install --upgrade pip
+ python -m pip install -r requirements.txt
+
+ # Verify virtual environment is active
+ which python
+ which pip
+ "
+ displayName: 'Install Python dependencies in Alpine ARM64 container'
+
+ - script: |
+ # Build pybind bindings in the Alpine ARM64 container
+ docker exec test-container-alpine-arm64 bash -c "
+ # Activate virtual environment
+ source /workspace/venv/bin/activate
+
+ cd mssql_python/pybind
+ chmod +x build.sh
+ ./build.sh
+ "
+ displayName: 'Build pybind bindings (.so) in Alpine ARM64 container'
+ retryCountOnTaskFailure: 2
+
+ - script: |
+ # Uninstall ODBC Driver before running tests to use bundled libraries
+ docker exec test-container-alpine-arm64 bash -c "
+ # Remove system ODBC installation
+ apk del msodbcsql18 mssql-tools18 unixodbc-dev || echo 'ODBC packages not installed via apk'
+ rm -f /usr/bin/sqlcmd
+ rm -f /usr/bin/bcp
+ rm -rf /opt/microsoft/msodbcsql18
+ rm -f /usr/lib/libodbcinst.so.2
+ odbcinst -u -d -n 'ODBC Driver 18 for SQL Server' || true
+ echo 'Uninstalled system ODBC Driver and cleaned up libraries'
+ echo 'Verifying arm64 alpine driver library signatures:'
+ ldd mssql_python/libs/linux/alpine/arm64/lib/libmsodbcsql-18.5.so.1.1 || echo 'Driver library not found'
+ "
+ displayName: 'Uninstall system ODBC Driver before running tests in Alpine ARM64 container'
+
+ - script: |
+ # Run tests in the Alpine ARM64 container
+ # Get SQL Server container IP
+ SQLSERVER_IP=$(docker inspect sqlserver-alpine-arm64 --format='{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}')
+ echo "SQL Server IP: $SQLSERVER_IP"
+
+ docker exec \
+ -e DB_CONNECTION_STRING="Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes" \
+ -e DB_PASSWORD="$(DB_PASSWORD)" \
+ test-container-alpine-arm64 bash -c "
+ echo 'Build successful, running tests now on Alpine ARM64'
+ echo 'Architecture:' \$(uname -m)
+ echo 'Alpine version:' \$(cat /etc/alpine-release)
+ echo 'Using connection string: Server=$SQLSERVER_IP;Database=TestDB;Uid=SA;Pwd=***;TrustServerCertificate=yes'
+
+ # Activate virtual environment
+ source /workspace/venv/bin/activate
+
+ # Test basic Python import first
+ python -c 'import mssql_python; print(\"mssql_python imported successfully\")'
+
+ # Run main.py if it exists
+ if [ -f main.py ]; then
+ echo 'Running main.py...'
+ python main.py
+ fi
+
+ # Run pytest
+ python -m pytest -v --junitxml=test-results-alpine-arm64.xml --cov=. --cov-report=xml:coverage-alpine-arm64.xml --capture=tee-sys --cache-clear
+ "
+ displayName: 'Run pytest with coverage in Alpine ARM64 container'
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ - script: |
+ # Copy test results from container to host
+ docker cp test-container-alpine-arm64:/workspace/test-results-alpine-arm64.xml $(Build.SourcesDirectory)/ || echo 'Failed to copy test results'
+ docker cp test-container-alpine-arm64:/workspace/coverage-alpine-arm64.xml $(Build.SourcesDirectory)/ || echo 'Failed to copy coverage results'
+ displayName: 'Copy test results from Alpine ARM64 container'
+ condition: always()
+
+ - script: |
+ # Clean up containers
+ docker stop test-container-alpine-arm64 || true
+ docker rm test-container-alpine-arm64 || true
+ docker stop sqlserver-alpine-arm64 || true
+ docker rm sqlserver-alpine-arm64 || true
+ displayName: 'Clean up Alpine ARM64 containers'
+ condition: always()
+
+ - task: PublishTestResults@2
+ condition: succeededOrFailed()
+ inputs:
+ testResultsFiles: '**/test-results-alpine-arm64.xml'
+ testRunTitle: 'Publish pytest results on Alpine ARM64'
+
+- job: CodeCoverageReport
+ displayName: 'Full Code Coverage Report in Ubuntu x86_64'
+ pool:
+ vmImage: 'ubuntu-latest'
+
+ steps:
+ - script: |
+ # Install build dependencies
+ sudo apt-get update
+ sudo apt-get install -y cmake gcc g++ lcov unixodbc-dev llvm clang
+ displayName: 'Install build dependencies'
+
+ - script: |
+ # Start SQL Server container
+ docker pull mcr.microsoft.com/mssql/server:2022-latest
+ docker run \
+ --name sqlserver \
+ -e ACCEPT_EULA=Y \
+ -e MSSQL_SA_PASSWORD="$(DB_PASSWORD)" \
+ -p 1433:1433 \
+ -d mcr.microsoft.com/mssql/server:2022-latest
+
+ # Wait until SQL Server is ready
+ for i in {1..30}; do
+ docker exec sqlserver \
+ /opt/mssql-tools18/bin/sqlcmd \
+ -S localhost \
+ -U SA \
+ -P "$(DB_PASSWORD)" \
+ -C -Q "SELECT 1" && break
+ sleep 2
+ done
+ displayName: 'Start SQL Server container'
+ env:
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ - script: |
+ # Install Python dependencies
+ python -m pip install --upgrade pip
+ pip install -r requirements.txt
+ pip install coverage-lcov lcov-cobertura
+ displayName: 'Install Python dependencies'
+
+ - script: |
+ # Build pybind bindings with coverage instrumentation
+ cd mssql_python/pybind
+ ./build.sh codecov
+ displayName: 'Build pybind bindings with coverage'
+
+ - script: |
+ # Generate unified coverage (Python + C++)
+ chmod +x ./generate_codecov.sh
+ ./generate_codecov.sh
+
+ # Convert unified LCOV to Cobertura XML for ADO reporting
+ lcov_cobertura total.info --output unified-coverage/coverage.xml
+ displayName: 'Generate unified coverage (Python + C++)'
+ env:
+ DB_CONNECTION_STRING: 'Server=tcp:127.0.0.1,1433;Database=master;Uid=SA;Pwd=$(DB_PASSWORD);TrustServerCertificate=yes'
+ DB_PASSWORD: $(DB_PASSWORD)
+
+ - task: PublishTestResults@2
+ condition: succeededOrFailed()
+ inputs:
+ testResultsFiles: '**/test-results.xml'
+ testRunTitle: 'Publish pytest results with unified coverage'
+
+ - task: PublishCodeCoverageResults@2
+ condition: succeededOrFailed()
inputs:
- codeCoverageTool: 'Cobertura'
- summaryFileLocation: 'coverage-rhel9-arm64.xml'
- displayName: 'Publish code coverage results for RHEL 9 ARM64'
\ No newline at end of file
+ codeCoverageTool: Cobertura
+ summaryFileLocation: 'unified-coverage/coverage.xml'
+ reportDirectory: 'unified-coverage'
+ failIfCoverageEmpty: true
+ displayName: 'Publish unified code coverage results'
diff --git a/es-metadata.yml b/es-metadata.yml
new file mode 100644
index 000000000..53f8b18bb
--- /dev/null
+++ b/es-metadata.yml
@@ -0,0 +1,12 @@
+schemaVersion: 1.0.0
+providers:
+- provider: InventoryAsCode
+ version: 1.0.0
+ metadata:
+ isProduction: true
+ accountableOwners:
+ service: ae66a2ba-2c8a-4e77-8323-305cfad11f0e
+ routing:
+ defaultAreaPath:
+ org: sqlclientdrivers
+ path: mssql-python
\ No newline at end of file
diff --git a/generate_codecov.sh b/generate_codecov.sh
new file mode 100644
index 000000000..f24dd78d5
--- /dev/null
+++ b/generate_codecov.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+set -euo pipefail
+
+echo "==================================="
+echo "[STEP 1] Installing dependencies"
+echo "==================================="
+
+# Update package list
+sudo apt-get update
+
+# Install LLVM (for llvm-profdata, llvm-cov)
+if ! command -v llvm-profdata &>/dev/null; then
+ echo "[ACTION] Installing LLVM via apt"
+ sudo apt-get install -y llvm
+fi
+
+# Install lcov (provides lcov + genhtml)
+if ! command -v genhtml &>/dev/null; then
+ echo "[ACTION] Installing lcov via apt"
+ sudo apt-get install -y lcov
+fi
+
+# Install Python plugin for LCOV export
+if ! python -m pip show coverage-lcov &>/dev/null; then
+ echo "[ACTION] Installing coverage-lcov via pip"
+ python -m pip install coverage-lcov
+fi
+
+# Install LCOV → Cobertura converter (for ADO)
+if ! python -m pip show lcov-cobertura &>/dev/null; then
+ echo "[ACTION] Installing lcov-cobertura via pip"
+ python -m pip install lcov-cobertura
+fi
+
+echo "==================================="
+echo "[STEP 2] Running pytest with Python coverage"
+echo "==================================="
+
+# Cleanup old coverage
+rm -f .coverage coverage.xml python-coverage.info cpp-coverage.info total.info
+rm -rf htmlcov unified-coverage
+
+# Run pytest with Python coverage (XML + HTML output)
+python -m pytest -v \
+ --junitxml=test-results.xml \
+ --cov=mssql_python \
+ --cov-report=xml:coverage.xml \
+ --cov-report=html \
+ --capture=tee-sys \
+ --cache-clear
+
+# Convert Python coverage to LCOV format (restrict to repo only)
+echo "[ACTION] Converting Python coverage to LCOV"
+coverage lcov -o python-coverage.info --include="mssql_python/*"
+
+echo "==================================="
+echo "[STEP 3] Processing C++ coverage (Clang/LLVM)"
+echo "==================================="
+
+# Merge raw profile data from pybind runs
+if [ ! -f default.profraw ]; then
+ echo "[ERROR] default.profraw not found. Did you build with -fprofile-instr-generate?"
+ exit 1
+fi
+
+llvm-profdata merge -sparse default.profraw -o default.profdata
+
+# Find the pybind .so file (Linux build)
+PYBIND_SO=$(find mssql_python -name "*.so" | head -n 1)
+if [ -z "$PYBIND_SO" ]; then
+ echo "[ERROR] Could not find pybind .so"
+ exit 1
+fi
+
+echo "[INFO] Using pybind module: $PYBIND_SO"
+
+# Export C++ coverage, excluding Python headers, pybind11, and system includes
+llvm-cov export "$PYBIND_SO" \
+ -instr-profile=default.profdata \
+ -ignore-filename-regex='(python3\.[0-9]+|cpython|pybind11|/usr/include/|/usr/lib/)' \
+ --skip-functions \
+ -format=lcov > cpp-coverage.info
+
+# Note: LCOV exclusion markers (LCOV_EXCL_LINE) should be added to source code
+# to exclude LOG() statements from coverage. However, for automated exclusion
+# of all LOG lines without modifying source code, we can use geninfo's --omit-lines
+# feature during the merge step (see below).
+
+echo "==================================="
+echo "[STEP 4] Merging Python + C++ coverage"
+echo "==================================="
+
+# Merge LCOV reports (ignore inconsistencies in Python LCOV export)
+echo "[ACTION] Merging Python and C++ coverage"
+lcov -a python-coverage.info -a cpp-coverage.info -o total.info \
+ --ignore-errors inconsistent,corrupt
+
+# Normalize paths so everything starts from mssql_python/
+echo "[ACTION] Normalizing paths in LCOV report"
+sed -i "s|$(pwd)/||g" total.info
+
+# Generate full HTML report
+genhtml total.info \
+ --output-directory unified-coverage \
+ --quiet \
+ --title "Unified Coverage Report"
+
+# Generate Cobertura XML (for Azure DevOps Code Coverage tab)
+lcov_cobertura total.info --output coverage.xml
diff --git a/main.py b/main.py
index b45b88d73..7e56b2feb 100644
--- a/main.py
+++ b/main.py
@@ -1,15 +1,12 @@
from mssql_python import connect
-from mssql_python import setup_logging
+from mssql_python.logging import setup_logging
import os
-import decimal
-setup_logging('stdout')
+# Clean one-liner: set level and output mode together
+setup_logging(output="both")
conn_str = os.getenv("DB_CONNECTION_STRING")
conn = connect(conn_str)
-
-# conn.autocommit = True
-
cursor = conn.cursor()
cursor.execute("SELECT database_id, name from sys.databases;")
rows = cursor.fetchall()
@@ -18,4 +15,4 @@
print(f"Database ID: {row[0]}, Name: {row[1]}")
cursor.close()
-conn.close()
\ No newline at end of file
+conn.close()
diff --git a/mssql_python/__init__.py b/mssql_python/__init__.py
index 6bf957779..2bcac47bb 100644
--- a/mssql_python/__init__.py
+++ b/mssql_python/__init__.py
@@ -4,8 +4,23 @@
This module initializes the mssql_python package.
"""
+import atexit
+import sys
+import threading
+import types
+import weakref
+from typing import Dict
+
+# Import settings from helpers to avoid circular imports
+from .helpers import Settings, get_settings, _settings, _settings_lock
+
+# Driver version
+__version__ = "1.3.0"
+
# Exceptions
# https://www.python.org/dev/peps/pep-0249/#exceptions
+
+# Import necessary modules
from .exceptions import (
Warning,
Error,
@@ -17,6 +32,7 @@
InternalError,
ProgrammingError,
NotSupportedError,
+ ConnectionStringParseError,
)
# Type Objects
@@ -38,37 +54,306 @@
# Connection Objects
from .db_connection import connect, Connection
+# Connection String Handling
+from .connection_string_parser import _ConnectionStringParser
+from .connection_string_builder import _ConnectionStringBuilder
+
# Cursor Objects
from .cursor import Cursor
-# Logging Configuration
-from .logging_config import setup_logging, get_logger
+# Logging Configuration (Simplified single-level DEBUG system)
+from .logging import logger, setup_logging, driver_logger
# Constants
-from .constants import ConstantsDDBC
+from .constants import ConstantsDDBC, GetInfoConstants
+
+# Pooling
+from .pooling import PoolingManager
+
+# Global registry for tracking active connections (using weak references)
+_active_connections = weakref.WeakSet()
+_connections_lock = threading.Lock()
+
+
+def _register_connection(conn):
+ """Register a connection for cleanup before shutdown."""
+ with _connections_lock:
+ _active_connections.add(conn)
+
+
+def _cleanup_connections():
+ """
+ Cleanup function called by atexit to close all active connections.
+
+ This prevents resource leaks during interpreter shutdown by ensuring
+ all ODBC handles are freed in the correct order before Python finalizes.
+ """
+ # Make a copy of the connections to avoid modification during iteration
+ with _connections_lock:
+ connections_to_close = list(_active_connections)
+
+ for conn in connections_to_close:
+ try:
+ # Check if connection is still valid and not closed
+ if hasattr(conn, "_closed") and not conn._closed:
+ # Close will handle both cursors and the connection
+ conn.close()
+ except Exception as e:
+ # Log errors during shutdown cleanup for debugging
+ # We're prioritizing crash prevention over error propagation
+ try:
+ driver_logger.error(
+ f"Error during connection cleanup at shutdown: {type(e).__name__}: {e}"
+ )
+ except Exception:
+ # If logging fails during shutdown, silently ignore
+ pass
+
+
+# Register cleanup function to run before Python exits
+atexit.register(_cleanup_connections)
# GLOBALS
# Read-Only
-apilevel = "2.0"
-paramstyle = "qmark"
-threadsafety = 1
+apilevel: str = "2.0"
+paramstyle: str = "pyformat"
+threadsafety: int = 1
-from .pooling import PoolingManager
-def pooling(max_size=100, idle_timeout=600, enabled=True):
-# """
-# Enable connection pooling with the specified parameters.
-# By default:
-# - If not explicitly called, pooling will be auto-enabled with default values.
-
-# Args:
-# max_size (int): Maximum number of connections in the pool.
-# idle_timeout (int): Time in seconds before idle connections are closed.
-
-# Returns:
-# None
-# """
+# Set the initial decimal separator in C++
+try:
+ from .ddbc_bindings import DDBCSetDecimalSeparator
+
+ DDBCSetDecimalSeparator(_settings.decimal_separator)
+except ImportError:
+ # Handle case where ddbc_bindings is not available
+ DDBCSetDecimalSeparator = None
+
+
+# New functions for decimal separator control
+def setDecimalSeparator(separator: str) -> None:
+ """
+ Sets the decimal separator character used when parsing NUMERIC/DECIMAL values
+ from the database, e.g. the "." in "1,234.56".
+
+ The default is to use the current locale's "decimal_point" value when the module
+ was first imported, or "." if the locale is not available. This function overrides
+ the default.
+
+ Args:
+ separator (str): The character to use as decimal separator
+
+ Raises:
+ ValueError: If the separator is not a single character string
+ """
+ # Type validation
+ if not isinstance(separator, str):
+ raise ValueError("Decimal separator must be a string")
+
+ # Length validation
+ if len(separator) == 0:
+ raise ValueError("Decimal separator cannot be empty")
+
+ if len(separator) > 1:
+ raise ValueError("Decimal separator must be a single character")
+
+ # Character validation
+ if separator.isspace():
+ raise ValueError("Whitespace characters are not allowed as decimal separators")
+
+ # Check for specific disallowed characters
+ if separator in ["\t", "\n", "\r", "\v", "\f"]:
+ raise ValueError(
+ f"Control character '{repr(separator)}' is not allowed as a decimal separator"
+ )
+
+ # Set in Python side settings
+ _settings.decimal_separator = separator
+
+ # Update the C++ side
+ if DDBCSetDecimalSeparator is not None:
+ DDBCSetDecimalSeparator(separator)
+
+
+def getDecimalSeparator() -> str:
+ """
+ Returns the decimal separator character used when parsing NUMERIC/DECIMAL values
+ from the database.
+
+ Returns:
+ str: The current decimal separator character
+ """
+ return _settings.decimal_separator
+
+
+# Export specific constants for setencoding()
+SQL_CHAR: int = ConstantsDDBC.SQL_CHAR.value
+SQL_WCHAR: int = ConstantsDDBC.SQL_WCHAR.value
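+# Note: SQL_WMETADATA is a driver-specific sentinel (not a standard ODBC info type),
+# used by setdecoding() to control how column-name metadata is decoded.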
+SQL_WMETADATA: int = -99
+
+# Export connection attribute constants for set_attr()
+# Only include driver-level attributes that the SQL Server ODBC driver can handle directly
+
+# Core driver-level attributes
+SQL_ATTR_ACCESS_MODE: int = ConstantsDDBC.SQL_ATTR_ACCESS_MODE.value
+SQL_ATTR_CONNECTION_TIMEOUT: int = ConstantsDDBC.SQL_ATTR_CONNECTION_TIMEOUT.value
+SQL_ATTR_CURRENT_CATALOG: int = ConstantsDDBC.SQL_ATTR_CURRENT_CATALOG.value
+SQL_ATTR_LOGIN_TIMEOUT: int = ConstantsDDBC.SQL_ATTR_LOGIN_TIMEOUT.value
+SQL_ATTR_PACKET_SIZE: int = ConstantsDDBC.SQL_ATTR_PACKET_SIZE.value
+SQL_ATTR_TXN_ISOLATION: int = ConstantsDDBC.SQL_ATTR_TXN_ISOLATION.value
+
+# Transaction Isolation Level Constants
+SQL_TXN_READ_UNCOMMITTED: int = ConstantsDDBC.SQL_TXN_READ_UNCOMMITTED.value
+SQL_TXN_READ_COMMITTED: int = ConstantsDDBC.SQL_TXN_READ_COMMITTED.value
+SQL_TXN_REPEATABLE_READ: int = ConstantsDDBC.SQL_TXN_REPEATABLE_READ.value
+SQL_TXN_SERIALIZABLE: int = ConstantsDDBC.SQL_TXN_SERIALIZABLE.value
+
+# Access Mode Constants
+SQL_MODE_READ_WRITE: int = ConstantsDDBC.SQL_MODE_READ_WRITE.value
+SQL_MODE_READ_ONLY: int = ConstantsDDBC.SQL_MODE_READ_ONLY.value
+
+
+def pooling(max_size: int = 100, idle_timeout: int = 600, enabled: bool = True) -> None:
+ """
+ Enable connection pooling with the specified parameters.
+ By default:
+ - If not explicitly called, pooling will be auto-enabled with default values.
+
+ Args:
+ max_size (int): Maximum number of connections in the pool.
+ idle_timeout (int): Time in seconds before idle connections are closed.
+ enabled (bool): Whether to enable or disable pooling.
+
+ Returns:
+ None
+ """
if not enabled:
PoolingManager.disable()
else:
PoolingManager.enable(max_size, idle_timeout)
-
\ No newline at end of file
+
+
+_original_module_setattr = sys.modules[__name__].__setattr__
+
+
+def _custom_setattr(name, value):
+ if name == "lowercase":
+ with _settings_lock:
+ _settings.lowercase = bool(value)
+ # Update the module's lowercase variable
+ _original_module_setattr(name, _settings.lowercase)
+ else:
+ _original_module_setattr(name, value)
+
+
+# Replace the module's __setattr__ with our custom version
+sys.modules[__name__].__setattr__ = _custom_setattr
+
+
+# Export SQL constants at module level
+SQL_VARCHAR: int = ConstantsDDBC.SQL_VARCHAR.value
+SQL_LONGVARCHAR: int = ConstantsDDBC.SQL_LONGVARCHAR.value
+SQL_WVARCHAR: int = ConstantsDDBC.SQL_WVARCHAR.value
+SQL_WLONGVARCHAR: int = ConstantsDDBC.SQL_WLONGVARCHAR.value
+SQL_DECIMAL: int = ConstantsDDBC.SQL_DECIMAL.value
+SQL_NUMERIC: int = ConstantsDDBC.SQL_NUMERIC.value
+SQL_BIT: int = ConstantsDDBC.SQL_BIT.value
+SQL_TINYINT: int = ConstantsDDBC.SQL_TINYINT.value
+SQL_SMALLINT: int = ConstantsDDBC.SQL_SMALLINT.value
+SQL_INTEGER: int = ConstantsDDBC.SQL_INTEGER.value
+SQL_BIGINT: int = ConstantsDDBC.SQL_BIGINT.value
+SQL_REAL: int = ConstantsDDBC.SQL_REAL.value
+SQL_FLOAT: int = ConstantsDDBC.SQL_FLOAT.value
+SQL_DOUBLE: int = ConstantsDDBC.SQL_DOUBLE.value
+SQL_BINARY: int = ConstantsDDBC.SQL_BINARY.value
+SQL_VARBINARY: int = ConstantsDDBC.SQL_VARBINARY.value
+SQL_LONGVARBINARY: int = ConstantsDDBC.SQL_LONGVARBINARY.value
+SQL_DATE: int = ConstantsDDBC.SQL_DATE.value
+SQL_TIME: int = ConstantsDDBC.SQL_TIME.value
+SQL_TIMESTAMP: int = ConstantsDDBC.SQL_TIMESTAMP.value
+
+# Export GetInfo constants at module level
+# Driver and database information
+SQL_DRIVER_NAME: int = GetInfoConstants.SQL_DRIVER_NAME.value
+SQL_DRIVER_VER: int = GetInfoConstants.SQL_DRIVER_VER.value
+SQL_DRIVER_ODBC_VER: int = GetInfoConstants.SQL_DRIVER_ODBC_VER.value
+SQL_DATA_SOURCE_NAME: int = GetInfoConstants.SQL_DATA_SOURCE_NAME.value
+SQL_DATABASE_NAME: int = GetInfoConstants.SQL_DATABASE_NAME.value
+SQL_SERVER_NAME: int = GetInfoConstants.SQL_SERVER_NAME.value
+SQL_USER_NAME: int = GetInfoConstants.SQL_USER_NAME.value
+
+# SQL conformance and support
+SQL_SQL_CONFORMANCE: int = GetInfoConstants.SQL_SQL_CONFORMANCE.value
+SQL_KEYWORDS: int = GetInfoConstants.SQL_KEYWORDS.value
+SQL_IDENTIFIER_QUOTE_CHAR: int = GetInfoConstants.SQL_IDENTIFIER_QUOTE_CHAR.value
+SQL_SEARCH_PATTERN_ESCAPE: int = GetInfoConstants.SQL_SEARCH_PATTERN_ESCAPE.value
+
+# Catalog and schema support
+SQL_CATALOG_TERM: int = GetInfoConstants.SQL_CATALOG_TERM.value
+SQL_SCHEMA_TERM: int = GetInfoConstants.SQL_SCHEMA_TERM.value
+SQL_TABLE_TERM: int = GetInfoConstants.SQL_TABLE_TERM.value
+SQL_PROCEDURE_TERM: int = GetInfoConstants.SQL_PROCEDURE_TERM.value
+
+# Transaction support
+SQL_TXN_CAPABLE: int = GetInfoConstants.SQL_TXN_CAPABLE.value
+SQL_DEFAULT_TXN_ISOLATION: int = GetInfoConstants.SQL_DEFAULT_TXN_ISOLATION.value
+
+# Data type support
+SQL_NUMERIC_FUNCTIONS: int = GetInfoConstants.SQL_NUMERIC_FUNCTIONS.value
+SQL_STRING_FUNCTIONS: int = GetInfoConstants.SQL_STRING_FUNCTIONS.value
+SQL_DATETIME_FUNCTIONS: int = GetInfoConstants.SQL_DATETIME_FUNCTIONS.value
+
+# Limits
+SQL_MAX_COLUMN_NAME_LEN: int = GetInfoConstants.SQL_MAX_COLUMN_NAME_LEN.value
+SQL_MAX_TABLE_NAME_LEN: int = GetInfoConstants.SQL_MAX_TABLE_NAME_LEN.value
+SQL_MAX_SCHEMA_NAME_LEN: int = GetInfoConstants.SQL_MAX_SCHEMA_NAME_LEN.value
+SQL_MAX_CATALOG_NAME_LEN: int = GetInfoConstants.SQL_MAX_CATALOG_NAME_LEN.value
+SQL_MAX_IDENTIFIER_LEN: int = GetInfoConstants.SQL_MAX_IDENTIFIER_LEN.value
+
+
+# Also provide a function to get all constants
+def get_info_constants() -> Dict[str, int]:
+ """
+ Returns a dictionary of all available GetInfo constants.
+
+ This provides all SQLGetInfo constants that can be used with the Connection.getinfo() method
+ to retrieve metadata about the database server and driver.
+
+ Returns:
+ dict: Dictionary mapping constant names to their integer values
+ """
+ return {name: member.value for name, member in GetInfoConstants.__members__.items()}
+
+
+# Create a custom module class that uses properties instead of __setattr__
+class _MSSQLModule(types.ModuleType):
+ @property
+ def lowercase(self) -> bool:
+ """Get the lowercase setting."""
+ return _settings.lowercase
+
+ @lowercase.setter
+ def lowercase(self, value: bool) -> None:
+ """Set the lowercase setting."""
+ if not isinstance(value, bool):
+ raise ValueError("lowercase must be a boolean value")
+ with _settings_lock:
+ _settings.lowercase = value
+
+
+# Replace the current module with our custom module class
+old_module: types.ModuleType = sys.modules[__name__]
+new_module: _MSSQLModule = _MSSQLModule(__name__)
+
+# Copy all existing attributes to the new module
+for attr_name in dir(old_module):
+ if attr_name != "__class__":
+ try:
+ setattr(new_module, attr_name, getattr(old_module, attr_name))
+ except AttributeError:
+ pass
+
+# Replace the module in sys.modules
+sys.modules[__name__] = new_module
+
+# Initialize property values
+lowercase: bool = _settings.lowercase
diff --git a/mssql_python/auth.py b/mssql_python/auth.py
index c7e6683ac..33607f002 100644
--- a/mssql_python/auth.py
+++ b/mssql_python/auth.py
@@ -6,43 +6,82 @@
import platform
import struct
-from typing import Tuple, Dict, Optional, Union
+from typing import Tuple, Dict, Optional, List
+
+from mssql_python.logging import logger
from mssql_python.constants import AuthType
+
class AADAuth:
"""Handles Azure Active Directory authentication"""
-
+
@staticmethod
def get_token_struct(token: str) -> bytes:
"""Convert token to SQL Server compatible format"""
+ logger.debug(
+ "get_token_struct: Converting token to SQL Server format - token_length=%d chars",
+ len(token),
+ )
token_bytes = token.encode("UTF-16-LE")
+ logger.debug(
+ "get_token_struct: Token encoded to UTF-16-LE - byte_length=%d", len(token_bytes)
+ )
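+ # The access-token connection attribute expects a 4-byte little-endian
+ # length prefix followed by the UTF-16-LE token bytes.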
return struct.pack(f" bytes:
"""Get token using the specified authentication type"""
- from azure.identity import (
- DefaultAzureCredential,
- DeviceCodeCredential,
- InteractiveBrowserCredential
- )
- from azure.core.exceptions import ClientAuthenticationError
-
+ # Import Azure libraries inside method to support test mocking
+ # pylint: disable=import-outside-toplevel
+ try:
+ from azure.identity import (
+ DefaultAzureCredential,
+ DeviceCodeCredential,
+ InteractiveBrowserCredential,
+ )
+ from azure.core.exceptions import ClientAuthenticationError
+ except ImportError as e:
+ raise RuntimeError(
+ "Azure authentication libraries are not installed. "
+ "Please install with: pip install azure-identity azure-core"
+ ) from e
+
# Mapping of auth types to credential classes
credential_map = {
"default": DefaultAzureCredential,
"devicecode": DeviceCodeCredential,
"interactive": InteractiveBrowserCredential,
}
-
+
credential_class = credential_map[auth_type]
-
+ logger.info(
+ "get_token: Starting Azure AD authentication - auth_type=%s, credential_class=%s",
+ auth_type,
+ credential_class.__name__,
+ )
+
try:
+ logger.debug(
+ "get_token: Creating credential instance - credential_class=%s",
+ credential_class.__name__,
+ )
credential = credential_class()
+ logger.debug(
+ "get_token: Requesting token from Azure AD - scope=https://database.windows.net/.default"
+ )
token = credential.get_token("https://database.windows.net/.default").token
+ logger.info(
+ "get_token: Azure AD token acquired successfully - token_length=%d chars",
+ len(token),
+ )
return AADAuth.get_token_struct(token)
except ClientAuthenticationError as e:
# Re-raise with more specific context about Azure AD authentication failure
+ logger.error(
+ "get_token: Azure AD authentication failed - credential_class=%s, error=%s",
+ credential_class.__name__,
+ str(e),
+ )
raise RuntimeError(
f"Azure AD authentication failed for {credential_class.__name__}: {e}. "
f"This could be due to invalid credentials, missing environment variables, "
@@ -50,21 +89,28 @@ def get_token(auth_type: str) -> bytes:
) from e
except Exception as e:
# Catch any other unexpected exceptions
+ logger.error(
+ "get_token: Unexpected error during credential creation - credential_class=%s, error=%s",
+ credential_class.__name__,
+ str(e),
+ )
raise RuntimeError(f"Failed to create {credential_class.__name__}: {e}") from e
-def process_auth_parameters(parameters: list) -> Tuple[list, Optional[str]]:
+
+def process_auth_parameters(parameters: List[str]) -> Tuple[List[str], Optional[str]]:
"""
Process connection parameters and extract authentication type.
-
+
Args:
parameters: List of connection string parameters
-
+
Returns:
Tuple[list, Optional[str]]: Modified parameters and authentication type
-
+
Raises:
ValueError: If an invalid authentication type is provided
"""
+ logger.debug("process_auth_parameters: Processing %d connection parameters", len(parameters))
modified_parameters = []
auth_type = None
@@ -85,77 +131,142 @@ def process_auth_parameters(parameters: list) -> Tuple[list, Optional[str]]:
# Check for supported authentication types and set auth_type accordingly
if value_lower == AuthType.INTERACTIVE.value:
auth_type = "interactive"
+ logger.debug("process_auth_parameters: Interactive authentication detected")
# Interactive authentication (browser-based); only append parameter for non-Windows
if platform.system().lower() == "windows":
+ logger.debug(
+ "process_auth_parameters: Windows platform - using native AADInteractive"
+ )
auth_type = None # Let Windows handle AADInteractive natively
-
+
elif value_lower == AuthType.DEVICE_CODE.value:
# Device code authentication (for devices without browser)
+ logger.debug("process_auth_parameters: Device code authentication detected")
auth_type = "devicecode"
elif value_lower == AuthType.DEFAULT.value:
# Default authentication (uses DefaultAzureCredential)
+ logger.debug("process_auth_parameters: Default Azure authentication detected")
auth_type = "default"
modified_parameters.append(param)
+ logger.debug(
+ "process_auth_parameters: Processing complete - auth_type=%s, param_count=%d",
+ auth_type,
+ len(modified_parameters),
+ )
return modified_parameters, auth_type
-def remove_sensitive_params(parameters: list) -> list:
+
+def remove_sensitive_params(parameters: List[str]) -> List[str]:
"""Remove sensitive parameters from connection string"""
+ logger.debug(
+ "remove_sensitive_params: Removing sensitive parameters - input_count=%d", len(parameters)
+ )
exclude_keys = [
- "uid=", "pwd=", "encrypt=", "trustservercertificate=", "authentication="
+ "uid=",
+ "pwd=",
+ "trusted_connection=",
+ "authentication=",
]
- return [
- param for param in parameters
+ result = [
+ param
+ for param in parameters
if not any(param.lower().startswith(exclude) for exclude in exclude_keys)
]
+ logger.debug(
+ "remove_sensitive_params: Sensitive parameters removed - output_count=%d", len(result)
+ )
+ return result
+
def get_auth_token(auth_type: str) -> Optional[bytes]:
"""Get authentication token based on auth type"""
+ logger.debug("get_auth_token: Starting - auth_type=%s", auth_type)
if not auth_type:
+ logger.debug("get_auth_token: No auth_type specified, returning None")
return None
-
+
# Handle platform-specific logic for interactive auth
if auth_type == "interactive" and platform.system().lower() == "windows":
+ logger.debug("get_auth_token: Windows interactive auth - delegating to native handler")
return None # Let Windows handle AADInteractive natively
-
+
try:
- return AADAuth.get_token(auth_type)
- except (ValueError, RuntimeError):
+ token = AADAuth.get_token(auth_type)
+ logger.info("get_auth_token: Token acquired successfully - auth_type=%s", auth_type)
+ return token
+ except (ValueError, RuntimeError) as e:
+ logger.warning(
+ "get_auth_token: Token acquisition failed - auth_type=%s, error=%s", auth_type, str(e)
+ )
return None
-def process_connection_string(connection_string: str) -> Tuple[str, Optional[Dict]]:
+
+def process_connection_string(
+ connection_string: str,
+) -> Tuple[str, Optional[Dict[int, bytes]]]:
"""
Process connection string and handle authentication.
-
+
Args:
connection_string: The connection string to process
-
+
Returns:
Tuple[str, Optional[Dict]]: Processed connection string and attrs_before dict if needed
-
+
Raises:
ValueError: If the connection string is invalid or empty
"""
+ logger.debug(
+ "process_connection_string: Starting - conn_str_length=%d",
+ len(connection_string) if isinstance(connection_string, str) else 0,
+ )
# Check type first
if not isinstance(connection_string, str):
+ logger.error(
+ "process_connection_string: Invalid type - expected str, got %s",
+ type(connection_string).__name__,
+ )
raise ValueError("Connection string must be a string")
# Then check if empty
if not connection_string:
+ logger.error("process_connection_string: Connection string is empty")
raise ValueError("Connection string cannot be empty")
parameters = connection_string.split(";")
-
+ logger.debug(
+ "process_connection_string: Split connection string - parameter_count=%d", len(parameters)
+ )
+
# Validate that there's at least one valid parameter
- if not any('=' in param for param in parameters):
+ if not any("=" in param for param in parameters):
+ logger.error(
+ "process_connection_string: Invalid connection string format - no key=value pairs found"
+ )
raise ValueError("Invalid connection string format")
modified_parameters, auth_type = process_auth_parameters(parameters)
if auth_type:
+ logger.info(
+ "process_connection_string: Authentication type detected - auth_type=%s", auth_type
+ )
modified_parameters = remove_sensitive_params(modified_parameters)
token_struct = get_auth_token(auth_type)
if token_struct:
+ logger.info(
+ "process_connection_string: Token authentication configured successfully - auth_type=%s",
+ auth_type,
+ )
return ";".join(modified_parameters) + ";", {1256: token_struct}
+ else:
+ logger.warning(
+ "process_connection_string: Token acquisition failed, proceeding without token"
+ )
- return ";".join(modified_parameters) + ";", None
\ No newline at end of file
+ logger.debug(
+ "process_connection_string: Connection string processing complete - has_auth=%s",
+ bool(auth_type),
+ )
+ return ";".join(modified_parameters) + ";", None
diff --git a/mssql_python/bcp_options.py b/mssql_python/bcp_options.py
deleted file mode 100644
index 7dab82d55..000000000
--- a/mssql_python/bcp_options.py
+++ /dev/null
@@ -1,121 +0,0 @@
-from dataclasses import dataclass, field
-from typing import List, Optional, Literal
-
-
-@dataclass
-class ColumnFormat:
- """
- Represents the format of a column in a bulk copy operation.
- Attributes:
- prefix_len (int): Option: (format_file) or (prefix_len, data_len).
- The length of the prefix for fixed-length data types. Must be non-negative.
- data_len (int): Option: (format_file) or (prefix_len, data_len).
- The length of the data. Must be non-negative.
- field_terminator (Optional[bytes]): Option: (-t). The field terminator string.
- e.g., b',' for comma-separated values.
- row_terminator (Optional[bytes]): Option: (-r). The row terminator string.
- e.g., b'\\n' for newline-terminated rows.
- server_col (int): Option: (format_file) or (server_col). The 1-based column number
- in the SQL Server table. Defaults to 1, representing the first column.
- Must be a positive integer.
- file_col (int): Option: (format_file) or (file_col). The 1-based column number
- in the data file. Defaults to 1, representing the first column.
- Must be a positive integer.
- """
-
- prefix_len: int
- data_len: int
- field_terminator: Optional[bytes] = None
- row_terminator: Optional[bytes] = None
- server_col: int = 1
- file_col: int = 1
-
- def __post_init__(self):
- if self.prefix_len < 0:
- raise ValueError("prefix_len must be a non-negative integer.")
- if self.data_len < 0:
- raise ValueError("data_len must be a non-negative integer.")
- if self.server_col <= 0:
- raise ValueError("server_col must be a positive integer (1-based).")
- if self.file_col <= 0:
- raise ValueError("file_col must be a positive integer (1-based).")
- if self.field_terminator is not None and not isinstance(
- self.field_terminator, bytes
- ):
- raise TypeError("field_terminator must be bytes or None.")
- if self.row_terminator is not None and not isinstance(
- self.row_terminator, bytes
- ):
- raise TypeError("row_terminator must be bytes or None.")
-
-
-@dataclass
-class BCPOptions:
- """
- Represents the options for a bulk copy operation.
- Attributes:
- direction (Literal[str]): 'in' or 'out'. Option: (-i or -o).
- data_file (str): The data file. Option: (positional argument).
- error_file (Optional[str]): The error file. Option: (-e).
- format_file (Optional[str]): The format file to use for 'in'/'out'. Option: (-f).
- batch_size (Optional[int]): The batch size. Option: (-b).
- max_errors (Optional[int]): The maximum number of errors allowed. Option: (-m).
- first_row (Optional[int]): The first row to process. Option: (-F).
- last_row (Optional[int]): The last row to process. Option: (-L).
- code_page (Optional[str]): The code page. Option: (-C).
- keep_identity (bool): Keep identity values. Option: (-E).
- keep_nulls (bool): Keep null values. Option: (-k).
- hints (Optional[str]): Additional hints. Option: (-h).
- bulk_mode (str): Bulk mode ('native', 'char', 'unicode'). Option: (-n, -c, -w).
- Defaults to "native".
- columns (List[ColumnFormat]): Column formats.
- """
-
- direction: Literal["in", "out"]
- data_file: str # data_file is mandatory for 'in' and 'out'
- error_file: Optional[str] = None
- format_file: Optional[str] = None
- # write_format_file is removed as 'format' direction is not actively supported
- batch_size: Optional[int] = None
- max_errors: Optional[int] = None
- first_row: Optional[int] = None
- last_row: Optional[int] = None
- code_page: Optional[str] = None
- keep_identity: bool = False
- keep_nulls: bool = False
- hints: Optional[str] = None
- bulk_mode: Literal["native", "char", "unicode"] = "native"
- columns: List[ColumnFormat] = field(default_factory=list)
-
- def __post_init__(self):
- if self.direction not in ["in", "out"]:
- raise ValueError("direction must be 'in' or 'out'.")
- if not self.data_file:
- raise ValueError("data_file must be provided and non-empty for 'in' or 'out' directions.")
- if self.error_file is None or not self.error_file: # Making error_file mandatory for in/out
- raise ValueError("error_file must be provided and non-empty for 'in' or 'out' directions.")
-
- if self.format_file is not None and not self.format_file:
- raise ValueError("format_file, if provided, must not be an empty string.")
- if self.batch_size is not None and self.batch_size <= 0:
- raise ValueError("batch_size must be a positive integer.")
- if self.max_errors is not None and self.max_errors < 0:
- raise ValueError("max_errors must be a non-negative integer.")
- if self.first_row is not None and self.first_row <= 0:
- raise ValueError("first_row must be a positive integer.")
- if self.last_row is not None and self.last_row <= 0:
- raise ValueError("last_row must be a positive integer.")
- if self.last_row is not None and self.first_row is None:
- raise ValueError("first_row must be specified if last_row is specified.")
- if (
- self.first_row is not None
- and self.last_row is not None
- and self.last_row < self.first_row
- ):
- raise ValueError("last_row must be greater than or equal to first_row.")
- if self.code_page is not None and not self.code_page:
- raise ValueError("code_page, if provided, must not be an empty string.")
- if self.hints is not None and not self.hints:
- raise ValueError("hints, if provided, must not be an empty string.")
- if self.bulk_mode not in ["native", "char", "unicode"]:
- raise ValueError("bulk_mode must be 'native', 'char', or 'unicode'.")
diff --git a/mssql_python/connection.py b/mssql_python/connection.py
index d1ed6e78c..ba79e2a3f 100644
--- a/mssql_python/connection.py
+++ b/mssql_python/connection.py
@@ -2,7 +2,7 @@
Copyright (c) Microsoft Corporation.
Licensed under the MIT license.
This module defines the Connection class, which is used to manage a connection to a database.
-The class provides methods to establish a connection, create cursors, commit transactions,
+The class provides methods to establish a connection, create cursors, commit transactions,
roll back transactions, and close the connection.
Resource Management:
- All cursors created from this connection are tracked internally.
@@ -10,14 +10,136 @@
- Do not use any cursor after the connection is closed; doing so will raise an exception.
- Cursors are also cleaned up automatically when no longer referenced, to prevent memory leaks.
"""
+
import weakref
import re
+import codecs
+from typing import Any, Dict, Optional, Union, List, Tuple, Callable, TYPE_CHECKING
+import threading
+
+import mssql_python
from mssql_python.cursor import Cursor
-from mssql_python.helpers import add_driver_to_connection_str, sanitize_connection_string, log
+from mssql_python.helpers import (
+ sanitize_connection_string,
+ sanitize_user_input,
+ validate_attribute_value,
+)
+from mssql_python.logging import logger
from mssql_python import ddbc_bindings
from mssql_python.pooling import PoolingManager
-from mssql_python.exceptions import InterfaceError
+from mssql_python.exceptions import (
+ Warning, # pylint: disable=redefined-builtin
+ Error,
+ InterfaceError,
+ DatabaseError,
+ DataError,
+ OperationalError,
+ IntegrityError,
+ InternalError,
+ ProgrammingError,
+ NotSupportedError,
+)
from mssql_python.auth import process_connection_string
+from mssql_python.constants import ConstantsDDBC, GetInfoConstants
+from mssql_python.connection_string_parser import _ConnectionStringParser
+from mssql_python.connection_string_builder import _ConnectionStringBuilder
+from mssql_python.constants import _RESERVED_PARAMETERS
+
+if TYPE_CHECKING:
+ from mssql_python.row import Row
+
+# Add SQL_WMETADATA constant for metadata decoding configuration
+SQL_WMETADATA: int = -99 # Special flag for column name decoding
+# Threshold to determine if an info type is string-based
+INFO_TYPE_STRING_THRESHOLD: int = 10000
+
+# UTF-16 encoding variants that should use SQL_WCHAR by default
+# Note: "utf-16" with BOM is NOT included as it's problematic for SQL_WCHAR
+UTF16_ENCODINGS: frozenset[str] = frozenset(["utf-16le", "utf-16be"])
+
+
+def _validate_utf16_wchar_compatibility(
+ encoding: str, wchar_type: int, context: str = "SQL_WCHAR"
+) -> None:
+ """
+ Validates UTF-16 encoding compatibility with SQL_WCHAR.
+
+ Centralizes the validation logic to eliminate duplication across setencoding/setdecoding.
+
+ Args:
+ encoding: The encoding string (already normalized to lowercase)
+ wchar_type: The SQL_WCHAR constant value to check against
+ context: Context string for error messages ('SQL_WCHAR', 'SQL_WCHAR ctype', etc.)
+
+ Raises:
+ ProgrammingError: If encoding is incompatible with SQL_WCHAR
+ """
+ if encoding == "utf-16":
+ # UTF-16 with BOM is rejected due to byte order ambiguity
+ logger.warning("utf-16 with BOM rejected for %s", context)
+ raise ProgrammingError(
+ driver_error="UTF-16 with Byte Order Mark not supported for SQL_WCHAR",
+ ddbc_error=(
+ "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
+ "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
+ ),
+ )
+ elif encoding not in UTF16_ENCODINGS:
+ # Non-UTF-16 encodings are not supported with SQL_WCHAR
+ logger.warning(
+ "Non-UTF-16 encoding %s attempted with %s", sanitize_user_input(encoding), context
+ )
+
+ # Generate context-appropriate error messages
+ if "ctype" in context:
+ driver_error = f"SQL_WCHAR ctype only supports UTF-16 encodings"
+ ddbc_context = "SQL_WCHAR ctype"
+ else:
+ driver_error = f"SQL_WCHAR only supports UTF-16 encodings"
+ ddbc_context = "SQL_WCHAR"
+
+ raise ProgrammingError(
+ driver_error=driver_error,
+ ddbc_error=(
+ f"Cannot use encoding '{encoding}' with {ddbc_context}. "
+ f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
+ ),
+ )
+
+
+def _validate_encoding(encoding: str) -> bool:
+ """
+ Cached encoding validation using codecs.lookup().
+
+ Args:
+ encoding (str): The encoding name to validate.
+
+ Returns:
+ bool: True if encoding is valid, False otherwise.
+
+ Note:
+ Uses LRU cache to avoid repeated expensive codecs.lookup() calls.
+ Cache size is limited to 128 entries which should cover most use cases.
+ Also validates that encoding name only contains safe characters.
+ """
+ # Basic security checks - prevent obvious attacks
+ if not encoding or not isinstance(encoding, str):
+ return False
+
+ # Check length limit (prevent DOS)
+ if len(encoding) > 100:
+ return False
+
+ # Prevent null bytes and control characters that could cause issues
+ if "\x00" in encoding or any(ord(c) < 32 and c not in "\t\n\r" for c in encoding):
+ return False
+
+ # Then check if it's a valid Python codec
+ try:
+ codecs.lookup(encoding)
+ return True
+ except LookupError:
+ return False
class Connection:
@@ -29,6 +151,23 @@ class Connection:
to be used in a context where database operations are required, such as executing queries
and fetching results.
+ The Connection class supports the Python context manager protocol (with statement).
+ When used as a context manager, it will automatically close the connection when
+ exiting the context, ensuring proper resource cleanup.
+
+ Example usage:
+ with connect(connection_string) as conn:
+ cursor = conn.cursor()
+ cursor.execute("INSERT INTO table VALUES (?)", [value])
+ # Connection is automatically closed when exiting the with block
+
+ For long-lived connections, use without context manager:
+ conn = connect(connection_string)
+ try:
+ # Multiple operations...
+ finally:
+ conn.close()
+
Methods:
__init__(database: str) -> None:
connect_to_db() -> None:
@@ -36,17 +175,51 @@ class Connection:
commit() -> None:
rollback() -> None:
close() -> None:
+ __enter__() -> Connection:
+ __exit__() -> None:
+ setencoding(encoding=None, ctype=None) -> None:
+ setdecoding(sqltype, encoding=None, ctype=None) -> None:
+ getdecoding(sqltype) -> dict:
+ set_attr(attribute, value) -> None:
"""
- def __init__(self, connection_str: str = "", autocommit: bool = False, attrs_before: dict = None, **kwargs) -> None:
+ # DB-API 2.0 Exception attributes
+ # These allow users to catch exceptions using connection.Error,
+ # connection.ProgrammingError, etc.
+ Warning = Warning
+ Error = Error
+ InterfaceError = InterfaceError
+ DatabaseError = DatabaseError
+ DataError = DataError
+ OperationalError = OperationalError
+ IntegrityError = IntegrityError
+ InternalError = InternalError
+ ProgrammingError = ProgrammingError
+ NotSupportedError = NotSupportedError
+
+ def __init__(
+ self,
+ connection_str: str = "",
+ autocommit: bool = False,
+ attrs_before: Optional[Dict[int, Union[int, str, bytes]]] = None,
+ timeout: int = 0,
+ **kwargs: Any,
+ ) -> None:
"""
Initialize the connection object with the specified connection string and parameters.
Args:
- - connection_str (str): The connection string to connect to.
- - autocommit (bool): If True, causes a commit to be performed after each SQL statement.
+ connection_str (str): The connection string to connect to.
+ autocommit (bool): If True, causes a commit to be performed after
+ each SQL statement.
+ attrs_before (dict, optional): Dictionary of connection attributes to set before
+ connection establishment. Keys are SQL_ATTR_* constants,
+ and values are their corresponding settings.
+ Use this for attributes that must be set before
+ connecting, such as SQL_ATTR_LOGIN_TIMEOUT,
+ SQL_ATTR_ODBC_CURSORS, and SQL_ATTR_PACKET_SIZE.
+ timeout (int): Login timeout in seconds. 0 means no timeout.
**kwargs: Additional key/value pairs for the connection string.
- Not including below properties since we are driver doesn't support this:
Returns:
None
@@ -55,14 +228,41 @@ def __init__(self, connection_str: str = "", autocommit: bool = False, attrs_bef
ValueError: If the connection string is invalid or connection fails.
This method sets up the initial state for the connection object,
- preparing it for further operations such as connecting to the
+ preparing it for further operations such as connecting to the
database, executing queries, etc.
+
+ Example:
+ >>> # Setting login timeout using attrs_before
+ >>> import mssql_python as ms
+ >>> conn = ms.connect("Server=myserver;Database=mydb",
+ ... attrs_before={ms.SQL_ATTR_LOGIN_TIMEOUT: 30})
"""
- self.connection_str = self._construct_connection_string(
- connection_str, **kwargs
- )
+ self.connection_str = self._construct_connection_string(connection_str, **kwargs)
self._attrs_before = attrs_before or {}
+ # Initialize encoding settings with defaults for Python 3
+ # Python 3 only has str (which is Unicode), so we use utf-16le by default
+ self._encoding_settings = {
+ "encoding": "utf-16le",
+ "ctype": ConstantsDDBC.SQL_WCHAR.value,
+ }
+
+ # Initialize decoding settings with Python 3 defaults
+ self._decoding_settings = {
+ ConstantsDDBC.SQL_CHAR.value: {
+ "encoding": "utf-8",
+ "ctype": ConstantsDDBC.SQL_CHAR.value,
+ },
+ ConstantsDDBC.SQL_WCHAR.value: {
+ "encoding": "utf-16le",
+ "ctype": ConstantsDDBC.SQL_WCHAR.value,
+ },
+ SQL_WMETADATA: {
+ "encoding": "utf-16le",
+ "ctype": ConstantsDDBC.SQL_WCHAR.value,
+ },
+ }
+
# Check if the connection string contains authentication parameters
# This is important for processing the connection string correctly.
# If authentication is specified, it will be processed to handle
@@ -72,60 +272,157 @@ def __init__(self, connection_str: str = "", autocommit: bool = False, attrs_bef
self.connection_str = connection_result[0]
if connection_result[1]:
self._attrs_before.update(connection_result[1])
-
+
self._closed = False
-
- # Using WeakSet which automatically removes cursors when they are no longer in use
+ self._timeout = timeout
+
+ # Using WeakSet which automatically removes cursors when they are no
+ # longer in use
# It is a set that holds weak references to its elements.
- # When an object is only weakly referenced, it can be garbage collected even if it's still in the set.
- # It prevents memory leaks by ensuring that cursors are cleaned up when no longer in use without requiring explicit deletion.
- # TODO: Think and implement scenarios for multi-threaded access to cursors
+ # When an object is only weakly referenced, it can be garbage
+ # collected even if it's still in the set.
+ # It prevents memory leaks by ensuring that cursors are cleaned up
+ # when no longer in use without requiring explicit deletion.
+ # TODO: Think and implement scenarios for multi-threaded access
+ # to cursors
self._cursors = weakref.WeakSet()
+ # Initialize output converters dictionary and its lock for thread safety
+ self._output_converters = {}
+ self._converters_lock = threading.Lock()
+
+ # Initialize encoding/decoding settings lock for thread safety
+ # This lock protects both _encoding_settings and _decoding_settings dictionaries
+ # from concurrent modification. We use a simple Lock (not RLock) because:
+ # - Write operations (setencoding/setdecoding) replace the entire dict atomically
+ # - Read operations (getencoding/getdecoding) return a copy, so they're safe
+ # - No recursive locking is needed in our usage pattern
+ # This is more performant than RLock for the multiple-readers-single-writer pattern
+ self._encoding_lock = threading.Lock()
+
+ # Initialize search escape character
+ self._searchescape = None
+
# Auto-enable pooling if user never called
if not PoolingManager.is_initialized():
PoolingManager.enable()
self._pooling = PoolingManager.is_enabled()
- self._conn = ddbc_bindings.Connection(self.connection_str, self._pooling, self._attrs_before)
+ self._conn = ddbc_bindings.Connection(
+ self.connection_str, self._pooling, self._attrs_before
+ )
self.setautocommit(autocommit)
- def _construct_connection_string(self, connection_str: str = "", **kwargs) -> str:
+ # Register this connection for cleanup before Python shutdown
+ # This ensures ODBC handles are freed in the correct order, preventing leaks
+ try:
+ if hasattr(mssql_python, "_register_connection"):
+ mssql_python._register_connection(self)
+ except AttributeError as e:
+ # If registration fails, continue - cleanup will still happen via __del__
+ logger.warning(
+ f"Failed to register connection for shutdown cleanup: {type(e).__name__}: {e}"
+ )
+ except Exception as e:
+ # Catch any other unexpected errors during registration
+ logger.error(
+ f"Unexpected error during connection registration: {type(e).__name__}: {e}"
+ )
+
+ def _construct_connection_string(self, connection_str: str = "", **kwargs: Any) -> str:
"""
- Construct the connection string by concatenating the connection string
- with key/value pairs from kwargs.
+ Construct the connection string by parsing, validating, and merging parameters.
+
+ This method performs a 6-step process:
+ 1. Parse and validate the base connection_str (validates against allowlist)
+ 2. Normalize parameter names (e.g., addr/address -> Server, uid -> UID)
+ 3. Merge kwargs (which override connection_str params after normalization)
+ 4. Build connection string from normalized, merged params
+ 5. Add Driver and APP parameters (always controlled by the driver)
+ 6. Return the final connection string
Args:
connection_str (str): The base connection string.
**kwargs: Additional key/value pairs for the connection string.
Returns:
- str: The constructed connection string.
+ str: The constructed and validated connection string.
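+
+ Example (illustrative; assumes 'database' is in the keyword allow-list):
+ self._construct_connection_string(
+ "Server=localhost;Database=old_db", database="new_db"
+ )
+ # Result contains Database=new_db (kwargs win), plus the
+ # driver-controlled Driver and APP parameters.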
"""
- # Add the driver attribute to the connection string
- conn_str = add_driver_to_connection_str(connection_str)
- # Add additional key-value pairs to the connection string
+ # Step 1: Parse base connection string with allowlist validation
+ # The parser validates everything: unknown params, reserved params, duplicates, syntax
+ parser = _ConnectionStringParser(validate_keywords=True)
+ parsed_params = parser._parse(connection_str)
+
+ # Step 2: Normalize parameter names (e.g., addr/address -> Server, uid -> UID)
+ # This handles synonym mapping and deduplication via normalized keys
+ normalized_params = _ConnectionStringParser._normalize_params(
+ parsed_params, warn_rejected=False
+ )
+
+ # Step 3: Process kwargs and merge with normalized_params
+ # kwargs override connection string values (processed after, so they take precedence)
for key, value in kwargs.items():
- if key.lower() == "host" or key.lower() == "server":
- key = "Server"
- elif key.lower() == "user" or key.lower() == "uid":
- key = "Uid"
- elif key.lower() == "password" or key.lower() == "pwd":
- key = "Pwd"
- elif key.lower() == "database":
- key = "Database"
- elif key.lower() == "encrypt":
- key = "Encrypt"
- elif key.lower() == "trust_server_certificate":
- key = "TrustServerCertificate"
+ normalized_key = _ConnectionStringParser.normalize_key(key)
+ if normalized_key:
+ # Driver and APP are reserved - raise error if user tries to set them
+ if normalized_key in _RESERVED_PARAMETERS:
+ raise ValueError(
+ f"Connection parameter '{key}' is reserved and controlled by the driver. "
+ f"It cannot be set by the user."
+ )
+ # kwargs override any existing values from connection string
+ normalized_params[normalized_key] = str(value)
else:
- continue
- conn_str += f"{key}={value};"
+ logger.warning(f"Ignoring unknown connection parameter from kwargs: {key}")
- log('info', "Final connection string: %s", sanitize_connection_string(conn_str))
+ # Step 4: Build connection string with merged params
+ builder = _ConnectionStringBuilder(normalized_params)
+
+ # Step 5: Add Driver and APP parameters (always controlled by the driver)
+ # These maintain existing behavior: Driver is always hardcoded, APP is always MSSQL-Python
+ builder.add_param("Driver", "ODBC Driver 18 for SQL Server")
+ builder.add_param("APP", "MSSQL-Python")
+
+ # Step 6: Build final string
+ conn_str = builder.build()
+
+ logger.info("Final connection string: %s", sanitize_connection_string(conn_str))
return conn_str
-
+
+ @property
+ def timeout(self) -> int:
+ """
+ Get the current query timeout setting in seconds.
+
+ Returns:
+ int: The timeout value in seconds. Zero means no timeout (wait indefinitely).
+ """
+ return self._timeout
+
+ @timeout.setter
+ def timeout(self, value: int) -> None:
+ """
+ Set the query timeout for all operations performed by this connection.
+
+ Args:
+ value (int): The timeout value in seconds. Zero means no timeout.
+
+ Returns:
+ None
+
+ Note:
+ This timeout applies to all cursors created from this connection.
+ It cannot be changed for individual cursors or SQL statements.
+ If a query timeout occurs, an OperationalError exception will be raised.
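+
+ Example (illustrative):
+ conn.timeout = 30 # cursors created afterwards use a 30-second query timeout
+ conn.timeout = 0 # disable the timeout again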
+ """
+ if not isinstance(value, int):
+ raise TypeError("Timeout must be an integer")
+ if value < 0:
+ raise ValueError("Timeout cannot be negative")
+ self._timeout = value
+ logger.info(f"Query timeout set to {value} seconds")
+
@property
def autocommit(self) -> bool:
"""
@@ -145,9 +442,25 @@ def autocommit(self, value: bool) -> None:
None
"""
self.setautocommit(value)
- log('info', "Autocommit mode set to %s.", value)
+ logger.info("Autocommit mode set to %s.", value)
+
+ @property
+ def closed(self) -> bool:
+ """
+ Returns True if the connection is closed, False otherwise.
+
+ This property indicates whether close() was explicitly called on
+ the connection. Note that this does not indicate whether the
+ connection is healthy/alive - if a timeout or network issue breaks
+ the connection, closed would still be False until close() is
+ explicitly called.
- def setautocommit(self, value: bool = True) -> None:
+ Returns:
+ bool: True if the connection is closed, False otherwise.
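+
+ Example (illustrative):
+ conn = connect(connection_string)
+ assert conn.closed is False
+ conn.close()
+ assert conn.closed is True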
+ """
+ return self._closed
+
+ def setautocommit(self, value: bool = False) -> None:
"""
Set the autocommit mode of the connection.
Args:
@@ -159,6 +472,424 @@ def setautocommit(self, value: bool = True) -> None:
"""
self._conn.set_autocommit(value)
+ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = None) -> None:
+ """
+ Sets the text encoding for SQL statements and text parameters.
+
+ Since Python 3 only has str (which is Unicode), this method configures
+ how text is encoded when sending to the database.
+
+ Args:
+ encoding (str, optional): The encoding to use. This must be a valid Python
+ encoding that converts text to bytes. If None, defaults to 'utf-16le'.
+ ctype (int, optional): The C data type to use when passing data:
+ SQL_CHAR or SQL_WCHAR. If not provided, SQL_WCHAR is used for
+ UTF-16 variants (see UTF16_ENCODINGS constant). SQL_CHAR is used
+ for all other encodings.
+
+ Returns:
+ None
+
+ Raises:
+ ProgrammingError: If the encoding is not valid or not supported.
+ InterfaceError: If the connection is closed.
+
+ Example:
+ # For databases that only communicate with UTF-8
+ cnxn.setencoding(encoding='utf-8')
+
+ # For explicitly using SQL_CHAR
+ cnxn.setencoding(encoding='utf-8', ctype=mssql_python.SQL_CHAR)
+ """
+ logger.debug(
+ "setencoding: Configuring encoding=%s, ctype=%s",
+ str(encoding) if encoding else "default",
+ str(ctype) if ctype else "auto",
+ )
+ if self._closed:
+ logger.debug("setencoding: Connection is closed")
+ raise InterfaceError(
+ driver_error="Connection is closed",
+ ddbc_error="Connection is closed",
+ )
+
+ # Set default encoding if not provided
+ if encoding is None:
+ encoding = "utf-16le"
+ logger.debug("setencoding: Using default encoding=utf-16le")
+
+ # Validate encoding using cached validation for better performance
+ if not _validate_encoding(encoding):
+ # Log the sanitized encoding for security
+ logger.warning(
+ "Invalid encoding attempted: %s",
+ sanitize_user_input(str(encoding)),
+ )
+ raise ProgrammingError(
+ driver_error=f"Unsupported encoding: {encoding}",
+ ddbc_error=f"The encoding '{encoding}' is not supported by Python",
+ )
+
+ # Normalize encoding to casefold for more robust Unicode handling
+ encoding = encoding.casefold()
+ logger.debug("setencoding: Encoding normalized to %s", encoding)
+
+ # Early validation if ctype is already specified as SQL_WCHAR
+ if ctype == ConstantsDDBC.SQL_WCHAR.value:
+ _validate_utf16_wchar_compatibility(encoding, ctype, "SQL_WCHAR")
+
+ # Set default ctype based on encoding if not provided
+ if ctype is None:
+ if encoding in UTF16_ENCODINGS:
+ ctype = ConstantsDDBC.SQL_WCHAR.value
+ logger.debug("setencoding: Auto-selected SQL_WCHAR for UTF-16")
+ else:
+ ctype = ConstantsDDBC.SQL_CHAR.value
+ logger.debug("setencoding: Auto-selected SQL_CHAR for non-UTF-16")
+
+ # Validate ctype
+ valid_ctypes = [ConstantsDDBC.SQL_CHAR.value, ConstantsDDBC.SQL_WCHAR.value]
+ if ctype not in valid_ctypes:
+ # Log the sanitized ctype for security
+ logger.warning(
+ "Invalid ctype attempted: %s",
+ sanitize_user_input(str(ctype)),
+ )
+ raise ProgrammingError(
+ driver_error=f"Invalid ctype: {ctype}",
+ ddbc_error=(
+ f"ctype must be SQL_CHAR ({ConstantsDDBC.SQL_CHAR.value}) or "
+ f"SQL_WCHAR ({ConstantsDDBC.SQL_WCHAR.value})"
+ ),
+ )
+
+ # Final validation: SQL_WCHAR ctype only supports UTF-16 encodings (without BOM)
+ if ctype == ConstantsDDBC.SQL_WCHAR.value:
+ _validate_utf16_wchar_compatibility(encoding, ctype, "SQL_WCHAR")
+
+ # Store the encoding settings (thread-safe with lock)
+ with self._encoding_lock:
+ self._encoding_settings = {"encoding": encoding, "ctype": ctype}
+
+ # Log with sanitized values for security
+ logger.info(
+ "Text encoding set to %s with ctype %s",
+ sanitize_user_input(encoding),
+ sanitize_user_input(str(ctype)),
+ )
+
+ def getencoding(self) -> Dict[str, Union[str, int]]:
+ """
+ Gets the current text encoding settings (thread-safe).
+
+ Returns:
+ dict: A dictionary containing 'encoding' and 'ctype' keys.
+
+ Raises:
+ InterfaceError: If the connection is closed.
+
+ Example:
+ settings = cnxn.getencoding()
+ print(f"Current encoding: {settings['encoding']}")
+ print(f"Current ctype: {settings['ctype']}")
+
+ Note:
+ This method is thread-safe and can be called from multiple threads concurrently.
+ Returns a copy of the settings to prevent external modification.
+ """
+ if self._closed:
+ raise InterfaceError(
+ driver_error="Connection is closed",
+ ddbc_error="Connection is closed",
+ )
+
+ # Thread-safe read with lock to prevent race conditions
+ with self._encoding_lock:
+ return self._encoding_settings.copy()
+
+ def setdecoding(
+ self, sqltype: int, encoding: Optional[str] = None, ctype: Optional[int] = None
+ ) -> None:
+ """
+ Sets the text decoding used when reading SQL_CHAR and SQL_WCHAR from the database.
+
+ This method configures how text data is decoded when reading from the database.
+ In Python 3, all text is Unicode (str), so this primarily affects the encoding
+ used to decode bytes from the database.
+
+ Args:
+ sqltype (int): The SQL type being configured: SQL_CHAR, SQL_WCHAR, or SQL_WMETADATA.
+ SQL_WMETADATA is a special flag for configuring column name decoding.
+ encoding (str, optional): The Python encoding to use when decoding the data.
+ If None, uses default encoding based on sqltype.
+ ctype (int, optional): The C data type to request from SQLGetData:
+ SQL_CHAR or SQL_WCHAR. If None, uses default based on encoding.
+
+ Returns:
+ None
+
+ Raises:
+ ProgrammingError: If the sqltype, encoding, or ctype is invalid.
+ InterfaceError: If the connection is closed.
+
+ Example:
+ # Configure SQL_CHAR to use UTF-8 decoding
+ cnxn.setdecoding(mssql_python.SQL_CHAR, encoding='utf-8')
+
+ # Configure column metadata decoding
+ cnxn.setdecoding(mssql_python.SQL_WMETADATA, encoding='utf-16le')
+
+ # Use explicit ctype
+ cnxn.setdecoding(mssql_python.SQL_WCHAR, encoding='utf-16le',
+ ctype=mssql_python.SQL_WCHAR)
+ """
+ if self._closed:
+ raise InterfaceError(
+ driver_error="Connection is closed",
+ ddbc_error="Connection is closed",
+ )
+
+ # Validate sqltype
+ valid_sqltypes = [
+ ConstantsDDBC.SQL_CHAR.value,
+ ConstantsDDBC.SQL_WCHAR.value,
+ SQL_WMETADATA,
+ ]
+ if sqltype not in valid_sqltypes:
+ logger.warning(
+ "Invalid sqltype attempted: %s",
+ sanitize_user_input(str(sqltype)),
+ )
+ raise ProgrammingError(
+ driver_error=f"Invalid sqltype: {sqltype}",
+ ddbc_error=(
+ f"sqltype must be SQL_CHAR ({ConstantsDDBC.SQL_CHAR.value}), "
+ f"SQL_WCHAR ({ConstantsDDBC.SQL_WCHAR.value}), or "
+ f"SQL_WMETADATA ({SQL_WMETADATA})"
+ ),
+ )
+
+ # Set default encoding based on sqltype if not provided
+ if encoding is None:
+ if sqltype == ConstantsDDBC.SQL_CHAR.value:
+ encoding = "utf-8" # Default for SQL_CHAR in Python 3
+ else: # SQL_WCHAR or SQL_WMETADATA
+ encoding = "utf-16le" # Default for SQL_WCHAR in Python 3
+
+ # Validate encoding using cached validation for better performance
+ if not _validate_encoding(encoding):
+ logger.warning(
+ "Invalid encoding attempted: %s",
+ sanitize_user_input(str(encoding)),
+ )
+ raise ProgrammingError(
+ driver_error=f"Unsupported encoding: {encoding}",
+ ddbc_error=f"The encoding '{encoding}' is not supported by Python",
+ )
+
+ # Normalize encoding to lowercase for consistency
+ encoding = encoding.lower()
+
+ # Validate SQL_WCHAR encoding compatibility
+ if sqltype == ConstantsDDBC.SQL_WCHAR.value:
+ _validate_utf16_wchar_compatibility(encoding, sqltype, "SQL_WCHAR sqltype")
+
+ # SQL_WMETADATA can use any valid encoding (UTF-8, UTF-16, etc.)
+ # No restriction needed here - let users configure as needed
+
+ # Set default ctype based on encoding if not provided
+ if ctype is None:
+ if encoding in UTF16_ENCODINGS:
+ ctype = ConstantsDDBC.SQL_WCHAR.value
+ else:
+ ctype = ConstantsDDBC.SQL_CHAR.value
+
+ # Validate ctype
+ valid_ctypes = [ConstantsDDBC.SQL_CHAR.value, ConstantsDDBC.SQL_WCHAR.value]
+ if ctype not in valid_ctypes:
+ logger.warning(
+ "Invalid ctype attempted: %s",
+ sanitize_user_input(str(ctype)),
+ )
+ raise ProgrammingError(
+ driver_error=f"Invalid ctype: {ctype}",
+ ddbc_error=(
+ f"ctype must be SQL_CHAR ({ConstantsDDBC.SQL_CHAR.value}) or "
+ f"SQL_WCHAR ({ConstantsDDBC.SQL_WCHAR.value})"
+ ),
+ )
+
+ # Validate SQL_WCHAR ctype encoding compatibility
+ if ctype == ConstantsDDBC.SQL_WCHAR.value:
+ _validate_utf16_wchar_compatibility(encoding, ctype, "SQL_WCHAR ctype")
+
+ # Store the decoding settings for the specified sqltype (thread-safe with lock)
+ with self._encoding_lock:
+ self._decoding_settings[sqltype] = {"encoding": encoding, "ctype": ctype}
+
+ # Log with sanitized values for security
+ sqltype_name = {
+ ConstantsDDBC.SQL_CHAR.value: "SQL_CHAR",
+ ConstantsDDBC.SQL_WCHAR.value: "SQL_WCHAR",
+ SQL_WMETADATA: "SQL_WMETADATA",
+ }.get(sqltype, str(sqltype))
+
+ logger.info(
+ "Text decoding set for %s to %s with ctype %s",
+ sqltype_name,
+ sanitize_user_input(encoding),
+ sanitize_user_input(str(ctype)),
+ )
+
+ def getdecoding(self, sqltype: int) -> Dict[str, Union[str, int]]:
+ """
+ Gets the current text decoding settings for the specified SQL type (thread-safe).
+
+ Args:
+ sqltype (int): The SQL type to get settings for: SQL_CHAR, SQL_WCHAR, or SQL_WMETADATA.
+
+ Returns:
+ dict: A dictionary containing 'encoding' and 'ctype' keys for the specified sqltype.
+
+ Raises:
+ ProgrammingError: If the sqltype is invalid.
+ InterfaceError: If the connection is closed.
+
+ Example:
+ settings = cnxn.getdecoding(mssql_python.SQL_CHAR)
+ print(f"SQL_CHAR encoding: {settings['encoding']}")
+ print(f"SQL_CHAR ctype: {settings['ctype']}")
+
+ Note:
+ This method is thread-safe and can be called from multiple threads concurrently.
+ Returns a copy of the settings to prevent external modification.
+ """
+ if self._closed:
+ raise InterfaceError(
+ driver_error="Connection is closed",
+ ddbc_error="Connection is closed",
+ )
+
+ # Validate sqltype
+ valid_sqltypes = [
+ ConstantsDDBC.SQL_CHAR.value,
+ ConstantsDDBC.SQL_WCHAR.value,
+ SQL_WMETADATA,
+ ]
+ if sqltype not in valid_sqltypes:
+ raise ProgrammingError(
+ driver_error=f"Invalid sqltype: {sqltype}",
+ ddbc_error=(
+ f"sqltype must be SQL_CHAR ({ConstantsDDBC.SQL_CHAR.value}), "
+ f"SQL_WCHAR ({ConstantsDDBC.SQL_WCHAR.value}), or "
+ f"SQL_WMETADATA ({SQL_WMETADATA})"
+ ),
+ )
+
+ # Thread-safe read with lock to prevent race conditions
+ with self._encoding_lock:
+ return self._decoding_settings[sqltype].copy()
+
+ def set_attr(self, attribute: int, value: Union[int, str, bytes, bytearray]) -> None:
+ """
+ Set a connection attribute.
+
+ This method sets a connection attribute using SQLSetConnectAttr.
+ It provides pyodbc-compatible functionality for configuring connection
+ behavior such as autocommit mode, transaction isolation level, and
+ connection timeouts.
+
+ Args:
+ attribute (int): The connection attribute to set. Should be one of the
+ SQL_ATTR_* constants (e.g., SQL_ATTR_AUTOCOMMIT,
+ SQL_ATTR_TXN_ISOLATION).
+ value: The value to set for the attribute. Can be an integer, string,
+ bytes, or bytearray depending on the attribute type.
+
+ Raises:
+ InterfaceError: If the connection is closed or attribute is invalid.
+ ProgrammingError: If the value type or range is invalid.
+ ProgrammingError: If the attribute cannot be set after connection.
+
+ Example:
+ >>> conn.set_attr(SQL_ATTR_TXN_ISOLATION, SQL_TXN_READ_COMMITTED)
+
+ Note:
+ Some attributes (like SQL_ATTR_LOGIN_TIMEOUT, SQL_ATTR_ODBC_CURSORS, and
+ SQL_ATTR_PACKET_SIZE) can only be set before connection establishment and
+ must be provided in the attrs_before parameter when creating the connection.
+ Attempting to set these attributes after connection will raise a ProgrammingError.
+ """
+ if self._closed:
+ raise InterfaceError(
+ "Cannot set attribute on closed connection", "Connection is closed"
+ )
+
+ # Use the integrated validation helper function with connection state
+ is_valid, error_message, sanitized_attr, sanitized_val = validate_attribute_value(
+ attribute, value, is_connected=True
+ )
+
+ if not is_valid:
+ # Use the already sanitized values for logging
+ logger.warning(
+ f"Invalid attribute or value: {sanitized_attr}={sanitized_val}, {error_message}"
+ )
+ raise ProgrammingError(
+ driver_error=f"Invalid attribute or value: {error_message}",
+ ddbc_error=error_message,
+ )
+
+ # Log with sanitized values
+ logger.debug(f"Setting connection attribute: {sanitized_attr}={sanitized_val}")
+
+ try:
+ # Call the underlying C++ method
+ self._conn.set_attr(attribute, value)
+ logger.info(f"Connection attribute {sanitized_attr} set successfully")
+
+ except Exception as e:
+ error_msg = f"Failed to set connection attribute {sanitized_attr}: {str(e)}"
+
+ # Determine appropriate exception type based on error content
+ error_str = str(e).lower()
+ if "invalid" in error_str or "unsupported" in error_str or "cast" in error_str:
+ logger.error(error_msg)
+ raise InterfaceError(error_msg, str(e)) from e
+ logger.error(error_msg)
+ raise ProgrammingError(error_msg, str(e)) from e
+
+ @property
+ def searchescape(self) -> str:
+ """
+ The ODBC search pattern escape character, as returned by
+ SQLGetInfo(SQL_SEARCH_PATTERN_ESCAPE), used to escape special characters
+ such as '%' and '_' in LIKE clauses. The escape character is driver-specific.
+
+ Returns:
+ str: The search pattern escape character (usually '\' or another character)
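+
+ Example (illustrative; assumes a cursor and a table named 'notes'):
+ esc = conn.searchescape
+ cursor.execute(
+ f"SELECT * FROM notes WHERE tag LIKE ? ESCAPE '{esc}'",
+ [f"100{esc}%"], # the escaped '%' is matched literally
+ )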
+ """
+ if not hasattr(self, "_searchescape") or self._searchescape is None:
+ try:
+ escape_char = self.getinfo(GetInfoConstants.SQL_SEARCH_PATTERN_ESCAPE.value)
+ # Some drivers might return this as an integer memory address
+ # or other non-string format, so ensure we have a string
+ if not isinstance(escape_char, str):
+ # Default to backslash if not a string
+ escape_char = "\\"
+ self._searchescape = escape_char
+ except Exception as e:
+ # Log the exception for debugging, but do not expose sensitive info
+ logger.warning(
+ "Failed to retrieve search escape character, using default '\\'. "
+ "Exception: %s",
+ type(e).__name__,
+ )
+ self._searchescape = "\\"
+ return self._searchescape
+
def cursor(self) -> Cursor:
"""
Return a new Cursor object using the connection.
@@ -174,18 +905,535 @@ def cursor(self) -> Cursor:
DatabaseError: If there is an error while creating the cursor.
InterfaceError: If there is an error related to the database interface.
"""
- """Return a new Cursor object using the connection."""
+ logger.debug(
+ "cursor: Creating new cursor - timeout=%d, total_cursors=%d",
+ self._timeout,
+ len(self._cursors),
+ )
if self._closed:
+ logger.error("cursor: Cannot create cursor on closed connection")
# raise InterfaceError
raise InterfaceError(
driver_error="Cannot create cursor on closed connection",
ddbc_error="Cannot create cursor on closed connection",
)
- cursor = Cursor(self)
+ cursor = Cursor(self, timeout=self._timeout)
self._cursors.add(cursor) # Track the cursor
+ logger.debug("cursor: Cursor created successfully - total_cursors=%d", len(self._cursors))
return cursor
+ def add_output_converter(self, sqltype: int, func: Callable[[Any], Any]) -> None:
+ """
+ Register an output converter function that will be called whenever a value
+ with the given SQL type is read from the database.
+
+ Thread-safe implementation that protects the converters dictionary with a lock.
+
+ ⚠️ WARNING: Registering an output converter will cause the supplied Python function
+ to be executed on every matching database value. Do not register converters from
+ untrusted sources, as this can result in arbitrary code execution and security
+ vulnerabilities. This API should never be exposed to untrusted or external input.
+
+ Args:
+ sqltype (int): The integer SQL type value to convert, which can be one of the
+ defined standard constants (e.g. SQL_VARCHAR) or a database-specific
+ value (e.g. -151 for the SQL Server 2008 geometry data type).
+ func (callable): The converter function which will be called with a single parameter,
+ the value, and should return the converted value. If the value is NULL
+ then the parameter passed to the function will be None, otherwise it
+ will be a bytes object.
+
+ Returns:
+ None
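+
+ Example (illustrative; -151 is the SQL Server geometry type mentioned above):
+ def geometry_to_hex(value):
+ return value.hex() if value is not None else None
+ conn.add_output_converter(-151, geometry_to_hex)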
+ """
+ with self._converters_lock:
+ self._output_converters[sqltype] = func
+ # Pass to the underlying connection if native implementation supports it
+ if hasattr(self._conn, "add_output_converter"):
+ self._conn.add_output_converter(sqltype, func)
+ logger.info(f"Added output converter for SQL type {sqltype}")
+
+ def get_output_converter(self, sqltype: Union[int, type]) -> Optional[Callable[[Any], Any]]:
+ """
+ Get the output converter function for the specified SQL type.
+
+ Thread-safe implementation that protects the converters dictionary with a lock.
+
+ Args:
+ sqltype (int or type): The SQL type value or Python type to get the converter for
+
+ Returns:
+ callable or None: The converter function or None if no converter is registered
+
+ Note:
+ ⚠️ The returned converter function will be executed on database values. Only use
+ converters from trusted sources.
+ """
+ with self._converters_lock:
+ return self._output_converters.get(sqltype)
+
+ def remove_output_converter(self, sqltype: Union[int, type]) -> None:
+ """
+ Remove the output converter function for the specified SQL type.
+
+ Thread-safe implementation that protects the converters dictionary with a lock.
+
+ Args:
+ sqltype (int or type): The SQL type value to remove the converter for
+
+ Returns:
+ None
+ """
+ with self._converters_lock:
+ if sqltype in self._output_converters:
+ del self._output_converters[sqltype]
+ # Pass to the underlying connection if native implementation supports it
+ if hasattr(self._conn, "remove_output_converter"):
+ self._conn.remove_output_converter(sqltype)
+ logger.info(f"Removed output converter for SQL type {sqltype}")
+
+ def clear_output_converters(self) -> None:
+ """
+ Remove all output converter functions.
+
+ Thread-safe implementation that protects the converters dictionary with a lock.
+
+ Returns:
+ None
+ """
+ with self._converters_lock:
+ self._output_converters.clear()
+ # Pass to the underlying connection if native implementation supports it
+ if hasattr(self._conn, "clear_output_converters"):
+ self._conn.clear_output_converters()
+ logger.info("Cleared all output converters")
+
+ def execute(self, sql: str, *args: Any) -> Cursor:
+ """
+ Creates a new Cursor object, calls its execute method, and returns the new cursor.
+
+ This is a convenience method that is not part of the DB API. Since a new Cursor
+ is allocated by each call, this should not be used if more than one SQL statement
+ needs to be executed on the connection.
+
+ Note on cursor lifecycle management:
+ - Each call creates a new cursor that is tracked by the connection's internal WeakSet
+ - Cursors are automatically dereferenced/closed when they go out of scope
+ - For long-running applications or loops, explicitly call cursor.close() when done
+ to release resources immediately rather than waiting for garbage collection
+
+ Args:
+ sql (str): The SQL query to execute.
+ *args: Parameters to be passed to the query.
+
+ Returns:
+ Cursor: A new cursor with the executed query.
+
+ Raises:
+ DatabaseError: If there is an error executing the query.
+ InterfaceError: If the connection is closed.
+
+ Example:
+ # Automatic cleanup (cursor goes out of scope after the operation)
+ row = connection.execute("SELECT name FROM users WHERE id = ?", 123).fetchone()
+
+ # Manual cleanup for more explicit resource management
+ cursor = connection.execute("SELECT * FROM large_table")
+ try:
+ # Use cursor...
+ rows = cursor.fetchall()
+ finally:
+ cursor.close() # Explicitly release resources
+ """
+ cursor = self.cursor()
+ try:
+ # Add the cursor to our tracking set BEFORE execution
+ # This ensures it's tracked even if execution fails
+ self._cursors.add(cursor)
+
+ # Now execute the query
+ cursor.execute(sql, *args)
+ return cursor
+ except Exception:
+ # If execution fails, close the cursor to avoid leaking resources
+ cursor.close()
+ raise
+
+ def batch_execute(
+ self,
+ statements: List[str],
+ params: Optional[List[Union[None, Any, Tuple[Any, ...], List[Any]]]] = None,
+ reuse_cursor: Optional[Cursor] = None,
+ auto_close: bool = False,
+ ) -> Tuple[List[Union[List["Row"], int]], Cursor]:
+ """
+ Execute multiple SQL statements efficiently using a single cursor.
+
+ This method allows executing multiple SQL statements in sequence using a single
+ cursor, which is more efficient than creating a new cursor for each statement.
+
+ Args:
+ statements (list): List of SQL statements to execute
+ params (list, optional): List of parameter sets corresponding to statements.
+ Each item can be None, a single parameter, or a sequence of parameters.
+ If None, no parameters will be used for any statement.
+ reuse_cursor (Cursor, optional): Existing cursor to reuse instead of creating a new one.
+ If None, a new cursor will be created.
+ auto_close (bool): Whether to close the cursor after execution if a new one was created.
+ Defaults to False. A cursor passed via reuse_cursor is left open on
+ success, but is also closed when auto_close is True and execution fails.
+
+ Returns:
+ tuple: (results, cursor) where:
+ - results is a list of execution results, one for each statement
+ - cursor is the cursor used for execution (useful if you want to keep using it)
+
+ Raises:
+ TypeError: If statements is not a list or if params is provided but not a list
+ ValueError: If params is provided but has different length than statements
+ DatabaseError: If there is an error executing any of the statements
+ InterfaceError: If the connection is closed
+
+ Example:
+ # Execute multiple statements with a single cursor
+ results, _ = conn.batch_execute([
+ "INSERT INTO users VALUES (?, ?)",
+ "UPDATE stats SET count = count + 1",
+ "SELECT * FROM users"
+ ], [
+ (1, "user1"),
+ None,
+ None
+ ])
+
+ # Last result contains the SELECT results
+ for row in results[-1]:
+ print(row)
+
+ # Reuse an existing cursor
+ my_cursor = conn.cursor()
+ results, _ = conn.batch_execute([
+ "SELECT * FROM table1",
+ "SELECT * FROM table2"
+ ], reuse_cursor=my_cursor)
+
+ # Cursor remains open for further use
+ my_cursor.execute("SELECT * FROM table3")
+ """
+ # Validate inputs
+ if not isinstance(statements, list):
+ raise TypeError("statements must be a list of SQL statements")
+
+ if params is not None:
+ if not isinstance(params, list):
+ raise TypeError("params must be a list of parameter sets")
+ if len(params) != len(statements):
+ raise ValueError("params list must have the same length as statements list")
+ else:
+ # Create a list of None values with the same length as statements
+ params = [None] * len(statements)
+
+ # Determine which cursor to use
+ is_new_cursor = reuse_cursor is None
+ cursor = self.cursor() if is_new_cursor else reuse_cursor
+
+ # Execute statements and collect results
+ results = []
+ try:
+ for i, (stmt, param) in enumerate(zip(statements, params)):
+ try:
+ # Execute the statement with parameters if provided
+ if param is not None:
+ cursor.execute(stmt, param)
+ else:
+ cursor.execute(stmt)
+
+ # For SELECT statements, fetch all rows
+ # For other statements, get the row count
+ if cursor.description is not None:
+ # This is a SELECT statement or similar that returns rows
+ results.append(cursor.fetchall())
+ else:
+ # This is an INSERT, UPDATE, DELETE or similar that doesn't return rows
+ results.append(cursor.rowcount)
+
+ logger.debug(f"Executed batch statement {i+1}/{len(statements)}")
+
+ except Exception as e:
+ # If a statement fails, include statement context in the error
+ logger.error(
+ f"Error executing statement {i+1}/{len(statements)}: {e}"
+ )
+ raise
+
+ except Exception:
+ # If an error occurs and auto_close is True, close the cursor
+ if auto_close:
+ try:
+ # Close the cursor regardless of whether it's reused or new
+ cursor.close()
+ logger.debug(
+ "Automatically closed cursor after batch execution error"
+ )
+ except Exception as close_err:
+ logger.warning(
+ f"Error closing cursor after execution failure: {close_err}"
+ )
+ # Re-raise the original exception
+ raise
+
+ # Close the cursor if requested and we created a new one
+ if is_new_cursor and auto_close:
+ cursor.close()
+ logger.debug("Automatically closed cursor after batch execution")
+
+ return results, cursor
+
+ def getinfo(self, info_type: int) -> Union[str, int, bool, None]:
+ """
+ Return general information about the driver and data source.
+
+ Args:
+ info_type (int): The type of information to return. See the ODBC
+ SQLGetInfo documentation for the supported values.
+
+ Returns:
+ The requested information. The type of the returned value depends
+ on the information requested. It will be a string, integer, or boolean.
+
+ Raises:
+ DatabaseError: If there is an error retrieving the information.
+ InterfaceError: If the connection is closed.
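+
+ Example (illustrative; 17 is the standard ODBC info code for SQL_DBMS_NAME):
+ dbms_name = conn.getinfo(17) # e.g. 'Microsoft SQL Server'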
+ """
+ if self._closed:
+ raise InterfaceError(
+ driver_error="Cannot get info on closed connection",
+ ddbc_error="Cannot get info on closed connection",
+ )
+
+ # Check that info_type is an integer
+ if not isinstance(info_type, int):
+ raise ValueError(f"info_type must be an integer, got {type(info_type).__name__}")
+
+ # Check for invalid info_type values
+ if info_type < 0:
+ logger.warning(
+ f"Invalid info_type: {info_type}. Must be a non-negative integer."
+ )
+ return None
+
+ # Get the raw result from the C++ layer
+ try:
+ raw_result = self._conn.get_info(info_type)
+ except Exception as e: # pylint: disable=broad-exception-caught
+ # Log the error and return None for invalid info types
+ logger.warning(f"getinfo({info_type}) failed: {e}")
+ return None
+
+ if raw_result is None:
+ return None
+
+ # Check if the result is already a simple type
+ if isinstance(raw_result, (str, int, bool)):
+ return raw_result
+
+ # If it's a dictionary with data and metadata
+ if isinstance(raw_result, dict) and "data" in raw_result:
+ # Extract data and metadata from the raw result
+ data = raw_result["data"]
+ length = raw_result["length"]
+
+ # Debug logging to understand the issue better
+ logger.debug(
+ f"getinfo: info_type={info_type}, length={length}, data_type={type(data)}"
+ )
+
+ # Define constants for different return types
+ # String types - these return strings in pyodbc
+ string_type_constants = {
+ GetInfoConstants.SQL_DATA_SOURCE_NAME.value,
+ GetInfoConstants.SQL_DRIVER_NAME.value,
+ GetInfoConstants.SQL_DRIVER_VER.value,
+ GetInfoConstants.SQL_SERVER_NAME.value,
+ GetInfoConstants.SQL_USER_NAME.value,
+ GetInfoConstants.SQL_DRIVER_ODBC_VER.value,
+ GetInfoConstants.SQL_IDENTIFIER_QUOTE_CHAR.value,
+ GetInfoConstants.SQL_CATALOG_NAME_SEPARATOR.value,
+ GetInfoConstants.SQL_CATALOG_TERM.value,
+ GetInfoConstants.SQL_SCHEMA_TERM.value,
+ GetInfoConstants.SQL_TABLE_TERM.value,
+ GetInfoConstants.SQL_KEYWORDS.value,
+ GetInfoConstants.SQL_PROCEDURE_TERM.value,
+ GetInfoConstants.SQL_SPECIAL_CHARACTERS.value,
+ GetInfoConstants.SQL_SEARCH_PATTERN_ESCAPE.value,
+ }
+
+ # Boolean 'Y'/'N' types
+ yn_type_constants = {
+ GetInfoConstants.SQL_ACCESSIBLE_PROCEDURES.value,
+ GetInfoConstants.SQL_ACCESSIBLE_TABLES.value,
+ GetInfoConstants.SQL_DATA_SOURCE_READ_ONLY.value,
+ GetInfoConstants.SQL_EXPRESSIONS_IN_ORDERBY.value,
+ GetInfoConstants.SQL_LIKE_ESCAPE_CLAUSE.value,
+ GetInfoConstants.SQL_MULTIPLE_ACTIVE_TXN.value,
+ GetInfoConstants.SQL_NEED_LONG_DATA_LEN.value,
+ GetInfoConstants.SQL_PROCEDURES.value,
+ }
+
+ # Numeric type constants that return integers
+ numeric_type_constants = {
+ GetInfoConstants.SQL_MAX_COLUMN_NAME_LEN.value,
+ GetInfoConstants.SQL_MAX_TABLE_NAME_LEN.value,
+ GetInfoConstants.SQL_MAX_SCHEMA_NAME_LEN.value,
+ GetInfoConstants.SQL_MAX_CATALOG_NAME_LEN.value,
+ GetInfoConstants.SQL_MAX_IDENTIFIER_LEN.value,
+ GetInfoConstants.SQL_MAX_STATEMENT_LEN.value,
+ GetInfoConstants.SQL_MAX_DRIVER_CONNECTIONS.value,
+ GetInfoConstants.SQL_NUMERIC_FUNCTIONS.value,
+ GetInfoConstants.SQL_STRING_FUNCTIONS.value,
+ GetInfoConstants.SQL_DATETIME_FUNCTIONS.value,
+ GetInfoConstants.SQL_TXN_CAPABLE.value,
+ GetInfoConstants.SQL_DEFAULT_TXN_ISOLATION.value,
+ GetInfoConstants.SQL_CURSOR_COMMIT_BEHAVIOR.value,
+ }
+
+ # Determine the type of information we're dealing with
+ is_string_type = (
+ info_type > INFO_TYPE_STRING_THRESHOLD or info_type in string_type_constants
+ )
+ is_yn_type = info_type in yn_type_constants
+ is_numeric_type = info_type in numeric_type_constants
+
+ # Process the data based on type
+ if is_string_type:
+ # For string data, ensure we properly handle the byte array
+ if isinstance(data, bytes):
+ # Make sure we use the correct amount of data based on length
+ actual_data = data[:length]
+
+ # SQLGetInfoW returns UTF-16LE encoded strings (wide-character ODBC API)
+ # Try UTF-16LE first (expected), then UTF-8 as fallback
+ for encoding in ("utf-16-le", "utf-8"):
+ try:
+ return actual_data.decode(encoding).rstrip("\0")
+ except UnicodeDecodeError:
+ continue
+
+ # All decodings failed
+ logger.debug(
+ "Failed to decode string in getinfo (info_type=%d) with supported encodings. "
+ "Returning None to avoid silent corruption.",
+ info_type,
+ )
+ return None
+ else:
+ # If it's not bytes, return as is
+ return data
+ elif is_yn_type:
+ # For Y/N types, pyodbc returns a string 'Y' or 'N'
+ if isinstance(data, bytes) and length >= 1:
+ byte_val = data[0]
+ if byte_val in (b"Y"[0], b"y"[0], 1):
+ return "Y"
+ return "N"
+ # If it's not a byte or we can't determine, default to 'N'
+ return "N"
+ elif is_numeric_type:
+ # Handle numeric types based on length
+ if isinstance(data, bytes):
+ # Map byte length → signed int size
+ int_sizes = {
+ 1: lambda d: int(d[0]),
+ 2: lambda d: int.from_bytes(d[:2], "little", signed=True),
+ 4: lambda d: int.from_bytes(d[:4], "little", signed=True),
+ 8: lambda d: int.from_bytes(d[:8], "little", signed=True),
+ }
+
+ # Direct numeric conversion if supported length
+ if length in int_sizes:
+ result = int_sizes[length](data)
+ return int(result)
+
+ # Helper: check if all chars are digits
+ def is_digit_bytes(b: bytes) -> bool:
+ return all(c in b"0123456789" for c in b)
+
+ # Helper: check if bytes are ASCII-printable or NUL padded
+ def is_printable_bytes(b: bytes) -> bool:
+ return all(32 <= c <= 126 or c == 0 for c in b)
+
+ chunk = data[:length]
+
+ # Try interpret as integer string
+ if is_digit_bytes(chunk):
+ return int(chunk)
+
+ # Try decode as ASCII/UTF-8 string
+ if is_printable_bytes(chunk):
+ str_val = chunk.decode("utf-8", errors="replace").rstrip("\0")
+ return int(str_val) if str_val.isdigit() else str_val
+
+ # For 16-bit values that might be returned for max lengths
+ if length == 2:
+ return int.from_bytes(data[:2], "little", signed=True)
+
+ # For 32-bit values (common for bitwise flags)
+ if length == 4:
+ return int.from_bytes(data[:4], "little", signed=True)
+
+ # Fallback: try to convert to int if possible
+ try:
+ if length <= 8:
+ return int.from_bytes(data[:length], "little", signed=True)
+ except Exception:
+ pass
+
+ # Last resort: return as integer if all else fails
+ try:
+ return int.from_bytes(data[: min(length, 8)], "little", signed=True)
+ except Exception:
+ return 0
+ elif isinstance(data, (int, float)):
+ # Already numeric
+ return int(data)
+ else:
+ # Try to convert to int if it's a string
+ try:
+ if isinstance(data, str) and data.isdigit():
+ return int(data)
+ except Exception:
+ pass
+
+ # Return as is if we can't convert
+ return data
+
+ # For other types, try to determine the most appropriate type
+ if isinstance(data, bytes):
+ # Try to convert to string first
+ try:
+ return data[:length].decode("utf-8").rstrip("\0")
+ except UnicodeDecodeError:
+ pass
+
+ # Try to convert to int for short binary data
+ try:
+ if length <= 8:
+ return int.from_bytes(data[:length], "little", signed=True)
+ except Exception: # pylint: disable=broad-exception-caught
+ pass
+
+ # Return as is if we can't determine
+ return data
+
+ return data
+
+ return raw_result # Return as-is
+
def commit(self) -> None:
"""
Commit the current transaction.
@@ -196,11 +1444,19 @@ def commit(self) -> None:
that the changes are saved.
Raises:
+ InterfaceError: If the connection is closed.
DatabaseError: If there is an error while committing the transaction.
"""
+ # Check if connection is closed
+ if self._closed or self._conn is None:
+ raise InterfaceError(
+ driver_error="Cannot commit on a closed connection",
+ ddbc_error="Cannot commit on a closed connection",
+ )
+
# Commit the current transaction
self._conn.commit()
- log('info', "Transaction committed successfully.")
+ logger.info("Transaction committed successfully.")
def rollback(self) -> None:
"""
@@ -211,11 +1467,19 @@ def rollback(self) -> None:
transaction or if the changes should not be saved.
Raises:
+ InterfaceError: If the connection is closed.
DatabaseError: If there is an error while rolling back the transaction.
"""
+ # Check if connection is closed
+ if self._closed or self._conn is None:
+ raise InterfaceError(
+ driver_error="Cannot rollback on a closed connection",
+ ddbc_error="Cannot rollback on a closed connection",
+ )
+
# Roll back the current transaction
self._conn.rollback()
- log('info', "Transaction rolled back successfully.")
+ logger.info("Transaction rolled back successfully.")
def close(self) -> None:
"""
@@ -233,47 +1497,107 @@ def close(self) -> None:
# Close the connection
if self._closed:
return
-
+
# Close all cursors first, but don't let one failure stop the others
- if hasattr(self, '_cursors'):
+ if hasattr(self, "_cursors"):
# Convert to list to avoid modification during iteration
cursors_to_close = list(self._cursors)
close_errors = []
-
+
for cursor in cursors_to_close:
try:
if not cursor.closed:
cursor.close()
- except Exception as e:
+ except Exception as e: # pylint: disable=broad-exception-caught
# Collect errors but continue closing other cursors
close_errors.append(f"Error closing cursor: {e}")
- log('warning', f"Error closing cursor: {e}")
-
+ logger.warning(f"Error closing cursor: {e}")
+
# If there were errors closing cursors, log them but continue
if close_errors:
- log('warning', f"Encountered {len(close_errors)} errors while closing cursors")
+ logger.warning(
+ "Encountered %d errors while closing cursors",
+ len(close_errors),
+ )
- # Clear the cursor set explicitly to release any internal references
+ # Clear the cursor set explicitly to release any internal
+ # references
self._cursors.clear()
# Close the connection even if cursor cleanup had issues
try:
if self._conn:
+ if not self.autocommit:
+ # If autocommit is disabled, rollback any uncommitted changes
+ # This is important to ensure no partial transactions remain
+ # For autocommit True, this is not necessary as each statement is
+ # committed immediately
+ logger.debug("Rolling back uncommitted changes before closing connection.")
+ self._conn.rollback()
+ # TODO: Check potential race conditions in case of multithreaded scenarios
+ # Close the connection
self._conn.close()
self._conn = None
except Exception as e:
- log('error', f"Error closing database connection: {e}")
+ logger.error(f"Error closing database connection: {e}")
# Re-raise the connection close error as it's more critical
raise
finally:
# Always mark as closed, even if there were errors
self._closed = True
-
- log('info', "Connection closed successfully.")
- def __del__(self):
+ logger.info("Connection closed successfully.")
+
+ def _remove_cursor(self, cursor: Cursor) -> None:
+ """
+ Remove a cursor from the connection's tracking.
+
+ This method is called when a cursor is closed to ensure proper cleanup.
+
+ Args:
+ cursor: The cursor to remove from tracking.
+ """
+ if hasattr(self, "_cursors"):
+ try:
+ self._cursors.discard(cursor)
+ except Exception:
+ pass # Ignore errors during cleanup
+
+ def __enter__(self) -> "Connection":
+ """
+ Enter the context manager.
+
+ This method enables the Connection to be used with the 'with' statement.
+ When entering the context, it simply returns the connection object itself.
+
+ Returns:
+ Connection: The connection object itself.
+
+ Example:
+ with connect(connection_string) as conn:
+ cursor = conn.cursor()
+ cursor.execute("INSERT INTO table VALUES (?)", [value])
+ # Transaction will be committed automatically when exiting
+ """
+ logger.info("Entering connection context manager.")
+ return self
+
+ def __exit__(self, *args: Any) -> None:
+ """
+ Exit the context manager.
+
+ Closes the connection when exiting the context, ensuring proper
+ resource cleanup. This follows the modern standard used by most
+ database libraries.
+ """
+ if not self._closed:
+ self.close()
+
+ def __del__(self) -> None:
"""
- Destructor to ensure the connection is closed when the connection object is no longer needed.
+ Destructor to ensure the connection is closed when the connection object
+ is no longer needed.
This is a safety net to ensure resources are cleaned up
even if close() was not called explicitly.
"""
@@ -282,4 +1606,4 @@ def __del__(self):
self.close()
except Exception as e:
# Dont raise exceptions from __del__ to avoid issues during garbage collection
- log('error', f"Error during connection cleanup: {e}")
\ No newline at end of file
+ logger.warning(f"Error during connection cleanup: {e}")
diff --git a/mssql_python/connection_string_builder.py b/mssql_python/connection_string_builder.py
new file mode 100644
index 000000000..257cf9f10
--- /dev/null
+++ b/mssql_python/connection_string_builder.py
@@ -0,0 +1,114 @@
+"""
+Copyright (c) Microsoft Corporation.
+Licensed under the MIT license.
+
+Connection string builder for mssql-python.
+
+Reconstructs ODBC connection strings from parameter dictionaries
+with proper escaping and formatting per MS-ODBCSTR specification.
+"""
+
+from typing import Dict, Optional
+from mssql_python.constants import _CONNECTION_STRING_DRIVER_KEY
+
+
+class _ConnectionStringBuilder:
+ """
+ Internal builder for ODBC connection strings. Not part of public API.
+
+ Handles proper escaping of special characters and reconstructs
+ connection strings in ODBC format.
+ """
+
+ def __init__(self, initial_params: Optional[Dict[str, str]] = None):
+ """
+ Initialize the builder with optional initial parameters.
+
+ Args:
+ initial_params: Dictionary of initial connection parameters
+ """
+ self._params: Dict[str, str] = initial_params.copy() if initial_params else {}
+
+ def add_param(self, key: str, value: str) -> "_ConnectionStringBuilder":
+ """
+ Add or update a connection parameter.
+
+ Args:
+ key: Parameter name (should be normalized canonical name)
+ value: Parameter value
+
+ Returns:
+ Self for method chaining
+ """
+ self._params[key] = str(value)
+ return self
+
+ def build(self) -> str:
+ """
+ Build the final connection string.
+
+ Returns:
+ ODBC-formatted connection string with proper escaping
+
+ Note:
+ - Driver parameter is placed first
+ - Other parameters are sorted for consistency
+ - Values are escaped if they contain special characters
+ """
+ parts = []
+
+ # Build in specific order: Driver first, then others
+ if _CONNECTION_STRING_DRIVER_KEY in self._params:
+ parts.append(f"Driver={self._escape_value(self._params['Driver'])}")
+
+ # Add other parameters (sorted for consistency)
+ for key in sorted(self._params.keys()):
+ if key == "Driver":
+ continue # Already added
+
+ value = self._params[key]
+ escaped_value = self._escape_value(value)
+ parts.append(f"{key}={escaped_value}")
+
+ # Join with semicolons
+ return ";".join(parts)
+
+ def _escape_value(self, value: str) -> str:
+ """
+ Escape a parameter value if it contains special characters.
+
+ - Values containing ';', '{', '}', '=', or spaces should be braced for safety
+ - '}' inside braced values is escaped as '}}'
+ - '{' does not need to be escaped
+
+ Args:
+ value: Parameter value to escape
+
+ Returns:
+ Escaped value (possibly wrapped in braces)
+
+ Examples:
+ >>> builder = _ConnectionStringBuilder()
+ >>> builder._escape_value("localhost")
+ 'localhost'
+ >>> builder._escape_value("local;host")
+ '{local;host}'
+ >>> builder._escape_value("p}w{d")
+ '{p}}w{d}'
+ >>> builder._escape_value("ODBC Driver 18 for SQL Server")
+ '{ODBC Driver 18 for SQL Server}'
+ """
+ if not value:
+ return value
+
+ # Check if value contains special characters that require bracing
+ # Include spaces and = for safety, even though technically not always required
+ needs_braces = any(ch in value for ch in ";{}= ")
+
+ if needs_braces:
+ # Escape closing braces by doubling them (ODBC requirement)
+ # Opening braces do not need to be escaped
+ escaped = value.replace("}", "}}")
+ return f"{{{escaped}}}"
+ else:
+ return value
diff --git a/mssql_python/connection_string_parser.py b/mssql_python/connection_string_parser.py
new file mode 100644
index 000000000..9dd88db22
--- /dev/null
+++ b/mssql_python/connection_string_parser.py
@@ -0,0 +1,375 @@
+"""
+Copyright (c) Microsoft Corporation.
+Licensed under the MIT license.
+
+ODBC connection string parser for mssql-python.
+
+Handles ODBC-specific syntax per MS-ODBCSTR specification:
+- Semicolon-separated key=value pairs
+- Braced values: {value}
+- Escaped braces: }} → } (only closing braces need escaping)
+
+Parser behavior:
+- Validates all key=value pairs
+- Raises exceptions for malformed syntax (missing values, unknown keywords, duplicates)
+- Collects all errors and reports them together
+"""
+
+from typing import Dict, Tuple, Optional
+from mssql_python.exceptions import ConnectionStringParseError
+from mssql_python.constants import _ALLOWED_CONNECTION_STRING_PARAMS, _RESERVED_PARAMETERS
+from mssql_python.helpers import sanitize_user_input
+from mssql_python.logging import logger
+
+
+class _ConnectionStringParser:
+ """
+ Internal parser for ODBC connection strings. Not part of public API.
+
+ Implements the ODBC Connection String format as specified in MS-ODBCSTR.
+ Handles braced values, escaped characters, and proper tokenization.
+
+ Validates connection strings and raises errors for:
+ - Unknown/unrecognized keywords
+ - Duplicate keywords
+ - Incomplete specifications (keyword with no value)
+
+ Reference: https://learn.microsoft.com/en-us/openspecs/sql_server_protocols/ms-odbcstr/55953f0e-2d30-4ad4-8e56-b4207e491409
+ """
+
+ def __init__(self, validate_keywords: bool = False) -> None:
+ """
+ Initialize the parser.
+
+ Args:
+ validate_keywords: Whether to validate keywords against the allow-list.
+ If False, pure parsing without validation is performed.
+ This is useful for testing parsing logic independently
+ or when validation is handled separately.
+ """
+ self._validate_keywords = validate_keywords
+
+ @classmethod
+ def normalize_key(cls, key: str) -> Optional[str]:
+ """
+ Normalize a parameter key to its canonical form.
+
+ Args:
+ key: Parameter key from connection string (case-insensitive)
+
+ Returns:
+ Canonical parameter name if allowed, None otherwise
+
+ Examples:
+ >>> _ConnectionStringParser.normalize_key('SERVER')
+ 'Server'
+ >>> _ConnectionStringParser.normalize_key('uid')
+ 'UID'
+ >>> _ConnectionStringParser.normalize_key('UnsupportedParam')
+ None
+ """
+ key_lower = key.lower().strip()
+ return _ALLOWED_CONNECTION_STRING_PARAMS.get(key_lower)
+
+ @staticmethod
+ def _normalize_params(params: Dict[str, str], warn_rejected: bool = True) -> Dict[str, str]:
+ """
+ Normalize and filter parameters against the allow-list (internal use only).
+
+ This method performs several operations:
+ - Normalizes parameter names (e.g., addr/address → Server, uid → UID)
+ - Filters out parameters not in the allow-list
+ - Removes reserved parameters (Driver, APP)
+ - Deduplicates via normalized keys
+
+ Args:
+ params: Dictionary of connection string parameters (keys should be lowercase)
+ warn_rejected: Whether to log warnings for rejected parameters
+
+ Returns:
+ Dictionary containing only allowed parameters with normalized keys
+
+ Note:
+ Driver and APP parameters are filtered here but will be set by
+ the driver in _construct_connection_string to maintain control.
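+
+ Example (illustrative):
+ _ConnectionStringParser._normalize_params({"server": "localhost", "uid": "sa"})
+ # -> {'Server': 'localhost', 'UID': 'sa'}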
+ """
+ filtered = {}
+
+ # The rejected list should ideally be empty when used in the normal connection
+ # flow, since the parser validates against the allowlist first and raises
+ # errors for unknown parameters. This filtering is primarily a safety net.
+ rejected = []
+
+ for key, value in params.items():
+ normalized_key = _ConnectionStringParser.normalize_key(key)
+
+ if normalized_key:
+ # Skip Driver and APP - these are controlled by the driver
+ if normalized_key in _RESERVED_PARAMETERS:
+ continue
+
+ # Parameter is allowed
+ filtered[normalized_key] = value
+ else:
+ # Parameter is not in allow-list
+ # Note: In normal flow, this should be empty since parser validates first
+ rejected.append(key)
+
+ # Log all rejected parameters together if any were found
+ if rejected and warn_rejected:
+ safe_keys = [sanitize_user_input(key) for key in rejected]
+ logger.warning(
+ f"Connection string parameters not in allow-list and will be ignored: {', '.join(safe_keys)}"
+ )
+
+ return filtered
+
+ def _parse(self, connection_str: str) -> Dict[str, str]:
+ """
+ Parse a connection string into a dictionary of parameters.
+
+ Validates the connection string and raises ConnectionStringParseError
+ if any issues are found (unknown keywords, duplicates, missing values).
+
+ Args:
+ connection_str: ODBC-format connection string
+
+ Returns:
+ Dictionary mapping parameter names (lowercase) to values
+
+ Raises:
+ ConnectionStringParseError: If validation errors are found
+
+ Examples:
+ >>> parser = _ConnectionStringParser()
+ >>> result = parser._parse("Server=localhost;Database=mydb")
+ {'server': 'localhost', 'database': 'mydb'}
+
+ >>> parser._parse("Server={;local;};PWD={p}}w{{d}")
+ {'server': ';local;', 'pwd': 'p}w{d'}
+
+ >>> parser._parse("Server=localhost;Server=other")
+ ConnectionStringParseError: Duplicate keyword 'server'
+ """
+ if not connection_str:
+ return {}
+
+ connection_str = connection_str.strip()
+ if not connection_str:
+ return {}
+
+ # Collect all errors for batch reporting
+ errors = []
+
+ # Dictionary to store parsed key=value pairs
+ params = {}
+
+ # Track which keys we've seen to detect duplicates
+ seen_keys = {} # Maps normalized key -> first occurrence position
+
+ # Track current position in the string
+ current_pos = 0
+ str_len = len(connection_str)
+
+ # Main parsing loop
+ while current_pos < str_len:
+ # Skip leading whitespace and semicolons
+ while current_pos < str_len and connection_str[current_pos] in " \t;":
+ current_pos += 1
+
+ if current_pos >= str_len:
+ break
+
+ # Parse the key
+ key_start = current_pos
+
+ # Advance until we hit '=', ';', or end of string
+ while current_pos < str_len and connection_str[current_pos] not in "=;":
+ current_pos += 1
+
+ # Check if we found a valid '=' separator
+ if current_pos >= str_len or connection_str[current_pos] != "=":
+ # ERROR: No '=' found - incomplete specification
+ incomplete_text = connection_str[key_start:current_pos].strip()
+ if incomplete_text:
+ errors.append(
+ f"Incomplete specification: keyword '{incomplete_text}' has no value (missing '=')"
+ )
+ # Skip to next semicolon
+ while current_pos < str_len and connection_str[current_pos] != ";":
+ current_pos += 1
+ continue
+
+ # Extract and normalize the key
+ key = connection_str[key_start:current_pos].strip().lower()
+
+ # ERROR: Empty key
+ if not key:
+ errors.append("Empty keyword found (format: =value)")
+ current_pos += 1 # Skip the '='
+ # Skip to next semicolon
+ while current_pos < str_len and connection_str[current_pos] != ";":
+ current_pos += 1
+ continue
+
+ # Move past the '='
+ current_pos += 1
+
+ # Parse the value
+ try:
+ value, current_pos = self._parse_value(connection_str, current_pos)
+
+ # ERROR: Empty value
+ if not value:
+ errors.append(
+ f"Empty value for keyword '{key}' (all connection string parameters must have non-empty values)"
+ )
+
+ # Check for duplicates
+ if key in seen_keys:
+ errors.append(f"Duplicate keyword '{key}' found")
+ else:
+ seen_keys[key] = True
+ params[key] = value
+
+ except ValueError as e:
+ errors.append(f"Error parsing value for keyword '{key}': {e}")
+ # Skip to next semicolon
+ while current_pos < str_len and connection_str[current_pos] != ";":
+ current_pos += 1
+
+ # Validate keywords against allowlist if validation is enabled
+ if self._validate_keywords:
+ unknown_keys = []
+ reserved_keys = []
+
+ for key in params.keys():
+ # Check if this key can be normalized (i.e., it's known)
+ normalized_key = _ConnectionStringParser.normalize_key(key)
+
+ if normalized_key is None:
+ # Unknown keyword
+ unknown_keys.append(key)
+ elif normalized_key in _RESERVED_PARAMETERS:
+ # Reserved keyword - user cannot set these
+ reserved_keys.append(key)
+
+ if reserved_keys:
+ for key in reserved_keys:
+ errors.append(
+ f"Reserved keyword '{key}' is controlled by the driver and cannot be specified by the user"
+ )
+
+ if unknown_keys:
+ for key in unknown_keys:
+ errors.append(f"Unknown keyword '{key}' is not recognized")
+
+ # If we collected any errors, raise them all together
+ if errors:
+ raise ConnectionStringParseError(errors)
+
+ return params
+
+ def _parse_value(self, connection_str: str, start_pos: int) -> Tuple[str, int]:
+ """
+ Parse a parameter value from the connection string.
+
+ Handles both simple values and braced values with escaping.
+
+ Args:
+ connection_str: The connection string
+ start_pos: Starting position of the value
+
+ Returns:
+ Tuple of (parsed_value, new_position)
+
+ Raises:
+ ValueError: If braced value is not properly closed
+ """
+ str_len = len(connection_str)
+
+ # Skip leading whitespace before the value
+ while start_pos < str_len and connection_str[start_pos] in " \t":
+ start_pos += 1
+
+ # If we've reached the end of the string, the value is empty
+ if start_pos >= str_len:
+ return "", start_pos
+
+ # Determine if this is a braced value or simple value
+ if connection_str[start_pos] == "{":
+ return self._parse_braced_value(connection_str, start_pos)
+ else:
+ return self._parse_simple_value(connection_str, start_pos)
+
+ def _parse_simple_value(self, connection_str: str, start_pos: int) -> Tuple[str, int]:
+ """
+ Parse a simple (non-braced) value up to the next semicolon.
+
+ Args:
+ connection_str: The connection string
+ start_pos: Starting position of the value
+
+ Returns:
+ Tuple of (parsed_value, new_position)
+ """
+ str_len = len(connection_str)
+ value_start = start_pos
+
+ # Read characters until we hit a semicolon or end of string
+ while start_pos < str_len and connection_str[start_pos] != ";":
+ start_pos += 1
+
+ # Extract the value and strip trailing whitespace
+ value = connection_str[value_start:start_pos].rstrip()
+ return value, start_pos
+
+ def _parse_braced_value(self, connection_str: str, start_pos: int) -> Tuple[str, int]:
+ """
+ Parse a braced value with proper handling of escaped braces.
+
+ Braced values:
+ - Start with '{' and end with '}'
+ - '}' inside the value is escaped as '}}'
+ - '{' inside the value does not need escaping
+ - Can contain semicolons and other special characters
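+
+ For illustration, given the rules above, "{p;w}}d}" parses to "p;w}d"
+ (the ';' is kept literally and '}}' unescapes to a single '}').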
+
+ Args:
+ connection_str: The connection string
+ start_pos: Starting position (should point to opening '{')
+
+ Returns:
+ Tuple of (parsed_value, new_position)
+
+ Raises:
+ ValueError: If the braced value is not closed (missing '}')
+ """
+ str_len = len(connection_str)
+ brace_start_pos = start_pos
+
+ # Skip the opening '{'
+ start_pos += 1
+
+ # Build the value character by character
+ value = []
+
+ while start_pos < str_len:
+ ch = connection_str[start_pos]
+
+ if ch == "}":
+ # Check if next character is also '}' (escaped brace)
+ if start_pos + 1 < str_len and connection_str[start_pos + 1] == "}":
+ # Escaped right brace: '}}' → '}'
+ value.append("}")
+ start_pos += 2
+ else:
+ # Single '}' means end of braced value
+ start_pos += 1
+ return "".join(value), start_pos
+ else:
+ # Regular character (including '{' which doesn't need escaping per ODBC spec)
+ value.append(ch)
+ start_pos += 1
+
+ # Reached end without finding closing '}'
+ raise ValueError(f"Unclosed braced value starting at position {brace_start_pos}")
diff --git a/mssql_python/constants.py b/mssql_python/constants.py
index 81e60d37e..03d40c833 100644
--- a/mssql_python/constants.py
+++ b/mssql_python/constants.py
@@ -5,12 +5,14 @@
"""
from enum import Enum
+from typing import Dict, Optional, Tuple
class ConstantsDDBC(Enum):
"""
Constants used in the DDBC module.
"""
+
SQL_HANDLE_ENV = 1
SQL_HANDLE_DBC = 2
SQL_HANDLE_STMT = 3
@@ -20,20 +22,14 @@ class ConstantsDDBC(Enum):
SQL_STILL_EXECUTING = 2
SQL_NTS = -3
SQL_DRIVER_NOPROMPT = 0
- SQL_ATTR_ASYNC_DBC_EVENT = 119
SQL_IS_INTEGER = -6
- SQL_ATTR_ASYNC_DBC_FUNCTIONS_ENABLE = 117
SQL_OV_DDBC3_80 = 380
- SQL_ATTR_DDBC_VERSION = 200
- SQL_ATTR_ASYNC_ENABLE = 4
- SQL_ATTR_ASYNC_STMT_EVENT = 29
SQL_ERROR = -1
SQL_INVALID_HANDLE = -2
SQL_NULL_HANDLE = 0
SQL_OV_DDBC3 = 3
SQL_COMMIT = 0
SQL_ROLLBACK = 1
- SQL_ATTR_AUTOCOMMIT = 102
SQL_SMALLINT = 5
SQL_CHAR = 1
SQL_WCHAR = -8
@@ -94,21 +90,15 @@ class ConstantsDDBC(Enum):
SQL_DESC_TYPE = 2
SQL_DESC_LENGTH = 3
SQL_DESC_NAME = 4
- SQL_ATTR_ROW_ARRAY_SIZE = 27
- SQL_ATTR_ROWS_FETCHED_PTR = 26
- SQL_ATTR_ROW_STATUS_PTR = 25
- SQL_FETCH_NEXT = 1
SQL_ROW_SUCCESS = 0
SQL_ROW_SUCCESS_WITH_INFO = 1
SQL_ROW_NOROW = 100
- SQL_ATTR_CURSOR_TYPE = 6
SQL_CURSOR_FORWARD_ONLY = 0
SQL_CURSOR_STATIC = 3
SQL_CURSOR_KEYSET_DRIVEN = 2
SQL_CURSOR_DYNAMIC = 3
SQL_NULL_DATA = -1
SQL_C_DEFAULT = 99
- SQL_ATTR_ROW_BIND_TYPE = 5
SQL_BIND_BY_COLUMN = 0
SQL_PARAM_INPUT = 1
SQL_PARAM_OUTPUT = 2
@@ -117,8 +107,398 @@ class ConstantsDDBC(Enum):
SQL_NULLABLE = 1
SQL_MAX_NUMERIC_LEN = 16
+ SQL_FETCH_NEXT = 1
+ SQL_FETCH_FIRST = 2
+ SQL_FETCH_LAST = 3
+ SQL_FETCH_PRIOR = 4
+ SQL_FETCH_ABSOLUTE = 5
+ SQL_FETCH_RELATIVE = 6
+ SQL_FETCH_BOOKMARK = 8
+ SQL_DATETIMEOFFSET = -155
+ SQL_C_SS_TIMESTAMPOFFSET = 0x4001
+ SQL_SCOPE_CURROW = 0
+ SQL_BEST_ROWID = 1
+ SQL_ROWVER = 2
+ SQL_NO_NULLS = 0
+ SQL_NULLABLE_UNKNOWN = 2
+ SQL_INDEX_UNIQUE = 0
+ SQL_INDEX_ALL = 1
+ SQL_QUICK = 0
+ SQL_ENSURE = 1
+
+ # Connection Attribute Constants for set_attr()
+ SQL_ATTR_ACCESS_MODE = 101
+ SQL_ATTR_AUTOCOMMIT = 102
+ SQL_ATTR_CURSOR_TYPE = 6
+ SQL_ATTR_ROW_BIND_TYPE = 5
+ SQL_ATTR_ASYNC_DBC_FUNCTIONS_ENABLE = 117
+ SQL_ATTR_ROW_ARRAY_SIZE = 27
+ SQL_ATTR_ASYNC_DBC_EVENT = 119
+ SQL_ATTR_DDBC_VERSION = 200
+ SQL_ATTR_ASYNC_STMT_EVENT = 29
+ SQL_ATTR_ROWS_FETCHED_PTR = 26
+ SQL_ATTR_ROW_STATUS_PTR = 25
+ SQL_ATTR_CONNECTION_TIMEOUT = 113
+ SQL_ATTR_CURRENT_CATALOG = 109
+ SQL_ATTR_LOGIN_TIMEOUT = 103
+ SQL_ATTR_ODBC_CURSORS = 110
+ SQL_ATTR_PACKET_SIZE = 112
+ SQL_ATTR_QUIET_MODE = 111
+ SQL_ATTR_TXN_ISOLATION = 108
+ SQL_ATTR_TRACE = 104
+ SQL_ATTR_TRACEFILE = 105
+ SQL_ATTR_TRANSLATE_LIB = 106
+ SQL_ATTR_TRANSLATE_OPTION = 107
+ SQL_ATTR_CONNECTION_POOLING = 201
+ SQL_ATTR_CP_MATCH = 202
+ SQL_ATTR_ASYNC_ENABLE = 4
+ SQL_ATTR_ENLIST_IN_DTC = 1207
+ SQL_ATTR_ENLIST_IN_XA = 1208
+ SQL_ATTR_CONNECTION_DEAD = 1209
+ SQL_ATTR_SERVER_NAME = 13
+ SQL_ATTR_RESET_CONNECTION = 116
+
+ # Transaction Isolation Level Constants
+ SQL_TXN_READ_UNCOMMITTED = 1
+ SQL_TXN_READ_COMMITTED = 2
+ SQL_TXN_REPEATABLE_READ = 4
+ SQL_TXN_SERIALIZABLE = 8
+
+ # Access Mode Constants
+ SQL_MODE_READ_WRITE = 0
+ SQL_MODE_READ_ONLY = 1
+
+ # Connection Dead Constants
+ SQL_CD_TRUE = 1
+ SQL_CD_FALSE = 0
+
+ # ODBC Cursors Constants
+ SQL_CUR_USE_IF_NEEDED = 0
+ SQL_CUR_USE_ODBC = 1
+ SQL_CUR_USE_DRIVER = 2
+
+ # Reset Connection Constants
+ SQL_RESET_CONNECTION_YES = 1
+
+ # Query Timeout Constants
+ SQL_ATTR_QUERY_TIMEOUT = 0
+
+
+class GetInfoConstants(Enum):
+ """
+ These constants are used with various methods like getinfo().
+ """
+
+ # Driver and database information
+ SQL_DRIVER_NAME = 6
+ SQL_DRIVER_VER = 7
+ SQL_DRIVER_ODBC_VER = 77
+ SQL_DRIVER_HLIB = 76
+ SQL_DRIVER_HENV = 75
+ SQL_DRIVER_HDBC = 74
+ SQL_DATA_SOURCE_NAME = 2
+ SQL_DATABASE_NAME = 16
+ SQL_SERVER_NAME = 13
+ SQL_USER_NAME = 47
+
+ # SQL conformance and support
+ SQL_SQL_CONFORMANCE = 118
+ SQL_KEYWORDS = 89
+ SQL_IDENTIFIER_CASE = 28
+ SQL_IDENTIFIER_QUOTE_CHAR = 29
+ SQL_SPECIAL_CHARACTERS = 94
+ SQL_SQL92_ENTRY_SQL = 127
+ SQL_SQL92_INTERMEDIATE_SQL = 128
+ SQL_SQL92_FULL_SQL = 129
+ SQL_SUBQUERIES = 95
+ SQL_EXPRESSIONS_IN_ORDERBY = 27
+ SQL_CORRELATION_NAME = 74
+ SQL_SEARCH_PATTERN_ESCAPE = 14
+
+ # Catalog and schema support
+ SQL_CATALOG_TERM = 42
+ SQL_CATALOG_NAME_SEPARATOR = 41
+ SQL_SCHEMA_TERM = 39
+ SQL_TABLE_TERM = 45
+ SQL_PROCEDURES = 21
+ SQL_ACCESSIBLE_TABLES = 19
+ SQL_ACCESSIBLE_PROCEDURES = 20
+ SQL_CATALOG_NAME = 10002
+ SQL_CATALOG_USAGE = 92
+ SQL_SCHEMA_USAGE = 91
+ SQL_COLUMN_ALIAS = 87
+ SQL_DESCRIBE_PARAMETER = 10003
+
+ # Transaction support
+ SQL_TXN_CAPABLE = 46
+ SQL_TXN_ISOLATION_OPTION = 72
+ SQL_DEFAULT_TXN_ISOLATION = 26
+ SQL_MULTIPLE_ACTIVE_TXN = 37
+ SQL_TXN_ISOLATION_LEVEL = 108
+
+ # Data type support
+ SQL_NUMERIC_FUNCTIONS = 49
+ SQL_STRING_FUNCTIONS = 50
+ SQL_DATETIME_FUNCTIONS = 51
+ SQL_SYSTEM_FUNCTIONS = 58
+ SQL_CONVERT_FUNCTIONS = 48
+ SQL_LIKE_ESCAPE_CLAUSE = 113
+
+ # Numeric limits
+ SQL_MAX_COLUMN_NAME_LEN = 30
+ SQL_MAX_TABLE_NAME_LEN = 35
+ SQL_MAX_SCHEMA_NAME_LEN = 32
+ SQL_MAX_CATALOG_NAME_LEN = 34
+ SQL_MAX_IDENTIFIER_LEN = 10005
+ SQL_MAX_STATEMENT_LEN = 105
+ SQL_MAX_CHAR_LITERAL_LEN = 108
+ SQL_MAX_BINARY_LITERAL_LEN = 112
+ SQL_MAX_COLUMNS_IN_TABLE = 101
+ SQL_MAX_COLUMNS_IN_SELECT = 100
+ SQL_MAX_COLUMNS_IN_GROUP_BY = 97
+ SQL_MAX_COLUMNS_IN_ORDER_BY = 99
+ SQL_MAX_COLUMNS_IN_INDEX = 98
+ SQL_MAX_TABLES_IN_SELECT = 106
+ SQL_MAX_CONCURRENT_ACTIVITIES = 1
+ SQL_MAX_DRIVER_CONNECTIONS = 0
+ SQL_MAX_ROW_SIZE = 104
+ SQL_MAX_USER_NAME_LEN = 107
+
+ # Connection attributes
+ SQL_ACTIVE_CONNECTIONS = 0
+ SQL_ACTIVE_STATEMENTS = 1
+ SQL_DATA_SOURCE_READ_ONLY = 25
+ SQL_NEED_LONG_DATA_LEN = 111
+ SQL_GETDATA_EXTENSIONS = 81
+
+ # Result set and cursor attributes
+ SQL_CURSOR_COMMIT_BEHAVIOR = 23
+ SQL_CURSOR_ROLLBACK_BEHAVIOR = 24
+ SQL_CURSOR_SENSITIVITY = 10001
+ SQL_BOOKMARK_PERSISTENCE = 82
+ SQL_DYNAMIC_CURSOR_ATTRIBUTES1 = 144
+ SQL_DYNAMIC_CURSOR_ATTRIBUTES2 = 145
+ SQL_FORWARD_ONLY_CURSOR_ATTRIBUTES1 = 146
+ SQL_FORWARD_ONLY_CURSOR_ATTRIBUTES2 = 147
+ SQL_STATIC_CURSOR_ATTRIBUTES1 = 150
+ SQL_STATIC_CURSOR_ATTRIBUTES2 = 151
+ SQL_KEYSET_CURSOR_ATTRIBUTES1 = 148
+ SQL_KEYSET_CURSOR_ATTRIBUTES2 = 149
+ SQL_SCROLL_OPTIONS = 44
+ SQL_SCROLL_CONCURRENCY = 43
+ SQL_FETCH_DIRECTION = 8
+ SQL_ROWSET_SIZE = 9
+ SQL_CONCURRENCY = 7
+ SQL_ROW_NUMBER = 14
+ SQL_STATIC_SENSITIVITY = 83
+ SQL_BATCH_SUPPORT = 121
+ SQL_BATCH_ROW_COUNT = 120
+ SQL_PARAM_ARRAY_ROW_COUNTS = 153
+ SQL_PARAM_ARRAY_SELECTS = 154
+ SQL_PROCEDURE_TERM = 40
+
+ # Positioned statement support
+ SQL_POSITIONED_STATEMENTS = 80
+
+ # Other constants
+ SQL_GROUP_BY = 88
+ SQL_OJ_CAPABILITIES = 65
+ SQL_ORDER_BY_COLUMNS_IN_SELECT = 90
+ SQL_OUTER_JOINS = 38
+ SQL_QUOTED_IDENTIFIER_CASE = 93
+ SQL_CONCAT_NULL_BEHAVIOR = 22
+ SQL_NULL_COLLATION = 85
+ SQL_ALTER_TABLE = 86
+ SQL_UNION = 96
+ SQL_DDL_INDEX = 170
+ SQL_MULT_RESULT_SETS = 36
+ SQL_OWNER_USAGE = 91
+ SQL_QUALIFIER_USAGE = 92
+ SQL_TIMEDATE_ADD_INTERVALS = 109
+ SQL_TIMEDATE_DIFF_INTERVALS = 110
+
+ # Return values for some getinfo functions
+ SQL_IC_UPPER = 1
+ SQL_IC_LOWER = 2
+ SQL_IC_SENSITIVE = 3
+ SQL_IC_MIXED = 4
+
+
class AuthType(Enum):
"""Constants for authentication types"""
+
INTERACTIVE = "activedirectoryinteractive"
DEVICE_CODE = "activedirectorydevicecode"
- DEFAULT = "activedirectorydefault"
\ No newline at end of file
+ DEFAULT = "activedirectorydefault"
+
+
+class SQLTypes:
+ """Constants for valid SQL data types to use with setinputsizes"""
+
+ @classmethod
+ def get_valid_types(cls) -> set:
+ """Returns a set of all valid SQL type constants"""
+
+ return {
+ ConstantsDDBC.SQL_CHAR.value,
+ ConstantsDDBC.SQL_VARCHAR.value,
+ ConstantsDDBC.SQL_LONGVARCHAR.value,
+ ConstantsDDBC.SQL_WCHAR.value,
+ ConstantsDDBC.SQL_WVARCHAR.value,
+ ConstantsDDBC.SQL_WLONGVARCHAR.value,
+ ConstantsDDBC.SQL_DECIMAL.value,
+ ConstantsDDBC.SQL_NUMERIC.value,
+ ConstantsDDBC.SQL_BIT.value,
+ ConstantsDDBC.SQL_TINYINT.value,
+ ConstantsDDBC.SQL_SMALLINT.value,
+ ConstantsDDBC.SQL_INTEGER.value,
+ ConstantsDDBC.SQL_BIGINT.value,
+ ConstantsDDBC.SQL_REAL.value,
+ ConstantsDDBC.SQL_FLOAT.value,
+ ConstantsDDBC.SQL_DOUBLE.value,
+ ConstantsDDBC.SQL_BINARY.value,
+ ConstantsDDBC.SQL_VARBINARY.value,
+ ConstantsDDBC.SQL_LONGVARBINARY.value,
+ ConstantsDDBC.SQL_DATE.value,
+ ConstantsDDBC.SQL_TIME.value,
+ ConstantsDDBC.SQL_TIMESTAMP.value,
+ ConstantsDDBC.SQL_GUID.value,
+ }
+
+ # Category helpers for convenience
+ @classmethod
+ def get_string_types(cls) -> set:
+ """Returns a set of string SQL type constants"""
+
+ return {
+ ConstantsDDBC.SQL_CHAR.value,
+ ConstantsDDBC.SQL_VARCHAR.value,
+ ConstantsDDBC.SQL_LONGVARCHAR.value,
+ ConstantsDDBC.SQL_WCHAR.value,
+ ConstantsDDBC.SQL_WVARCHAR.value,
+ ConstantsDDBC.SQL_WLONGVARCHAR.value,
+ }
+
+ @classmethod
+ def get_numeric_types(cls) -> set:
+ """Returns a set of numeric SQL type constants"""
+
+ return {
+ ConstantsDDBC.SQL_DECIMAL.value,
+ ConstantsDDBC.SQL_NUMERIC.value,
+ ConstantsDDBC.SQL_BIT.value,
+ ConstantsDDBC.SQL_TINYINT.value,
+ ConstantsDDBC.SQL_SMALLINT.value,
+ ConstantsDDBC.SQL_INTEGER.value,
+ ConstantsDDBC.SQL_BIGINT.value,
+ ConstantsDDBC.SQL_REAL.value,
+ ConstantsDDBC.SQL_FLOAT.value,
+ ConstantsDDBC.SQL_DOUBLE.value,
+ }
+
+
+class AttributeSetTime(Enum):
+ """
+ Defines when connection attributes can be set in relation to connection establishment.
+
+ This enum is used to validate if a specific connection attribute can be set before
+ connection, after connection, or at either time.
+ """
+
+ BEFORE_ONLY = 1 # Must be set before connection is established
+ AFTER_ONLY = 2 # Can only be set after connection is established
+ EITHER = 3 # Can be set either before or after connection
+
+
+# Dictionary mapping attributes to their valid set times
+ATTRIBUTE_SET_TIMING = {
+ # Must be set before connection
+ ConstantsDDBC.SQL_ATTR_LOGIN_TIMEOUT.value: AttributeSetTime.BEFORE_ONLY,
+ ConstantsDDBC.SQL_ATTR_ODBC_CURSORS.value: AttributeSetTime.BEFORE_ONLY,
+ ConstantsDDBC.SQL_ATTR_PACKET_SIZE.value: AttributeSetTime.BEFORE_ONLY,
+ # Can only be set after connection
+ ConstantsDDBC.SQL_ATTR_CONNECTION_DEAD.value: AttributeSetTime.AFTER_ONLY,
+ ConstantsDDBC.SQL_ATTR_ENLIST_IN_DTC.value: AttributeSetTime.AFTER_ONLY,
+ ConstantsDDBC.SQL_ATTR_TRANSLATE_LIB.value: AttributeSetTime.AFTER_ONLY,
+ ConstantsDDBC.SQL_ATTR_TRANSLATE_OPTION.value: AttributeSetTime.AFTER_ONLY,
+ # Can be set either before or after connection
+ ConstantsDDBC.SQL_ATTR_ACCESS_MODE.value: AttributeSetTime.EITHER,
+ ConstantsDDBC.SQL_ATTR_ASYNC_DBC_EVENT.value: AttributeSetTime.EITHER,
+ ConstantsDDBC.SQL_ATTR_ASYNC_DBC_FUNCTIONS_ENABLE.value: AttributeSetTime.EITHER,
+ ConstantsDDBC.SQL_ATTR_ASYNC_ENABLE.value: AttributeSetTime.EITHER,
+ ConstantsDDBC.SQL_ATTR_AUTOCOMMIT.value: AttributeSetTime.EITHER,
+ ConstantsDDBC.SQL_ATTR_CONNECTION_TIMEOUT.value: AttributeSetTime.EITHER,
+ ConstantsDDBC.SQL_ATTR_CURRENT_CATALOG.value: AttributeSetTime.EITHER,
+ ConstantsDDBC.SQL_ATTR_QUIET_MODE.value: AttributeSetTime.EITHER,
+ ConstantsDDBC.SQL_ATTR_TRACE.value: AttributeSetTime.EITHER,
+ ConstantsDDBC.SQL_ATTR_TRACEFILE.value: AttributeSetTime.EITHER,
+ ConstantsDDBC.SQL_ATTR_TXN_ISOLATION.value: AttributeSetTime.EITHER,
+}
+
+
+def get_attribute_set_timing(attribute):
+ """
+ Get when an attribute can be set (before connection, after, or either).
+
+ Args:
+ attribute (int): The connection attribute (SQL_ATTR_*)
+
+ Returns:
+ AttributeSetTime: When the attribute can be set
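+
+ Attributes not listed in ATTRIBUTE_SET_TIMING default to AttributeSetTime.AFTER_ONLY.
+ Example (illustrative):
+ >>> get_attribute_set_timing(ConstantsDDBC.SQL_ATTR_LOGIN_TIMEOUT.value)
+ <AttributeSetTime.BEFORE_ONLY: 1>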
+ """
+ return ATTRIBUTE_SET_TIMING.get(attribute, AttributeSetTime.AFTER_ONLY)
+
+
+_CONNECTION_STRING_DRIVER_KEY = "Driver"
+_CONNECTION_STRING_APP_KEY = "APP"
+
+# Reserved connection string parameters that are controlled by the driver
+# and cannot be set by users
+_RESERVED_PARAMETERS = (_CONNECTION_STRING_DRIVER_KEY, _CONNECTION_STRING_APP_KEY)
+
+# Core connection parameters with synonym mapping
+# Maps lowercase parameter names to their canonical form
+# Based on ODBC Driver 18 for SQL Server supported parameters
+ # To support a new connection string key in Python, add it to the
+ # dictionary below. The key (left side) is the name accepted in the Python
+ # connection string; the value (right side) is the canonical ODBC key name
+ # used in the final connection string sent to the ODBC driver.
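+ # For example (illustrative): "addr=myhost" or "address=myhost" in a user-supplied
+ # connection string is emitted as "Server=myhost" in the final ODBC string.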
+_ALLOWED_CONNECTION_STRING_PARAMS = {
+ # Server identification - addr, address, and server are synonyms
+ "server": "Server",
+ "address": "Server",
+ "addr": "Server",
+ # Authentication
+ "uid": "UID",
+ "pwd": "PWD",
+ "authentication": "Authentication",
+ "trusted_connection": "Trusted_Connection",
+ # Database
+ "database": "Database",
+ # Driver (always controlled by mssql-python)
+ "driver": "Driver",
+ # Application name (always controlled by mssql-python)
+ "app": "APP",
+ # Encryption and Security
+ "encrypt": "Encrypt",
+ "trustservercertificate": "TrustServerCertificate",
+ "trust_server_certificate": "TrustServerCertificate", # Snake_case synonym
+ "hostnameincertificate": "HostnameInCertificate", # v18.0+
+ "servercertificate": "ServerCertificate", # v18.1+
+ "serverspn": "ServerSPN",
+ # Connection behavior
+ "multisubnetfailover": "MultiSubnetFailover",
+ "applicationintent": "ApplicationIntent",
+ "connectretrycount": "ConnectRetryCount",
+ "connectretryinterval": "ConnectRetryInterval",
+ # Keep-Alive (v17.4+)
+ "keepalive": "KeepAlive",
+ "keepaliveinterval": "KeepAliveInterval",
+ # IP Address Preference (v18.1+)
+ "ipaddresspreference": "IpAddressPreference",
+ "packet size": "PacketSize", # From the tests it looks like pyodbc users use Packet Size
+ # (with spaces) ODBC only honors "PacketSize" without spaces
+ # internally.
+ "packetsize": "PacketSize",
+}
diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py
index ed1bb70dc..84bb650d5 100644
--- a/mssql_python/cursor.py
+++ b/mssql_python/cursor.py
@@ -8,19 +8,47 @@
- Do not use a cursor after it is closed, or after its parent connection is closed.
- Use close() to release resources held by the cursor as soon as it is no longer needed.
"""
-import ctypes
+
+# pylint: disable=too-many-lines # Large file due to comprehensive DB-API 2.0 implementation
+
import decimal
import uuid
import datetime
-from typing import List, Union
-from mssql_python.constants import ConstantsDDBC as ddbc_sql_const
-from mssql_python.helpers import check_error, log
+import warnings
+from typing import List, Union, Any, Optional, Tuple, Sequence, TYPE_CHECKING, Iterable
+from mssql_python.constants import ConstantsDDBC as ddbc_sql_const, SQLTypes
+from mssql_python.helpers import check_error
+from mssql_python.logging import logger
from mssql_python import ddbc_bindings
-from mssql_python.exceptions import InterfaceError
-from .row import Row
-
-
-class Cursor:
+from mssql_python.exceptions import (
+ InterfaceError,
+ NotSupportedError,
+ ProgrammingError,
+ OperationalError,
+ DatabaseError,
+)
+from mssql_python.row import Row
+from mssql_python import get_settings
+from mssql_python.parameter_helper import (
+ detect_and_convert_parameters,
+ parse_pyformat_params,
+ convert_pyformat_to_qmark,
+)
+
+if TYPE_CHECKING:
+ from mssql_python.connection import Connection
+
+# Constants for string handling
+MAX_INLINE_CHAR: int = (
+ 4000 # NVARCHAR/VARCHAR inline limit; longer values use NVARCHAR(MAX)/VARCHAR(MAX) + DAE
+)
+SMALLMONEY_MIN: decimal.Decimal = decimal.Decimal("-214748.3648")
+SMALLMONEY_MAX: decimal.Decimal = decimal.Decimal("214748.3647")
+MONEY_MIN: decimal.Decimal = decimal.Decimal("-922337203685477.5808")
+MONEY_MAX: decimal.Decimal = decimal.Decimal("922337203685477.5807")
+
+
+class Cursor: # pylint: disable=too-many-instance-attributes,too-many-public-methods
"""
Represents a database cursor, which is used to manage the context of a fetch operation.
@@ -29,52 +57,92 @@ class Cursor:
description: Sequence of 7-item sequences describing one result column.
rowcount: Number of rows produced or affected by the last execute operation.
arraysize: Number of rows to fetch at a time with fetchmany().
+ rownumber: Current 0-based index of the cursor in the result set (DB-API extension).
Methods:
__init__(connection_str) -> None.
- callproc(procname, parameters=None) ->
+ callproc(procname, parameters=None) ->
Modified copy of the input sequence with output parameters.
close() -> None.
- execute(operation, parameters=None) -> None.
+ execute(operation, parameters=None) -> Cursor.
executemany(operation, seq_of_parameters) -> None.
fetchone() -> Single sequence or None if no more data is available.
fetchmany(size=None) -> Sequence of sequences (e.g. list of tuples).
fetchall() -> Sequence of sequences (e.g. list of tuples).
nextset() -> True if there is another result set, None otherwise.
+ next() -> Fetch the next row from the cursor.
setinputsizes(sizes) -> None.
setoutputsize(size, column=None) -> None.
"""
- def __init__(self, connection) -> None:
+ # TODO(jathakkar): Thread safety considerations
+ # The cursor class contains methods that are not thread-safe due to:
+ # 1. Methods that mutate cursor state (_reset_cursor, self.description, etc.)
+ # 2. Methods that call ODBC functions with shared handles (self.hstmt)
+ #
+ # These methods should be properly synchronized or redesigned when implementing
+ # async functionality to prevent race conditions and data corruption.
+ # Consider using locks, redesigning for immutability, or ensuring
+ # cursor objects are never shared across threads.
+
+ def __init__(self, connection: "Connection", timeout: int = 0) -> None:
"""
Initialize the cursor with a database connection.
Args:
connection: Database connection object.
+ timeout: Query timeout in seconds
"""
- self.connection = connection
+ self._connection: "Connection" = connection # Store as private attribute
+ self._timeout: int = timeout
+ self._inputsizes: Optional[List[Union[int, Tuple[Any, ...]]]] = None
# self.connection.autocommit = False
- self.hstmt = None
+ self.hstmt: Optional[Any] = None
self._initialize_cursor()
- self.description = None
- self.rowcount = -1
- self.arraysize = (
+ self.description: Optional[
+ List[
+ Tuple[
+ str,
+ Any,
+ Optional[int],
+ Optional[int],
+ Optional[int],
+ Optional[int],
+ Optional[bool],
+ ]
+ ]
+ ] = None
+ self.rowcount: int = -1
+ self.arraysize: int = (
1 # Default number of rows to fetch at a time is 1, user can change it
)
- self.buffer_length = 1024 # Default buffer length for string data
- self.closed = False
- self._result_set_empty = False # Add this initialization
- self.last_executed_stmt = (
- "" # Stores the last statement executed by this cursor
- )
- self.is_stmt_prepared = [
+ self.buffer_length: int = 1024 # Default buffer length for string data
+ self.closed: bool = False
+ self._result_set_empty: bool = False # Add this initialization
+ self.last_executed_stmt: str = "" # Stores the last statement executed by this cursor
+ self.is_stmt_prepared: List[bool] = [
False
] # Indicates if last_executed_stmt was prepared by ddbc shim.
# Is a list instead of a bool coz bools in Python are immutable.
+
+ # Initialize attributes that may be defined later to avoid pylint warnings
+ # Note: _original_fetch* methods are not initialized here as they need to be
+ # conditionally set based on hasattr() checks
# Hence, we can't pass around bools by reference & modify them.
# Therefore, it must be a list with exactly one bool element.
- def _is_unicode_string(self, param):
+ self._rownumber = -1 # DB-API extension: last returned row index, -1 before first
+
+ self._cached_column_map = None
+ self._cached_converter_map = None
+ self._next_row_index = 0 # internal: index of the next row the driver will return (0-based)
+ self._has_result_set = False # Track if we have an active result set
+ self._skip_increment_for_next_fetch = (
+ False # Track if we need to skip incrementing the row index
+ )
+ self.messages = [] # Store diagnostic messages
+
+ def _is_unicode_string(self, param: str) -> bool:
"""
Check if a string contains non-ASCII characters.
@@ -90,7 +158,7 @@ def _is_unicode_string(self, param):
except UnicodeEncodeError:
return True # Contains non-ASCII characters, so treat as Unicode
- def _parse_date(self, param):
+ def _parse_date(self, param: str) -> Optional[datetime.date]:
"""
Attempt to parse a string as a date.
@@ -108,7 +176,7 @@ def _parse_date(self, param):
continue
return None
- def _parse_datetime(self, param):
+ def _parse_datetime(self, param: str) -> Optional[datetime.datetime]:
"""
Attempt to parse a string as a datetime, smalldatetime, datetime2, timestamp.
@@ -132,7 +200,7 @@ def _parse_datetime(self, param):
return None # If all formats fail, return None
- def _parse_time(self, param):
+ def _parse_time(self, param: str) -> Optional[datetime.time]:
"""
Attempt to parse a string as a time.
@@ -152,8 +220,8 @@ def _parse_time(self, param):
except ValueError:
continue
return None
-
- def _get_numeric_data(self, param):
+
+ def _get_numeric_data(self, param: decimal.Decimal) -> Any:
"""
Get the data for a numeric parameter.
@@ -161,36 +229,43 @@ def _get_numeric_data(self, param):
param: The numeric parameter.
Returns:
- numeric_data: A NumericData struct containing
+ numeric_data: A NumericData struct containing
the numeric data.
"""
decimal_as_tuple = param.as_tuple()
- num_digits = len(decimal_as_tuple.digits)
+ digits_tuple = decimal_as_tuple.digits
+ num_digits = len(digits_tuple)
exponent = decimal_as_tuple.exponent
- # Calculate the SQL precision & scale
- # precision = no. of significant digits
- # scale = no. digits after decimal point
- if exponent >= 0:
- # digits=314, exp=2 ---> '31400' --> precision=5, scale=0
- precision = num_digits + exponent
+ # Handle special values (NaN, Infinity, etc.)
+ if isinstance(exponent, str):
+ # For special values like 'n' (NaN), 'N' (sNaN), 'F' (Infinity)
+ # Return default precision and scale
+ precision = 38 # SQL Server default max precision
scale = 0
- elif (-1 * exponent) <= num_digits:
- # digits=3140, exp=-3 ---> '3.140' --> precision=4, scale=3
- precision = num_digits
- scale = exponent * -1
else:
- # digits=3140, exp=-5 ---> '0.03140' --> precision=5, scale=5
- # TODO: double check the precision calculation here with SQL documentation
- precision = exponent * -1
- scale = exponent * -1
-
- # TODO: Revisit this check, do we want this restriction?
- if precision > 15:
+ # Calculate the SQL precision & scale
+ # precision = no. of significant digits
+ # scale = no. digits after decimal point
+ if exponent >= 0:
+ # digits=314, exp=2 ---> '31400' --> precision=5, scale=0
+ precision = num_digits + exponent
+ scale = 0
+ elif (-1 * exponent) <= num_digits:
+ # digits=3140, exp=-3 ---> '3.140' --> precision=4, scale=3
+ precision = num_digits
+ scale = exponent * -1
+ else:
+ # digits=3140, exp=-5 ---> '0.03140' --> precision=5, scale=5
+ # TODO: double check the precision calculation here with SQL documentation
+ precision = exponent * -1
+ scale = exponent * -1
+
+ if precision > 38:
raise ValueError(
"Precision of the numeric value is too high - "
+ str(param)
- + ". Should be less than or equal to 15"
+ + ". Should be less than or equal to 38"
)
Numeric_Data = ddbc_bindings.NumericData
numeric_data = Numeric_Data()
@@ -199,17 +274,112 @@ def _get_numeric_data(self, param):
numeric_data.sign = 1 if decimal_as_tuple.sign == 0 else 0
# strip decimal point from param & convert the significant digits to integer
# Ex: 12.34 ---> 1234
- val = str(param)
- if "." in val or "-" in val:
- val = val.replace(".", "")
- val = val.replace("-", "")
- val = int(val)
- numeric_data.val = val
+ int_str = "".join(str(d) for d in digits_tuple)
+ if exponent > 0:
+ int_str = int_str + ("0" * exponent)
+ elif exponent < 0:
+ if -exponent > num_digits:
+ int_str = ("0" * (-exponent - num_digits)) + int_str
+
+ if int_str == "":
+ int_str = "0"
+
+ # Convert decimal base-10 string to python int, then to 16 little-endian bytes
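+ # e.g. (illustrative) 1234 -> 0x04D2 -> b'\xd2\x04\x00...\x00' (16 bytes, little-endian)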
+ big_int = int(int_str)
+ byte_array = bytearray(16) # SQL_MAX_NUMERIC_LEN
+ for i in range(16):
+ byte_array[i] = big_int & 0xFF
+ big_int >>= 8
+ if big_int == 0:
+ break
+
+ numeric_data.val = bytes(byte_array)
return numeric_data
- def _map_sql_type(self, param, parameters_list, i):
+ def _get_encoding_settings(self):
+ """
+ Get the encoding settings from the connection.
+
+ Returns:
+ dict: A dictionary with 'encoding' and 'ctype' keys, or default settings if not available
+
+ Raises:
+ OperationalError, DatabaseError: If there are unexpected database connection issues
+ that indicate a broken connection state. These should not be silently ignored
+ as they can lead to data corruption or inconsistent behavior.
+ """
+ if hasattr(self._connection, "getencoding"):
+ try:
+ return self._connection.getencoding()
+ except (OperationalError, DatabaseError) as db_error:
+ # Log the error for debugging but re-raise for fail-fast behavior
+ # Silently returning defaults can lead to data corruption and hard-to-debug issues
+ logger.error(
+ "Failed to get encoding settings from connection due to database error: %s. "
+ "This indicates a broken connection state that should not be ignored.",
+ db_error,
+ )
+ # Re-raise to fail fast - users should know their connection is broken
+ raise
+ except Exception as unexpected_error:
+ # Handle other unexpected errors (connection closed, programming errors, etc.)
+ logger.error("Unexpected error getting encoding settings: %s", unexpected_error)
+ # Re-raise unexpected errors as well
+ raise
+
+ # Return default encoding settings if getencoding is not available
+ # This is the only case where defaults are appropriate (method doesn't exist)
+ return {"encoding": "utf-16le", "ctype": ddbc_sql_const.SQL_WCHAR.value}
+
+ def _get_decoding_settings(self, sql_type):
+ """
+ Get decoding settings for a specific SQL type.
+
+ Args:
+ sql_type: SQL type constant (SQL_CHAR, SQL_WCHAR, etc.)
+
+ Returns:
+ Dictionary containing the decoding settings.
+
+ Raises:
+ OperationalError, DatabaseError: If there are unexpected database connection issues
+ that indicate a broken connection state. These should not be silently ignored
+ as they can lead to data corruption or inconsistent behavior.
+ """
+ try:
+ # Get decoding settings from connection for this SQL type
+ return self._connection.getdecoding(sql_type)
+ except (OperationalError, DatabaseError) as db_error:
+ # Log the error for debugging but re-raise for fail-fast behavior
+ # Silently returning defaults can lead to data corruption and hard-to-debug issues
+ logger.error(
+ "Failed to get decoding settings for SQL type %s due to database error: %s. "
+ "This indicates a broken connection state that should not be ignored.",
+ sql_type,
+ db_error,
+ )
+ # Re-raise to fail fast - users should know their connection is broken
+ raise
+ except Exception as unexpected_error:
+ # Handle other unexpected errors (connection closed, programming errors, etc.)
+ logger.error(
+ "Unexpected error getting decoding settings for SQL type %s: %s",
+ sql_type,
+ unexpected_error,
+ )
+ # Re-raise unexpected errors as well
+ raise
+
+ def _map_sql_type( # pylint: disable=too-many-arguments,too-many-positional-arguments,too-many-locals,too-many-return-statements,too-many-branches
+ self,
+ param: Any,
+ parameters_list: List[Any],
+ i: int,
+ min_val: Optional[Any] = None,
+ max_val: Optional[Any] = None,
+ ) -> Tuple[int, int, int, int, bool]:
"""
- Map a Python data type to the corresponding SQL type,
+ Map a Python data type to the corresponding SQL type,
C type, Column size, and Decimal digits.
Takes:
- param: The parameter to map.
@@ -218,173 +388,274 @@ def _map_sql_type(self, param, parameters_list, i):
Returns:
- A tuple containing the SQL type, C type, column size, and decimal digits.
"""
+ logger.debug("_map_sql_type: Mapping param index=%d, type=%s", i, type(param).__name__)
if param is None:
+ logger.debug("_map_sql_type: NULL parameter - index=%d", i)
return (
- ddbc_sql_const.SQL_VARCHAR.value, # TODO: Add SQLDescribeParam to get correct type
+ ddbc_sql_const.SQL_VARCHAR.value,
ddbc_sql_const.SQL_C_DEFAULT.value,
1,
0,
+ False,
)
if isinstance(param, bool):
- return ddbc_sql_const.SQL_BIT.value, ddbc_sql_const.SQL_C_BIT.value, 1, 0
+ logger.debug("_map_sql_type: BOOL detected - index=%d", i)
+ return (
+ ddbc_sql_const.SQL_BIT.value,
+ ddbc_sql_const.SQL_C_BIT.value,
+ 1,
+ 0,
+ False,
+ )
if isinstance(param, int):
- if 0 <= param <= 255:
+ # Use min_val/max_val if available
+ value_to_check = max_val if max_val is not None else param
+ min_to_check = min_val if min_val is not None else param
+ logger.debug(
+ "_map_sql_type: INT detected - index=%d, min=%s, max=%s",
+ i,
+ str(min_to_check)[:50],
+ str(value_to_check)[:50],
+ )
+
+ if 0 <= min_to_check and value_to_check <= 255:
+ logger.debug("_map_sql_type: INT -> TINYINT - index=%d", i)
return (
ddbc_sql_const.SQL_TINYINT.value,
ddbc_sql_const.SQL_C_TINYINT.value,
3,
0,
+ False,
)
- if -32768 <= param <= 32767:
+ if -32768 <= min_to_check and value_to_check <= 32767:
+ logger.debug("_map_sql_type: INT -> SMALLINT - index=%d", i)
return (
ddbc_sql_const.SQL_SMALLINT.value,
ddbc_sql_const.SQL_C_SHORT.value,
5,
0,
+ False,
)
- if -2147483648 <= param <= 2147483647:
+ if -2147483648 <= min_to_check and value_to_check <= 2147483647:
+ logger.debug("_map_sql_type: INT -> INTEGER - index=%d", i)
return (
ddbc_sql_const.SQL_INTEGER.value,
ddbc_sql_const.SQL_C_LONG.value,
10,
0,
+ False,
)
+ logger.debug("_map_sql_type: INT -> BIGINT - index=%d", i)
return (
ddbc_sql_const.SQL_BIGINT.value,
ddbc_sql_const.SQL_C_SBIGINT.value,
19,
0,
+ False,
)
if isinstance(param, float):
+ logger.debug("_map_sql_type: FLOAT detected - index=%d", i)
return (
ddbc_sql_const.SQL_DOUBLE.value,
ddbc_sql_const.SQL_C_DOUBLE.value,
15,
0,
+ False,
)
if isinstance(param, decimal.Decimal):
- parameters_list[i] = self._get_numeric_data(
- param
- ) # Replace the parameter with the dictionary
+ logger.debug("_map_sql_type: DECIMAL detected - index=%d", i)
+ # First check precision limit for all decimal values
+ decimal_as_tuple = param.as_tuple()
+ digits_tuple = decimal_as_tuple.digits
+ num_digits = len(digits_tuple)
+ exponent = decimal_as_tuple.exponent
+
+ # Handle special values (NaN, Infinity, etc.)
+ if isinstance(exponent, str):
+ logger.debug(
+ "_map_sql_type: DECIMAL special value - index=%d, exponent=%s", i, exponent
+ )
+ # For special values like 'n' (NaN), 'N' (sNaN), 'F' (Infinity)
+ # Return default precision and scale
+ precision = 38 # SQL Server default max precision
+ else:
+ # Calculate the SQL precision (same logic as _get_numeric_data)
+ if exponent >= 0:
+ precision = num_digits + exponent
+ elif (-1 * exponent) <= num_digits:
+ precision = num_digits
+ else:
+ precision = exponent * -1
+ logger.debug(
+ "_map_sql_type: DECIMAL precision calculated - index=%d, precision=%d",
+ i,
+ precision,
+ )
+
+ if precision > 38:
+ logger.debug(
+ "_map_sql_type: DECIMAL precision too high - index=%d, precision=%d",
+ i,
+ precision,
+ )
+ raise ValueError(
+ f"Precision of the numeric value is too high. "
+ f"The maximum precision supported by SQL Server is 38, but got {precision}."
+ )
+
+ # Detect MONEY / SMALLMONEY range
+ if SMALLMONEY_MIN <= param <= SMALLMONEY_MAX:
+ logger.debug("_map_sql_type: DECIMAL -> SMALLMONEY - index=%d", i)
+ # smallmoney
+ parameters_list[i] = format(param, "f")
+ return (
+ ddbc_sql_const.SQL_VARCHAR.value,
+ ddbc_sql_const.SQL_C_CHAR.value,
+ len(parameters_list[i]),
+ 0,
+ False,
+ )
+ if MONEY_MIN <= param <= MONEY_MAX:
+ logger.debug("_map_sql_type: DECIMAL -> MONEY - index=%d", i)
+ # money
+ parameters_list[i] = format(param, "f")
+ return (
+ ddbc_sql_const.SQL_VARCHAR.value,
+ ddbc_sql_const.SQL_C_CHAR.value,
+ len(parameters_list[i]),
+ 0,
+ False,
+ )
+ # fallback to generic numeric binding
+ logger.debug("_map_sql_type: DECIMAL -> NUMERIC - index=%d", i)
+ parameters_list[i] = self._get_numeric_data(param)
+ logger.debug(
+ "_map_sql_type: NUMERIC created - index=%d, precision=%d, scale=%d",
+ i,
+ parameters_list[i].precision,
+ parameters_list[i].scale,
+ )
return (
ddbc_sql_const.SQL_NUMERIC.value,
ddbc_sql_const.SQL_C_NUMERIC.value,
parameters_list[i].precision,
parameters_list[i].scale,
+ False,
+ )
+
+ if isinstance(param, uuid.UUID):
+ logger.debug("_map_sql_type: UUID detected - index=%d", i)
+ parameters_list[i] = param.bytes_le
+ return (
+ ddbc_sql_const.SQL_GUID.value,
+ ddbc_sql_const.SQL_C_GUID.value,
+ 16,
+ 0,
+ False,
)
if isinstance(param, str):
+ logger.debug("_map_sql_type: STR detected - index=%d, length=%d", i, len(param))
if (
param.startswith("POINT")
or param.startswith("LINESTRING")
or param.startswith("POLYGON")
):
+ logger.debug("_map_sql_type: STR is geometry type - index=%d", i)
return (
ddbc_sql_const.SQL_WVARCHAR.value,
ddbc_sql_const.SQL_C_WCHAR.value,
len(param),
0,
- )
-
- # Attempt to parse as date, datetime, datetime2, timestamp, smalldatetime or time
- if self._parse_date(param):
- parameters_list[i] = self._parse_date(
- param
- ) # Replace the parameter with the date object
- return (
- ddbc_sql_const.SQL_DATE.value,
- ddbc_sql_const.SQL_C_TYPE_DATE.value,
- 10,
- 0,
- )
- if self._parse_datetime(param):
- parameters_list[i] = self._parse_datetime(param)
- return (
- ddbc_sql_const.SQL_TIMESTAMP.value,
- ddbc_sql_const.SQL_C_TYPE_TIMESTAMP.value,
- 26,
- 6,
- )
- if self._parse_time(param):
- parameters_list[i] = self._parse_time(param)
- return (
- ddbc_sql_const.SQL_TIME.value,
- ddbc_sql_const.SQL_C_TYPE_TIME.value,
- 8,
- 0,
+ False,
)
# String mapping logic here
is_unicode = self._is_unicode_string(param)
- # TODO: revisit
- if len(param) > 4000: # Long strings
+
+ # Computes UTF-16 code units (handles surrogate pairs)
+ utf16_len = sum(2 if ord(c) > 0xFFFF else 1 for c in param)
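+ # e.g. (illustrative): "a" + "\U0001F600" is 3 UTF-16 code units (the emoji needs a surrogate pair)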
+ logger.debug(
+ "_map_sql_type: STR analysis - index=%d, is_unicode=%s, utf16_len=%d",
+ i,
+ str(is_unicode),
+ utf16_len,
+ )
+ if utf16_len > MAX_INLINE_CHAR: # Long strings -> DAE
+ logger.debug("_map_sql_type: STR exceeds MAX_INLINE_CHAR, using DAE - index=%d", i)
if is_unicode:
return (
- ddbc_sql_const.SQL_WLONGVARCHAR.value,
+ ddbc_sql_const.SQL_WVARCHAR.value,
ddbc_sql_const.SQL_C_WCHAR.value,
- len(param),
0,
+ 0,
+ True,
)
return (
- ddbc_sql_const.SQL_LONGVARCHAR.value,
+ ddbc_sql_const.SQL_VARCHAR.value,
ddbc_sql_const.SQL_C_CHAR.value,
- len(param),
0,
+ 0,
+ True,
)
- if is_unicode: # Short Unicode strings
+
+ # Short strings
+ if is_unicode:
return (
ddbc_sql_const.SQL_WVARCHAR.value,
ddbc_sql_const.SQL_C_WCHAR.value,
- len(param),
+ utf16_len,
0,
+ False,
)
return (
ddbc_sql_const.SQL_VARCHAR.value,
ddbc_sql_const.SQL_C_CHAR.value,
len(param),
0,
+ False,
)
- if isinstance(param, bytes):
- if len(param) > 8000: # Assuming VARBINARY(MAX) for long byte arrays
+ if isinstance(param, (bytes, bytearray)):
+ length = len(param)
+ if length > 8000: # Use VARBINARY(MAX) for large blobs
return (
ddbc_sql_const.SQL_VARBINARY.value,
ddbc_sql_const.SQL_C_BINARY.value,
- len(param),
0,
- )
- return (
- ddbc_sql_const.SQL_BINARY.value,
- ddbc_sql_const.SQL_C_BINARY.value,
- len(param),
- 0,
- )
-
- if isinstance(param, bytearray):
- if len(param) > 8000: # Assuming VARBINARY(MAX) for long byte arrays
- return (
- ddbc_sql_const.SQL_VARBINARY.value,
- ddbc_sql_const.SQL_C_BINARY.value,
- len(param),
0,
+ True,
)
+ # Small blobs → direct binding
return (
- ddbc_sql_const.SQL_BINARY.value,
+ ddbc_sql_const.SQL_VARBINARY.value,
ddbc_sql_const.SQL_C_BINARY.value,
- len(param),
+ max(length, 1),
0,
+ False,
)
if isinstance(param, datetime.datetime):
+ if param.tzinfo is not None:
+ # Timezone-aware datetime -> DATETIMEOFFSET
+ return (
+ ddbc_sql_const.SQL_DATETIMEOFFSET.value,
+ ddbc_sql_const.SQL_C_SS_TIMESTAMPOFFSET.value,
+ 34,
+ 7,
+ False,
+ )
+ # Naive datetime -> TIMESTAMP
return (
ddbc_sql_const.SQL_TIMESTAMP.value,
ddbc_sql_const.SQL_C_TYPE_TIMESTAMP.value,
26,
6,
+ False,
)
if isinstance(param, datetime.date):
@@ -393,6 +664,7 @@ def _map_sql_type(self, param, parameters_list, i):
ddbc_sql_const.SQL_C_TYPE_DATE.value,
10,
0,
+ False,
)
if isinstance(param, datetime.time):
@@ -401,13 +673,12 @@ def _map_sql_type(self, param, parameters_list, i):
ddbc_sql_const.SQL_C_TYPE_TIME.value,
8,
0,
+ False,
)
- return (
- ddbc_sql_const.SQL_VARCHAR.value,
- ddbc_sql_const.SQL_C_CHAR.value,
- len(str(param)),
- 0,
+ # For safety: unknown/unhandled Python types should not silently go to SQL
+ raise TypeError(
+ "Unsupported parameter type: The driver cannot safely convert it to a SQL type."
)
def _initialize_cursor(self) -> None:
@@ -415,12 +686,33 @@ def _initialize_cursor(self) -> None:
Initialize the DDBC statement handle.
"""
self._allocate_statement_handle()
+ self._set_timeout()
- def _allocate_statement_handle(self):
+ def _allocate_statement_handle(self) -> None:
"""
Allocate the DDBC statement handle.
"""
- self.hstmt = self.connection._conn.alloc_statement_handle()
+ self.hstmt = self._connection._conn.alloc_statement_handle()
+
+ def _set_timeout(self) -> None:
+ """
+ Set the query timeout attribute on the statement handle.
+ This is called once when the cursor is created and after any handle reallocation.
+ Following pyodbc's approach for better performance.
+ """
+ if self._timeout > 0:
+ logger.debug("_set_timeout: Setting query timeout=%d seconds", self._timeout)
+ try:
+ timeout_value = int(self._timeout)
+ ret = ddbc_bindings.DDBCSQLSetStmtAttr(
+ self.hstmt,
+ ddbc_sql_const.SQL_ATTR_QUERY_TIMEOUT.value,
+ timeout_value,
+ )
+ check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, ret)
+ logger.debug("Query timeout set to %d seconds", timeout_value)
+ except Exception as e: # pylint: disable=broad-exception-caught
+ logger.warning("Failed to set query timeout: %s", str(e))
def _reset_cursor(self) -> None:
"""
@@ -429,77 +721,306 @@ def _reset_cursor(self) -> None:
if self.hstmt:
self.hstmt.free()
self.hstmt = None
- log('debug', "SQLFreeHandle succeeded")
+ logger.debug("SQLFreeHandle succeeded")
+
+ self._clear_rownumber()
+
# Reinitialize the statement handle
self._initialize_cursor()
def close(self) -> None:
"""
- Close the cursor now (rather than whenever __del__ is called).
+ Close the cursor now (rather than whenever .__del__() is called).
+ Idempotent: subsequent calls are no-ops.
- Raises:
- Error: If any operation is attempted with the cursor after it is closed.
+ The cursor will be unusable from this point forward; a ProgrammingError
+ will be raised if any operation (other than close) is attempted with the cursor.
+ This is a deviation from pyodbc, which raises an exception if the cursor is already closed.
"""
if self.closed:
- raise Exception("Cursor is already closed.")
+ # Do nothing - not calling _check_closed() here since we want this to be idempotent
+ return
+
+ # Clear messages per DBAPI
+ self.messages = []
+
+ # Remove this cursor from the connection's tracking
+ if hasattr(self, "connection") and self.connection and hasattr(self.connection, "_cursors"):
+ try:
+ self.connection._cursors.discard(self)
+ except Exception as e: # pylint: disable=broad-exception-caught
+ logger.warning("Error removing cursor from connection tracking: %s", e)
if self.hstmt:
self.hstmt.free()
self.hstmt = None
- log('debug', "SQLFreeHandle succeeded")
+ logger.debug("SQLFreeHandle succeeded")
+ self._clear_rownumber()
self.closed = True
- def _check_closed(self):
+ def _check_closed(self) -> None:
"""
Check if the cursor is closed and raise an exception if it is.
Raises:
- Error: If the cursor is closed.
+ ProgrammingError: If the cursor is closed.
"""
if self.closed:
- raise Exception("Operation cannot be performed: the cursor is closed.")
+ raise ProgrammingError(
+ driver_error="Operation cannot be performed: The cursor is closed.",
+ ddbc_error="",
+ )
- def _create_parameter_types_list(self, parameter, param_info, parameters_list, i):
+ def setinputsizes(self, sizes: List[Union[int, tuple]]) -> None:
"""
- Maps parameter types for the given parameter.
+ Sets the type information to be used for parameters in execute and executemany.
+
+ This method can be used to explicitly declare the types and sizes of query parameters.
+ For example:
+
+ sql = "INSERT INTO product (item, price) VALUES (?, ?)"
+ params = [('bicycle', 499.99), ('ham', 17.95)]
+ # specify that parameters are for NVARCHAR(50) and DECIMAL(18,4) columns
+ cursor.setinputsizes([(SQL_WVARCHAR, 50, 0), (SQL_DECIMAL, 18, 4)])
+ cursor.executemany(sql, params)
Args:
- parameter: parameter to bind.
+ sizes: A sequence of tuples, one for each parameter. Each tuple contains
+ (sql_type, size, decimal_digits) where size and decimal_digits are optional.
+ """
+
+ # Get valid SQL types from centralized constants
+ valid_sql_types = SQLTypes.get_valid_types()
+
+ self._inputsizes = []
+
+ if sizes:
+ for size_info in sizes:
+ if isinstance(size_info, tuple):
+ # Handle tuple format (sql_type, size, decimal_digits)
+ if len(size_info) == 1:
+ sql_type = size_info[0]
+ column_size = 0
+ decimal_digits = 0
+ elif len(size_info) == 2:
+ sql_type, column_size = size_info
+ decimal_digits = 0
+ elif len(size_info) >= 3:
+ sql_type, column_size, decimal_digits = size_info
+
+ # Validate SQL type
+ if not isinstance(sql_type, int) or sql_type not in valid_sql_types:
+ raise ValueError(
+ f"Invalid SQL type: {sql_type}. Must be a valid SQL type constant."
+ )
+
+ # Validate size and precision
+ if not isinstance(column_size, int) or column_size < 0:
+ raise ValueError(
+ f"Invalid column size: {column_size}. Must be a non-negative integer."
+ )
+
+ if not isinstance(decimal_digits, int) or decimal_digits < 0:
+ raise ValueError(
+ f"Invalid decimal digits: {decimal_digits}. "
+ f"Must be a non-negative integer."
+ )
+
+ self._inputsizes.append((sql_type, column_size, decimal_digits))
+ else:
+ # Handle single value (just sql_type)
+ sql_type = size_info
+
+ # Validate SQL type
+ if not isinstance(sql_type, int) or sql_type not in valid_sql_types:
+ raise ValueError(
+ f"Invalid SQL type: {sql_type}. Must be a valid SQL type constant."
+ )
+
+ self._inputsizes.append((sql_type, 0, 0))
+
+ def _reset_inputsizes(self) -> None:
+ """Reset input sizes after execution"""
+ self._inputsizes = None
+
+ def _get_c_type_for_sql_type(self, sql_type: int) -> int:
+ """Map SQL type to appropriate C type for parameter binding"""
+ sql_to_c_type = {
+ ddbc_sql_const.SQL_CHAR.value: ddbc_sql_const.SQL_C_CHAR.value,
+ ddbc_sql_const.SQL_VARCHAR.value: ddbc_sql_const.SQL_C_CHAR.value,
+ ddbc_sql_const.SQL_LONGVARCHAR.value: ddbc_sql_const.SQL_C_CHAR.value,
+ ddbc_sql_const.SQL_WCHAR.value: ddbc_sql_const.SQL_C_WCHAR.value,
+ ddbc_sql_const.SQL_WVARCHAR.value: ddbc_sql_const.SQL_C_WCHAR.value,
+ ddbc_sql_const.SQL_WLONGVARCHAR.value: ddbc_sql_const.SQL_C_WCHAR.value,
+ ddbc_sql_const.SQL_DECIMAL.value: ddbc_sql_const.SQL_C_NUMERIC.value,
+ ddbc_sql_const.SQL_NUMERIC.value: ddbc_sql_const.SQL_C_NUMERIC.value,
+ ddbc_sql_const.SQL_BIT.value: ddbc_sql_const.SQL_C_BIT.value,
+ ddbc_sql_const.SQL_TINYINT.value: ddbc_sql_const.SQL_C_TINYINT.value,
+ ddbc_sql_const.SQL_SMALLINT.value: ddbc_sql_const.SQL_C_SHORT.value,
+ ddbc_sql_const.SQL_INTEGER.value: ddbc_sql_const.SQL_C_LONG.value,
+ ddbc_sql_const.SQL_BIGINT.value: ddbc_sql_const.SQL_C_SBIGINT.value,
+ ddbc_sql_const.SQL_REAL.value: ddbc_sql_const.SQL_C_FLOAT.value,
+ ddbc_sql_const.SQL_FLOAT.value: ddbc_sql_const.SQL_C_DOUBLE.value,
+ ddbc_sql_const.SQL_DOUBLE.value: ddbc_sql_const.SQL_C_DOUBLE.value,
+ ddbc_sql_const.SQL_BINARY.value: ddbc_sql_const.SQL_C_BINARY.value,
+ ddbc_sql_const.SQL_VARBINARY.value: ddbc_sql_const.SQL_C_BINARY.value,
+ ddbc_sql_const.SQL_LONGVARBINARY.value: ddbc_sql_const.SQL_C_BINARY.value,
+ ddbc_sql_const.SQL_DATE.value: ddbc_sql_const.SQL_C_TYPE_DATE.value,
+ ddbc_sql_const.SQL_TIME.value: ddbc_sql_const.SQL_C_TYPE_TIME.value,
+ ddbc_sql_const.SQL_TIMESTAMP.value: ddbc_sql_const.SQL_C_TYPE_TIMESTAMP.value,
+ }
+ return sql_to_c_type.get(sql_type, ddbc_sql_const.SQL_C_DEFAULT.value)
+ def _create_parameter_types_list( # pylint: disable=too-many-arguments,too-many-positional-arguments
+ self,
+ parameter: Any,
+ param_info: Optional[Tuple[Any, ...]],
+ parameters_list: List[Any],
+ i: int,
+ min_val: Optional[Any] = None,
+ max_val: Optional[Any] = None,
+ ) -> Tuple[int, int, int, int, bool]:
+ """
+ Maps parameter types for the given parameter.
+ Args:
+ parameter: parameter to bind.
Returns:
paraminfo.
"""
paraminfo = param_info()
- sql_type, c_type, column_size, decimal_digits = self._map_sql_type(
- parameter, parameters_list, i
- )
+
+ # Check if we have explicit type information from setinputsizes
+ if self._inputsizes and i < len(self._inputsizes):
+ # Use explicit type information
+ sql_type, column_size, decimal_digits = self._inputsizes[i]
+
+ # Default is_dae to False for explicit types, but set to True for large strings/binary
+ is_dae = False
+
+ if parameter is None:
+ # For NULL parameters, always use SQL_C_DEFAULT regardless of SQL type
+ c_type = ddbc_sql_const.SQL_C_DEFAULT.value
+ else:
+ # For non-NULL parameters, determine the appropriate C type based on SQL type
+ c_type = self._get_c_type_for_sql_type(sql_type)
+
+ # Check if this should be a DAE (data at execution) parameter
+ # For string types with large column sizes
+ if isinstance(parameter, str) and column_size > MAX_INLINE_CHAR:
+ is_dae = True
+ # For binary types with large column sizes
+ elif isinstance(parameter, (bytes, bytearray)) and column_size > 8000:
+ is_dae = True
+
+ # Sanitize precision/scale for numeric types
+ if sql_type in (
+ ddbc_sql_const.SQL_DECIMAL.value,
+ ddbc_sql_const.SQL_NUMERIC.value,
+ ):
+ column_size = max(1, min(int(column_size) if column_size > 0 else 18, 38))
+ decimal_digits = min(max(0, decimal_digits), column_size)
+
+ else:
+ # Fall back to automatic type inference
+ sql_type, c_type, column_size, decimal_digits, is_dae = self._map_sql_type(
+ parameter, parameters_list, i, min_val=min_val, max_val=max_val
+ )
+
paraminfo.paramCType = c_type
paraminfo.paramSQLType = sql_type
paraminfo.inputOutputType = ddbc_sql_const.SQL_PARAM_INPUT.value
paraminfo.columnSize = column_size
paraminfo.decimalDigits = decimal_digits
+ paraminfo.isDAE = is_dae
+
+ if is_dae:
+ paraminfo.dataPtr = parameter # Will be converted to py::object* in C++
+
return paraminfo
- def _initialize_description(self):
+ def _initialize_description(self, column_metadata: Optional[Any] = None) -> None:
+ """Initialize the description attribute from column metadata."""
+ if not column_metadata:
+ self.description = None
+ return
+
+ description = []
+ for col in column_metadata:
+ # Get column name - lowercase it if the lowercase flag is set
+ column_name = col["ColumnName"]
+
+ # Use the current global setting to ensure tests pass correctly
+ if get_settings().lowercase:
+ column_name = column_name.lower()
+
+ # Add to description tuple (7 elements as per PEP-249)
+ description.append(
+ (
+ column_name, # name
+ self._map_data_type(col["DataType"]), # type_code
+ None, # display_size
+ col["ColumnSize"], # internal_size
+ col["ColumnSize"], # precision - should match ColumnSize
+ col["DecimalDigits"], # scale
+ col["Nullable"] == ddbc_sql_const.SQL_NULLABLE.value, # null_ok
+ )
+ )
+ self.description = description
+
+ def _build_converter_map(self):
"""
- Initialize the description attribute using SQLDescribeCol.
+ Build a pre-computed converter map for output converters.
+ Returns a list where each element is either a converter function or None.
+ This eliminates the need to look up converters for every row.
"""
- col_metadata = []
- ret = ddbc_bindings.DDBCSQLDescribeCol(self.hstmt, col_metadata)
- check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, ret)
+ if (
+ not self.description
+ or not hasattr(self.connection, "_output_converters")
+ or not self.connection._output_converters
+ ):
+ return None
- self.description = [
- (
- col["ColumnName"],
- self._map_data_type(col["DataType"]),
- None,
- col["ColumnSize"],
- col["ColumnSize"],
- col["DecimalDigits"],
- col["Nullable"] == ddbc_sql_const.SQL_NULLABLE.value,
- )
- for col in col_metadata
- ]
+ converter_map = []
+
+ for desc in self.description:
+ if desc is None:
+ converter_map.append(None)
+ continue
+ sql_type = desc[1]
+ converter = self.connection.get_output_converter(sql_type)
+ # If no converter found for the SQL type, try the WVARCHAR converter as a fallback
+ if converter is None:
+ from mssql_python.constants import ConstantsDDBC
+
+ converter = self.connection.get_output_converter(ConstantsDDBC.SQL_WVARCHAR.value)
+
+ converter_map.append(converter)
+
+ return converter_map
+
+ def _get_column_and_converter_maps(self):
+ """
+ Get column map and converter map for Row construction (thread-safe).
+ This centralizes the column map building logic to eliminate duplication
+ and ensure thread-safe lazy initialization.
+
+ Returns:
+ tuple: (column_map, converter_map)
+ """
+ # Thread-safe lazy initialization of column map
+ column_map = self._cached_column_map
+ if column_map is None and self.description:
+ # Build column map locally first, then assign to cache
+ column_map = {col_desc[0]: i for i, col_desc in enumerate(self.description)}
+ self._cached_column_map = column_map
+
+ # Fallback to legacy column name map if no cached map
+ column_map = column_map or getattr(self, "_column_name_map", None)
+
+ # Get cached converter map
+ converter_map = getattr(self, "_cached_converter_map", None)
+
+ return column_map, converter_map
def _map_data_type(self, sql_type):
"""
@@ -536,13 +1057,150 @@ def _map_data_type(self, sql_type):
}
return sql_to_python_type.get(sql_type, str)
- def execute(
+ @property
+ def rownumber(self) -> int:
+ """
+ DB-API extension: Current 0-based index of the cursor in the result set.
+
+ Returns:
+ int: The current 0-based index of the cursor in the result set,
+ or -1 if no row has been fetched yet or the index cannot be determined.
+
+ Note:
+ - Returns -1 before the first successful fetch
+ - Returns 0 after fetching the first row
+ - Returns -1 for empty result sets (since no rows can be fetched)
+
+ Warning:
+ This is a DB-API extension and may not be portable across different
+ database modules.
+ """
+ # Use mssql_python logging system instead of standard warnings
+ logger.warning("DB-API extension cursor.rownumber used")
+
+ # Return -1 if the cursor is closed or no result set is available
+ if self.closed or not self._has_result_set:
+ return -1
+
+ return self._rownumber # -1 until the first fetch, then 0, 1, 2, etc.
+
+ @property
+ def connection(self) -> "Connection":
+ """
+ DB-API 2.0 attribute: Connection object that created this cursor.
+
+ This is a read-only reference to the Connection object that was used to create
+ this cursor. This attribute is useful for polymorphic code that needs access
+ to connection-level functionality.
+
+ Returns:
+ Connection: The connection object that created this cursor.
+
+ Note:
+ This attribute is read-only as specified by DB-API 2.0. Attempting to
+ assign to this attribute will raise an AttributeError.
+ """
+ return self._connection
+
+ def _reset_rownumber(self) -> None:
+ """Reset the rownumber tracking when starting a new result set."""
+ self._rownumber = -1
+ self._next_row_index = 0
+ self._has_result_set = True
+ self._skip_increment_for_next_fetch = False
+
+ def _increment_rownumber(self):
+ """
+ Called after a successful fetch from the driver. Keep both counters consistent.
+ """
+ if self._has_result_set:
+ # driver returned one row, so the next row index increments by 1
+ self._next_row_index += 1
+ # rownumber is last returned row index
+ self._rownumber = self._next_row_index - 1
+ else:
+ raise InterfaceError(
+ "Cannot increment rownumber: no active result set.",
+ "No active result set.",
+ )
+
+ # Will be used when we add support for scrollable cursors
+ def _decrement_rownumber(self):
+ """
+ Decrement the rownumber by 1.
+
+ This could be used for error recovery or cursor positioning operations.
+ """
+ if self._has_result_set and self._rownumber >= 0:
+ if self._rownumber > 0:
+ self._rownumber -= 1
+ else:
+ self._rownumber = -1
+ else:
+ raise InterfaceError(
+ "Cannot decrement rownumber: no active result set.",
+ "No active result set.",
+ )
+
+ def _clear_rownumber(self):
+ """
+ Clear the rownumber tracking.
+
+ This should be called when the result set is cleared or when the cursor is reset.
+ """
+ self._rownumber = -1
+ self._has_result_set = False
+ self._skip_increment_for_next_fetch = False
+
+ def __iter__(self):
+ """
+ Return the cursor itself as an iterator.
+
+ This allows direct iteration over the cursor after execute():
+
+ for row in cursor.execute("SELECT * FROM table"):
+ print(row)
+ """
+ self._check_closed()
+ return self
+
+ def __next__(self):
+ """
+ Fetch the next row when iterating over the cursor.
+
+ Returns:
+ The next Row object.
+
+ Raises:
+ StopIteration: When no more rows are available.
+ """
+ self._check_closed()
+ row = self.fetchone()
+ if row is None:
+ raise StopIteration
+ return row
+
+ def next(self):
+ """
+ Fetch the next row from the cursor.
+
+ This is an alias for __next__() to maintain compatibility with older code.
+
+ Returns:
+ The next Row object.
+
+ Raises:
+ StopIteration: When no more rows are available.
+ """
+ return next(self)
+
+ def execute( # pylint: disable=too-many-locals,too-many-branches,too-many-statements
self,
operation: str,
*parameters,
use_prepare: bool = True,
- reset_cursor: bool = True
- ) -> None:
+ reset_cursor: bool = True,
+ ) -> "Cursor":
"""
Prepare and execute a database operation (query or command).
@@ -552,24 +1210,103 @@ def execute(
use_prepare: Whether to use SQLPrepareW (default) or SQLExecDirectW.
reset_cursor: Whether to reset the cursor before execution.
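+
+        Returns:
+            Cursor: self, so calls can be chained; for example (illustrative):
+                rows = cursor.execute("SELECT 1").fetchall()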
"""
+ logger.debug(
+ "execute: Starting - operation_length=%d, param_count=%d, use_prepare=%s",
+ len(operation),
+ len(parameters),
+ str(use_prepare),
+ )
+
+ # Log the actual query being executed
+ logger.debug("Executing query: %s", operation)
+
+ # Restore original fetch methods if they exist
+ if hasattr(self, "_original_fetchone"):
+ logger.debug("execute: Restoring original fetch methods")
+ self.fetchone = self._original_fetchone
+ self.fetchmany = self._original_fetchmany
+ self.fetchall = self._original_fetchall
+ del self._original_fetchone
+ del self._original_fetchmany
+ del self._original_fetchall
+
self._check_closed() # Check if the cursor is closed
if reset_cursor:
+ logger.debug("execute: Resetting cursor state")
self._reset_cursor()
+ # Clear any previous messages
+ self.messages = []
+
+ # Auto-detect and convert parameter style if needed
+ # Supports both qmark (?) and pyformat (%(name)s)
+ # Note: parameters is always a tuple due to *parameters in method signature
+ #
+ # Parameter Passing Rules (handling ambiguity):
+ #
+ # 1. Single value:
+ # cursor.execute("SELECT ?", 42)
+ # → parameters = (42,)
+ # → Wrapped as single parameter
+ #
+ # 2. Multiple values (two equivalent ways):
+ # cursor.execute("SELECT ?, ?", 1, 2) # Varargs
+ # cursor.execute("SELECT ?, ?", (1, 2)) # Tuple
+ # → Both result in parameters = (1, 2) or ((1, 2),)
+ # → If single tuple/list/dict arg, it's unwrapped
+ #
+ # 3. Dict for named parameters:
+ # cursor.execute("SELECT %(id)s", {"id": 42})
+ # → parameters = ({"id": 42},)
+ # → Unwrapped to {"id": 42}, then converted to qmark style
+ #
+ # Important: If you pass a tuple/list/dict as the ONLY argument,
+ # it will be unwrapped for parameter binding. This means you cannot
+ # pass a tuple as a single parameter value (but SQL Server doesn't
+ # support tuple types as parameter values anyway).
+ if parameters:
+ # Check if single parameter is a nested container that should be unwrapped
+ # e.g., execute("SELECT ?", (value,)) vs execute("SELECT ?, ?", ((1, 2),))
+ if isinstance(parameters, tuple) and len(parameters) == 1:
+ # Could be either (value,) for single param or ((tuple),) for nested
+ # Check if it's a nested container
+ if isinstance(parameters[0], (tuple, list, dict)):
+ actual_params = parameters[0]
+ else:
+ actual_params = parameters
+ else:
+ actual_params = parameters
+
+ # Convert parameters based on detected style
+ operation, converted_params = detect_and_convert_parameters(operation, actual_params)
+
+ # Convert back to list format expected by the binding code
+ parameters = list(converted_params)
+ else:
+ parameters = []
+
+ # Getting encoding setting
+ encoding_settings = self._get_encoding_settings()
+
+ # Apply timeout if set (non-zero)
+ logger.debug("execute: Creating parameter type list")
param_info = ddbc_bindings.ParamInfo
parameters_type = []
- # Flatten parameters if a single tuple or list is passed
- if len(parameters) == 1 and isinstance(parameters[0], (tuple, list)):
- parameters = parameters[0]
+ # Validate that inputsizes matches parameter count if both are present
+ if parameters and self._inputsizes:
+ if len(self._inputsizes) != len(parameters):
- parameters = list(parameters)
+ warnings.warn(
+ f"Number of input sizes ({len(self._inputsizes)}) does not match "
+ f"number of parameters ({len(parameters)}). "
+ f"This may lead to unexpected behavior.",
+ Warning,
+ )
if parameters:
for i, param in enumerate(parameters):
- paraminfo = self._create_parameter_types_list(
- param, param_info, parameters, i
- )
+ paraminfo = self._create_parameter_types_list(param, param_info, parameters, i)
parameters_type.append(paraminfo)
# TODO: Use a more sophisticated string compare that handles redundant spaces etc.
@@ -577,23 +1314,22 @@ def execute(
# in low-memory conditions
# (Ex: huge number of parallel queries with huge query string sizes)
if operation != self.last_executed_stmt:
-# Executing a new statement. Reset is_stmt_prepared to false
+ # Executing a new statement. Reset is_stmt_prepared to false
self.is_stmt_prepared = [False]
- log('debug', "Executing query: %s", operation)
for i, param in enumerate(parameters):
- log('debug',
+ logger.debug(
"""Parameter number: %s, Parameter: %s,
Param Python Type: %s, ParamInfo: %s, %s, %s, %s, %s""",
i + 1,
param,
str(type(param)),
- parameters_type[i].paramSQLType,
- parameters_type[i].paramCType,
- parameters_type[i].columnSize,
- parameters_type[i].decimalDigits,
- parameters_type[i].inputOutputType,
- )
+ parameters_type[i].paramSQLType,
+ parameters_type[i].paramCType,
+ parameters_type[i].columnSize,
+ parameters_type[i].decimalDigits,
+ parameters_type[i].inputOutputType,
+ )
ret = ddbc_bindings.DDBCSQLExecute(
self.hstmt,
@@ -602,8 +1338,22 @@ def execute(
parameters_type,
self.is_stmt_prepared,
use_prepare,
+ encoding_settings,
)
- check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, ret)
+ # Check return code
+ try:
+ # Check for errors but don't raise exceptions for info/warning messages
+ check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, ret)
+ except Exception as e: # pylint: disable=broad-exception-caught
+ logger.warning("Execute failed, resetting cursor: %s", e)
+ self._reset_cursor()
+ raise
+
+ # Capture any diagnostic messages (SQL_SUCCESS_WITH_INFO, etc.)
+ if self.hstmt:
+ self.messages.extend(ddbc_bindings.DDBCSQLGetAllDiagRecords(self.hstmt))
+
self.last_executed_stmt = operation
# Update rowcount after execution
@@ -611,164 +1361,1041 @@ def execute(
self.rowcount = ddbc_bindings.DDBCSQLRowCount(self.hstmt)
# Initialize description after execution
- self._initialize_description()
+ # After successful execution, initialize description if there are results
+ column_metadata = []
+ try:
+ ddbc_bindings.DDBCSQLDescribeCol(self.hstmt, column_metadata)
+ self._initialize_description(column_metadata)
+ except Exception as e: # pylint: disable=broad-exception-caught
+ # If describe fails, it's likely there are no results (e.g., for INSERT)
+ self.description = None
+
+ # Reset rownumber for new result set (only for SELECT statements)
+ if self.description: # If we have column descriptions, it's likely a SELECT
+ self.rowcount = -1
+ self._reset_rownumber()
+ # Pre-build column map and converter map
+ self._cached_column_map = {
+ col_desc[0]: i for i, col_desc in enumerate(self.description)
+ }
+ self._cached_converter_map = self._build_converter_map()
+ else:
+ self.rowcount = ddbc_bindings.DDBCSQLRowCount(self.hstmt)
+ self._clear_rownumber()
+ self._cached_column_map = None
+ self._cached_converter_map = None
- @staticmethod
- def _select_best_sample_value(column):
+
+ self._reset_inputsizes() # Reset input sizes after execution
+ # Return self for method chaining
+ return self
+
+ def _prepare_metadata_result_set( # pylint: disable=too-many-statements
+ self, column_metadata=None, fallback_description=None, specialized_mapping=None
+ ):
"""
- Selects the most representative non-null value from a column for type inference.
+ Prepares a metadata result set by:
+ 1. Retrieving column metadata if not provided
+ 2. Initializing the description attribute
+ 3. Setting up column name mappings
+ 4. Creating wrapper fetch methods with column mapping support
- This is used during executemany() to infer SQL/C types based on actual data,
- preferring a non-null value that is not the first row to avoid bias from placeholder defaults.
-
- Args:
- column: List of values in the column.
- """
- non_nulls = [v for v in column if v is not None]
- if not non_nulls:
- return None
- if all(isinstance(v, int) for v in non_nulls):
- # Pick the value with the widest range (min/max)
- return max(non_nulls, key=lambda v: abs(v))
- if all(isinstance(v, float) for v in non_nulls):
- return 0.0
- if all(isinstance(v, decimal.Decimal) for v in non_nulls):
- return max(non_nulls, key=lambda d: len(d.as_tuple().digits))
- if all(isinstance(v, str) for v in non_nulls):
- return max(non_nulls, key=lambda s: len(str(s)))
- if all(isinstance(v, datetime.datetime) for v in non_nulls):
- return datetime.datetime.now()
- if all(isinstance(v, datetime.date) for v in non_nulls):
- return datetime.date.today()
- return non_nulls[0] # fallback
-
- def _transpose_rowwise_to_columnwise(self, seq_of_parameters: list) -> list:
- """
- Convert list of rows (row-wise) into list of columns (column-wise),
- for array binding via ODBC.
Args:
- seq_of_parameters: Sequence of sequences or mappings of parameters.
- """
- if not seq_of_parameters:
- return []
-
- num_params = len(seq_of_parameters[0])
- columnwise = [[] for _ in range(num_params)]
- for row in seq_of_parameters:
- if len(row) != num_params:
- raise ValueError("Inconsistent parameter row size in executemany()")
- for i, val in enumerate(row):
- columnwise[i].append(val)
- return columnwise
+ column_metadata (list, optional): Pre-fetched column metadata.
+ If None, it will be retrieved.
+ fallback_description (list, optional): Fallback description to use if
+ metadata retrieval fails.
+ specialized_mapping (dict, optional): Custom column mapping for special cases.
- def executemany(self, operation: str, seq_of_parameters: list) -> None:
+ Returns:
+ Cursor: Self, for method chaining
"""
- Prepare a database operation and execute it against all parameter sequences.
- This version uses column-wise parameter binding and a single batched SQLExecute().
- Args:
- operation: SQL query or command.
- seq_of_parameters: Sequence of sequences or mappings of parameters.
+ # Retrieve column metadata if not provided
+ if column_metadata is None:
+ column_metadata = []
+ try:
+ ddbc_bindings.DDBCSQLDescribeCol(self.hstmt, column_metadata)
+ except InterfaceError as e:
+ logger.warning(f"Driver interface error during metadata retrieval: {e}")
+ except Exception as e: # pylint: disable=broad-exception-caught
+ # Log the exception with appropriate context
+ logger.warning(
+ f"Failed to retrieve column metadata: {e}. "
+ f"Using standard ODBC column definitions instead.",
+ )
- Raises:
- Error: If the operation fails.
+ # Initialize the description attribute with the column metadata
+ self._initialize_description(column_metadata)
+
+ # Use fallback description if provided and current description is empty
+ if not self.description and fallback_description:
+ self.description = fallback_description
+
+ # Define column names in ODBC standard order
+ self._column_map = {} # pylint: disable=attribute-defined-outside-init
+ for i, (name, *_) in enumerate(self.description):
+ # Add standard name
+ self._column_map[name] = i
+ # Add lowercase alias
+ self._column_map[name.lower()] = i
+
+ # If specialized mapping is provided, handle it differently
+ if specialized_mapping:
+ # Define specialized fetch methods that use the custom mapping
+ def fetchone_with_specialized_mapping():
+ row = self._original_fetchone()
+ if row is not None:
+ merged_map = getattr(row, "_column_map", {}).copy()
+ merged_map.update(specialized_mapping)
+ row._column_map = merged_map
+ return row
+
+ def fetchmany_with_specialized_mapping(size=None):
+ rows = self._original_fetchmany(size)
+ for row in rows:
+ merged_map = getattr(row, "_column_map", {}).copy()
+ merged_map.update(specialized_mapping)
+ row._column_map = merged_map
+ return rows
+
+ def fetchall_with_specialized_mapping():
+ rows = self._original_fetchall()
+ for row in rows:
+ merged_map = getattr(row, "_column_map", {}).copy()
+ merged_map.update(specialized_mapping)
+ row._column_map = merged_map
+ return rows
+
+ # Save original fetch methods
+ if not hasattr(self, "_original_fetchone"):
+ self._original_fetchone = (
+ self.fetchone
+ ) # pylint: disable=attribute-defined-outside-init
+ self._original_fetchmany = (
+ self.fetchmany
+ ) # pylint: disable=attribute-defined-outside-init
+ self._original_fetchall = (
+ self.fetchall
+ ) # pylint: disable=attribute-defined-outside-init
+
+ # Use specialized mapping methods
+ self.fetchone = fetchone_with_specialized_mapping
+ self.fetchmany = fetchmany_with_specialized_mapping
+ self.fetchall = fetchall_with_specialized_mapping
+ else:
+ # Standard column mapping
+ # Remember original fetch methods (store only once)
+ if not hasattr(self, "_original_fetchone"):
+ self._original_fetchone = (
+ self.fetchone
+ ) # pylint: disable=attribute-defined-outside-init
+ self._original_fetchmany = (
+ self.fetchmany
+ ) # pylint: disable=attribute-defined-outside-init
+ self._original_fetchall = (
+ self.fetchall
+ ) # pylint: disable=attribute-defined-outside-init
+
+ # Create wrapper fetch methods that add column mappings
+ def fetchone_with_mapping():
+ row = self._original_fetchone()
+ if row is not None:
+ row._column_map = self._column_map
+ return row
+
+ def fetchmany_with_mapping(size=None):
+ rows = self._original_fetchmany(size)
+ for row in rows:
+ row._column_map = self._column_map
+ return rows
+
+ def fetchall_with_mapping():
+ rows = self._original_fetchall()
+ for row in rows:
+ row._column_map = self._column_map
+ return rows
+
+ # Replace fetch methods
+ self.fetchone = fetchone_with_mapping
+ self.fetchmany = fetchmany_with_mapping
+ self.fetchall = fetchall_with_mapping
+
+ # Return the cursor itself for method chaining
+ return self
+
+ def getTypeInfo(self, sqlType=None):
+ """
+ Executes SQLGetTypeInfo and creates a result set with information about
+ the specified data type or all data types supported by the ODBC driver if not specified.
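+
+        Example (illustrative):
+            cursor.getTypeInfo()     # information about all supported types
+            cursor.getTypeInfo(12)   # information about SQL_VARCHAR (ODBC type code 12)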
"""
self._check_closed()
self._reset_cursor()
- if not seq_of_parameters:
- self.rowcount = 0
- return
+ sql_all_types = 0 # SQL_ALL_TYPES = 0
- param_info = ddbc_bindings.ParamInfo
- param_count = len(seq_of_parameters[0])
+ try:
+ # Get information about data types
+ ret = ddbc_bindings.DDBCSQLGetTypeInfo(
+ self.hstmt, sqlType if sqlType is not None else sql_all_types
+ )
+ check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, ret)
+
+ # Use the helper method to prepare the result set
+ return self._prepare_metadata_result_set()
+ except Exception as e: # pylint: disable=broad-exception-caught
+ self._reset_cursor()
+ raise e
+
+ def procedures(self, procedure=None, catalog=None, schema=None):
+ """
+ Executes SQLProcedures and creates a result set of information about procedures
+ in the data source.
+
+ Args:
+ procedure (str, optional): Procedure name pattern. Default is None (all procedures).
+ catalog (str, optional): Catalog name pattern. Default is None (current catalog).
+ schema (str, optional): Schema name pattern. Default is None (all schemas).
+ """
+ self._check_closed()
+ self._reset_cursor()
+
+ # Call the SQLProcedures function
+ retcode = ddbc_bindings.DDBCSQLProcedures(self.hstmt, catalog, schema, procedure)
+ check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, retcode)
+
+ # Define fallback description for procedures
+ fallback_description = [
+ ("procedure_cat", str, None, 128, 128, 0, True),
+ ("procedure_schem", str, None, 128, 128, 0, True),
+ ("procedure_name", str, None, 128, 128, 0, False),
+ ("num_input_params", int, None, 10, 10, 0, True),
+ ("num_output_params", int, None, 10, 10, 0, True),
+ ("num_result_sets", int, None, 10, 10, 0, True),
+ ("remarks", str, None, 254, 254, 0, True),
+ ("procedure_type", int, None, 10, 10, 0, False),
+ ]
+
+ # Use the helper method to prepare the result set
+ return self._prepare_metadata_result_set(fallback_description=fallback_description)
+
+ def primaryKeys(self, table, catalog=None, schema=None):
+ """
+ Creates a result set of column names that make up the primary key for a table
+ by executing the SQLPrimaryKeys function.
+
+ Args:
+ table (str): The name of the table
+ catalog (str, optional): The catalog name (database). Defaults to None.
+ schema (str, optional): The schema name. Defaults to None.
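+
+        Example (illustrative; "users" is a placeholder table):
+            for pk in cursor.primaryKeys("users", schema="dbo"):
+                print(pk[3], pk[4])   # column_name, key_seq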
+ """
+ self._check_closed()
+ self._reset_cursor()
+
+ if not table:
+ raise ProgrammingError("Table name must be specified", "HY000")
+
+ # Call the SQLPrimaryKeys function
+ retcode = ddbc_bindings.DDBCSQLPrimaryKeys(self.hstmt, catalog, schema, table)
+ check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, retcode)
+
+ # Define fallback description for primary keys
+ fallback_description = [
+ ("table_cat", str, None, 128, 128, 0, True),
+ ("table_schem", str, None, 128, 128, 0, True),
+ ("table_name", str, None, 128, 128, 0, False),
+ ("column_name", str, None, 128, 128, 0, False),
+ ("key_seq", int, None, 10, 10, 0, False),
+ ("pk_name", str, None, 128, 128, 0, True),
+ ]
+
+ # Use the helper method to prepare the result set
+ return self._prepare_metadata_result_set(fallback_description=fallback_description)
+
+ def foreignKeys( # pylint: disable=too-many-arguments,too-many-positional-arguments
+ self,
+ table=None,
+ catalog=None,
+ schema=None,
+ foreignTable=None,
+ foreignCatalog=None,
+ foreignSchema=None,
+ ):
+ """
+ Executes the SQLForeignKeys function and creates a result set of column names
+ that are foreign keys.
+
+ This function returns:
+ 1. Foreign keys in the specified table that reference primary keys in other tables, OR
+ 2. Foreign keys in other tables that reference the primary key in the specified table
+ """
+ self._check_closed()
+ self._reset_cursor()
+
+ # Check if we have at least one table specified
+ if table is None and foreignTable is None:
+ raise ProgrammingError("Either table or foreignTable must be specified", "HY000")
+
+ # Call the SQLForeignKeys function
+ retcode = ddbc_bindings.DDBCSQLForeignKeys(
+ self.hstmt,
+ foreignCatalog,
+ foreignSchema,
+ foreignTable,
+ catalog,
+ schema,
+ table,
+ )
+ check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, retcode)
+
+ # Define fallback description for foreign keys
+ fallback_description = [
+ ("pktable_cat", str, None, 128, 128, 0, True),
+ ("pktable_schem", str, None, 128, 128, 0, True),
+ ("pktable_name", str, None, 128, 128, 0, False),
+ ("pkcolumn_name", str, None, 128, 128, 0, False),
+ ("fktable_cat", str, None, 128, 128, 0, True),
+ ("fktable_schem", str, None, 128, 128, 0, True),
+ ("fktable_name", str, None, 128, 128, 0, False),
+ ("fkcolumn_name", str, None, 128, 128, 0, False),
+ ("key_seq", int, None, 10, 10, 0, False),
+ ("update_rule", int, None, 10, 10, 0, False),
+ ("delete_rule", int, None, 10, 10, 0, False),
+ ("fk_name", str, None, 128, 128, 0, True),
+ ("pk_name", str, None, 128, 128, 0, True),
+ ("deferrability", int, None, 10, 10, 0, False),
+ ]
+
+ # Use the helper method to prepare the result set
+ return self._prepare_metadata_result_set(fallback_description=fallback_description)
+
+ def rowIdColumns(self, table, catalog=None, schema=None, nullable=True):
+ """
+ Executes SQLSpecialColumns with SQL_BEST_ROWID which creates a result set of
+ columns that uniquely identify a row.
+ """
+ self._check_closed()
+ self._reset_cursor()
+
+ if not table:
+ raise ProgrammingError("Table name must be specified", "HY000")
+
+ # Set the identifier type and options
+ identifier_type = ddbc_sql_const.SQL_BEST_ROWID.value
+ scope = ddbc_sql_const.SQL_SCOPE_CURROW.value
+ nullable_flag = (
+ ddbc_sql_const.SQL_NULLABLE.value if nullable else ddbc_sql_const.SQL_NO_NULLS.value
+ )
+
+ # Call the SQLSpecialColumns function
+ retcode = ddbc_bindings.DDBCSQLSpecialColumns(
+ self.hstmt, identifier_type, catalog, schema, table, scope, nullable_flag
+ )
+ check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, retcode)
+
+ # Define fallback description for special columns
+ fallback_description = [
+ ("scope", int, None, 10, 10, 0, False),
+ ("column_name", str, None, 128, 128, 0, False),
+ ("data_type", int, None, 10, 10, 0, False),
+ ("type_name", str, None, 128, 128, 0, False),
+ ("column_size", int, None, 10, 10, 0, False),
+ ("buffer_length", int, None, 10, 10, 0, False),
+ ("decimal_digits", int, None, 10, 10, 0, True),
+ ("pseudo_column", int, None, 10, 10, 0, False),
+ ]
+
+ # Use the helper method to prepare the result set
+ return self._prepare_metadata_result_set(fallback_description=fallback_description)
+
+ def rowVerColumns(self, table, catalog=None, schema=None, nullable=True):
+ """
+ Executes SQLSpecialColumns with SQL_ROWVER which creates a result set of
+ columns that are automatically updated when any value in the row is updated.
+ """
+ self._check_closed()
+ self._reset_cursor()
+
+ if not table:
+ raise ProgrammingError("Table name must be specified", "HY000")
+
+ # Set the identifier type and options
+ identifier_type = ddbc_sql_const.SQL_ROWVER.value
+ scope = ddbc_sql_const.SQL_SCOPE_CURROW.value
+ nullable_flag = (
+ ddbc_sql_const.SQL_NULLABLE.value if nullable else ddbc_sql_const.SQL_NO_NULLS.value
+ )
+
+ # Call the SQLSpecialColumns function
+ retcode = ddbc_bindings.DDBCSQLSpecialColumns(
+ self.hstmt, identifier_type, catalog, schema, table, scope, nullable_flag
+ )
+ check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, retcode)
+
+ # Same fallback description as rowIdColumns
+ fallback_description = [
+ ("scope", int, None, 10, 10, 0, False),
+ ("column_name", str, None, 128, 128, 0, False),
+ ("data_type", int, None, 10, 10, 0, False),
+ ("type_name", str, None, 128, 128, 0, False),
+ ("column_size", int, None, 10, 10, 0, False),
+ ("buffer_length", int, None, 10, 10, 0, False),
+ ("decimal_digits", int, None, 10, 10, 0, True),
+ ("pseudo_column", int, None, 10, 10, 0, False),
+ ]
+
+ # Use the helper method to prepare the result set
+ return self._prepare_metadata_result_set(fallback_description=fallback_description)
+
+ def statistics( # pylint: disable=too-many-arguments,too-many-positional-arguments
+ self,
+ table: str,
+ catalog: str = None,
+ schema: str = None,
+ unique: bool = False,
+ quick: bool = True,
+ ) -> "Cursor":
+ """
+ Creates a result set of statistics about a single table and the indexes associated
+ with the table by executing SQLStatistics.
+ """
+ self._check_closed()
+ self._reset_cursor()
+
+ if not table:
+ raise ProgrammingError("Table name is required", "HY000")
+
+ # Set unique and quick flags
+ unique_option = (
+ ddbc_sql_const.SQL_INDEX_UNIQUE.value if unique else ddbc_sql_const.SQL_INDEX_ALL.value
+ )
+ reserved_option = (
+ ddbc_sql_const.SQL_QUICK.value if quick else ddbc_sql_const.SQL_ENSURE.value
+ )
+
+ # Call the SQLStatistics function
+ retcode = ddbc_bindings.DDBCSQLStatistics(
+ self.hstmt, catalog, schema, table, unique_option, reserved_option
+ )
+ check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, retcode)
+
+ # Define fallback description for statistics
+ fallback_description = [
+ ("table_cat", str, None, 128, 128, 0, True),
+ ("table_schem", str, None, 128, 128, 0, True),
+ ("table_name", str, None, 128, 128, 0, False),
+ ("non_unique", bool, None, 1, 1, 0, False),
+ ("index_qualifier", str, None, 128, 128, 0, True),
+ ("index_name", str, None, 128, 128, 0, True),
+ ("type", int, None, 10, 10, 0, False),
+ ("ordinal_position", int, None, 10, 10, 0, False),
+ ("column_name", str, None, 128, 128, 0, True),
+ ("asc_or_desc", str, None, 1, 1, 0, True),
+ ("cardinality", int, None, 20, 20, 0, True),
+ ("pages", int, None, 20, 20, 0, True),
+ ("filter_condition", str, None, 128, 128, 0, True),
+ ]
+
+ # Use the helper method to prepare the result set
+ return self._prepare_metadata_result_set(fallback_description=fallback_description)
+
+ def columns(self, table=None, catalog=None, schema=None, column=None):
+ """
+ Creates a result set of column information in the specified tables
+ using the SQLColumns function.
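+
+        Example (illustrative; "users" is a placeholder table):
+            for col in cursor.columns(table="users"):
+                print(col[3], col[5])   # column_name, type_name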
+ """
+ self._check_closed()
+ self._reset_cursor()
+
+ # Call the SQLColumns function
+ retcode = ddbc_bindings.DDBCSQLColumns(self.hstmt, catalog, schema, table, column)
+ check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, retcode)
+
+ # Define fallback description for columns
+ fallback_description = [
+ ("table_cat", str, None, 128, 128, 0, True),
+ ("table_schem", str, None, 128, 128, 0, True),
+ ("table_name", str, None, 128, 128, 0, False),
+ ("column_name", str, None, 128, 128, 0, False),
+ ("data_type", int, None, 10, 10, 0, False),
+ ("type_name", str, None, 128, 128, 0, False),
+ ("column_size", int, None, 10, 10, 0, True),
+ ("buffer_length", int, None, 10, 10, 0, True),
+ ("decimal_digits", int, None, 10, 10, 0, True),
+ ("num_prec_radix", int, None, 10, 10, 0, True),
+ ("nullable", int, None, 10, 10, 0, False),
+ ("remarks", str, None, 254, 254, 0, True),
+ ("column_def", str, None, 254, 254, 0, True),
+ ("sql_data_type", int, None, 10, 10, 0, False),
+ ("sql_datetime_sub", int, None, 10, 10, 0, True),
+ ("char_octet_length", int, None, 10, 10, 0, True),
+ ("ordinal_position", int, None, 10, 10, 0, False),
+ ("is_nullable", str, None, 254, 254, 0, True),
+ ]
+
+ # Use the helper method to prepare the result set
+ return self._prepare_metadata_result_set(fallback_description=fallback_description)
+
+ def _transpose_rowwise_to_columnwise(self, seq_of_parameters: list) -> tuple[list, int]:
+ """
+ Convert sequence of rows (row-wise) into list of columns (column-wise),
+ for array binding via ODBC. Works with both iterables and generators.
+
+ Args:
+ seq_of_parameters: Sequence of sequences or mappings of parameters.
+
+ Returns:
+ tuple: (columnwise_data, row_count)
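+
+        Example (illustrative):
+            [(1, "a"), (2, "b"), (3, "c")] -> ([[1, 2, 3], ["a", "b", "c"]], 3)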
+ """
+ columnwise = []
+ first_row = True
+ row_count = 0
+
+ for row in seq_of_parameters:
+ row_count += 1
+ if first_row:
+ # Initialize columnwise lists based on first row
+ num_params = len(row)
+ columnwise = [[] for _ in range(num_params)]
+ first_row = False
+ else:
+ # Validate row size consistency
+ if len(row) != num_params:
+ raise ValueError("Inconsistent parameter row size in executemany()")
+
+ # Add each value to its column list
+ for i, val in enumerate(row):
+ columnwise[i].append(val)
+
+ return columnwise, row_count
+
+ def _compute_column_type(self, column):
+ """
+ Determine representative value and integer min/max for a column.
+
+ Returns:
+ sample_value: Representative value for type inference and modified_row.
+ min_val: Minimum for integers (None otherwise).
+ max_val: Maximum for integers (None otherwise).
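+
+        Example (illustrative):
+            [1, None, 500, -7]   -> (500, -7, 500)
+            ["ab", "abcd", None] -> ("abcd", None, None)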
+ """
+ non_nulls = [v for v in column if v is not None]
+ if not non_nulls:
+ return None, None, None
+
+ int_values = [v for v in non_nulls if isinstance(v, int)]
+ if int_values:
+ min_val, max_val = min(int_values), max(int_values)
+ sample_value = max(int_values, key=abs)
+ return sample_value, min_val, max_val
+
+ sample_value = None
+ for v in non_nulls:
+ if not sample_value:
+ sample_value = v
+ elif isinstance(v, (str, bytes, bytearray)) and isinstance(
+ sample_value, (str, bytes, bytearray)
+ ):
+ # For string/binary objects, prefer the longer one
+ # Use safe length comparison to avoid exceptions from custom __len__ implementations
+ try:
+ if len(v) > len(sample_value):
+ sample_value = v
+ except (TypeError, ValueError, AttributeError):
+ # If length comparison fails, keep the current sample_value
+ pass
+ elif isinstance(v, decimal.Decimal) and isinstance(sample_value, decimal.Decimal):
+ # For Decimal objects, prefer the one that requires higher precision or scale
+ v_tuple = v.as_tuple()
+ sample_tuple = sample_value.as_tuple()
+
+ # Calculate precision (total significant digits) and scale (decimal places)
+ # For a number like 0.000123456789, we need precision = 9, scale = 12
+ # The precision is the number of significant digits (len(digits))
+ # The scale is the number of decimal places needed to represent the number
+
+ v_precision = len(v_tuple.digits)
+ if v_tuple.exponent < 0:
+ v_scale = -v_tuple.exponent
+ else:
+ v_scale = 0
+
+ sample_precision = len(sample_tuple.digits)
+ if sample_tuple.exponent < 0:
+ sample_scale = -sample_tuple.exponent
+ else:
+ sample_scale = 0
+
+ # For SQL DECIMAL(precision, scale), we need:
+ # precision >= number of significant digits
+ # scale >= number of decimal places
+ # For 0.000123456789: precision needs to be at least 12 (to accommodate 12 decimal places)
+ # So we need to adjust precision to be at least as large as scale
+ v_required_precision = max(v_precision, v_scale)
+ sample_required_precision = max(sample_precision, sample_scale)
+
+ # Prefer the decimal that requires higher precision or scale
+ # This ensures we can accommodate all values in the column
+ if v_required_precision > sample_required_precision or (
+ v_required_precision == sample_required_precision and v_scale > sample_scale
+ ):
+ sample_value = v
+ elif isinstance(v, decimal.Decimal) and not isinstance(sample_value, decimal.Decimal):
+ # If comparing Decimal to non-Decimal, prefer Decimal for better type inference
+ sample_value = v
+
+ return sample_value, None, None
+
+ def executemany( # pylint: disable=too-many-locals,too-many-branches,too-many-statements
+ self, operation: str, seq_of_parameters: List[Sequence[Any]]
+ ) -> None:
+ """
+ Prepare a database operation and execute it against all parameter sequences.
+ This version uses column-wise parameter binding and a single batched SQLExecute().
+ Args:
+ operation: SQL query or command.
+ seq_of_parameters: Sequence of sequences or mappings of parameters.
+ Raises:
+ Error: If the operation fails.
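+
+        Example (illustrative; "t" is a placeholder table):
+            cursor.executemany(
+                "INSERT INTO t (a, b) VALUES (?, ?)",
+                [(1, "x"), (2, "y"), (3, "z")],
+            )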
+ """
+ logger.debug(
+ "executemany: Starting - operation_length=%d, batch_count=%d",
+ len(operation),
+ len(seq_of_parameters),
+ )
+
+ self._check_closed()
+ self._reset_cursor()
+ self.messages = []
+ logger.debug("executemany: Cursor reset complete")
+
+ if not seq_of_parameters:
+ self.rowcount = 0
+ return
+
+ # Auto-detect and convert parameter style for executemany
+ # Check first row to determine if we need to convert from pyformat to qmark
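+        # For example (illustrative): "INSERT INTO t VALUES (%(a)s, %(b)s)" with rows
+        # like {"a": 1, "b": 2} is rewritten to "INSERT INTO t VALUES (?, ?)" and each
+        # dict becomes the tuple (1, 2) in the order parsed from the SQL.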
+ first_row = (
+ seq_of_parameters[0]
+ if hasattr(seq_of_parameters, "__getitem__")
+ else next(iter(seq_of_parameters))
+ )
+
+ if isinstance(first_row, dict):
+ # pyformat style - convert all rows
+ # Parse parameter names from SQL (determines order for all rows)
+ param_names = parse_pyformat_params(operation)
+
+ if param_names:
+ # Convert SQL to qmark style
+ operation, _ = convert_pyformat_to_qmark(operation, first_row)
+
+ # Convert all parameter dicts to tuples in the same order
+ converted_params = []
+ for param_dict in seq_of_parameters:
+ if not isinstance(param_dict, dict):
+ raise TypeError(
+ f"Mixed parameter types in executemany: first row is dict, "
+ f"but row has {type(param_dict).__name__}"
+ )
+ # Build tuple in the order determined by param_names
+ row_tuple = tuple(param_dict[name] for name in param_names)
+ converted_params.append(row_tuple)
+
+ seq_of_parameters = converted_params
+ logger.debug(
+ "executemany: Converted %d rows from pyformat to qmark", len(seq_of_parameters)
+ )
+
+ # Apply timeout if set (non-zero)
+ if self._timeout > 0:
+ try:
+ timeout_value = int(self._timeout)
+ ret = ddbc_bindings.DDBCSQLSetStmtAttr(
+ self.hstmt,
+ ddbc_sql_const.SQL_ATTR_QUERY_TIMEOUT.value,
+ timeout_value,
+ )
+ check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, ret)
+ logger.debug(f"Set query timeout to {self._timeout} seconds")
+ except Exception as e: # pylint: disable=broad-exception-caught
+ logger.warning(f"Failed to set query timeout: {e}")
+
+ # Get sample row for parameter type detection and validation
+ sample_row = (
+ seq_of_parameters[0]
+ if hasattr(seq_of_parameters, "__getitem__")
+ else next(iter(seq_of_parameters))
+ )
+ param_count = len(sample_row)
+ param_info = ddbc_bindings.ParamInfo
parameters_type = []
+ any_dae = False
+
+ # Check if we have explicit input sizes set
+ if self._inputsizes:
+ # Validate input sizes match parameter count
+ if len(self._inputsizes) != param_count:
+ warnings.warn(
+ f"Number of input sizes ({len(self._inputsizes)}) does not match "
+ f"number of parameters ({param_count}). This may lead to unexpected behavior.",
+ Warning,
+ )
+ # Prepare parameter type information
for col_index in range(param_count):
- column = [row[col_index] for row in seq_of_parameters]
- sample_value = self._select_best_sample_value(column)
- dummy_row = list(seq_of_parameters[0])
- parameters_type.append(
- self._create_parameter_types_list(sample_value, param_info, dummy_row, col_index)
+ column = (
+ [row[col_index] for row in seq_of_parameters]
+ if hasattr(seq_of_parameters, "__getitem__")
+ else []
)
+ sample_value, min_val, max_val = self._compute_column_type(column)
+
+ if self._inputsizes and col_index < len(self._inputsizes):
+ # Use explicitly set input sizes
+ sql_type, column_size, decimal_digits = self._inputsizes[col_index]
+
+ # Default is_dae to False
+ is_dae = False
+
+ # Determine appropriate C type based on SQL type
+ c_type = self._get_c_type_for_sql_type(sql_type)
+
+ # Check if this should be a DAE (data at execution) parameter based on column size
+ if sample_value is not None:
+ if isinstance(sample_value, str) and column_size > MAX_INLINE_CHAR:
+ is_dae = True
+ elif isinstance(sample_value, (bytes, bytearray)) and column_size > 8000:
+ is_dae = True
+
+ # Sanitize precision/scale for numeric types
+ if sql_type in (
+ ddbc_sql_const.SQL_DECIMAL.value,
+ ddbc_sql_const.SQL_NUMERIC.value,
+ ):
+ column_size = max(1, min(int(column_size) if column_size > 0 else 18, 38))
+ decimal_digits = min(max(0, decimal_digits), column_size)
+
+ # For binary data columns with mixed content, we need to find max size
+ if sql_type in (
+ ddbc_sql_const.SQL_BINARY.value,
+ ddbc_sql_const.SQL_VARBINARY.value,
+ ddbc_sql_const.SQL_LONGVARBINARY.value,
+ ):
+ # Find the maximum size needed for any row's binary data
+ max_binary_size = 0
+ for row in seq_of_parameters:
+ value = row[col_index]
+ if value is not None and isinstance(value, (bytes, bytearray)):
+ max_binary_size = max(max_binary_size, len(value))
+
+ # For SQL Server VARBINARY(MAX), we need to use large object binding
+ if column_size > 8000 or max_binary_size > 8000:
+ sql_type = ddbc_sql_const.SQL_LONGVARBINARY.value
+ is_dae = True
+
+ # Update column_size to actual maximum size if it's larger
+ # Always ensure at least a minimum size of 1 for empty strings
+ column_size = max(max_binary_size, 1)
+
+ paraminfo = param_info()
+ paraminfo.paramCType = c_type
+ paraminfo.paramSQLType = sql_type
+ paraminfo.inputOutputType = ddbc_sql_const.SQL_PARAM_INPUT.value
+ paraminfo.columnSize = column_size
+ paraminfo.decimalDigits = decimal_digits
+ paraminfo.isDAE = is_dae
+
+ # Ensure we never have SQL_C_DEFAULT (0) for C-type
+ if paraminfo.paramCType == 0:
+ paraminfo.paramCType = ddbc_sql_const.SQL_C_DEFAULT.value
+
+ parameters_type.append(paraminfo)
+ else:
+ # Use auto-detection for columns without explicit types
+ column = (
+ [row[col_index] for row in seq_of_parameters]
+ if hasattr(seq_of_parameters, "__getitem__")
+ else []
+ )
+ sample_value, min_val, max_val = self._compute_column_type(column)
+
+ dummy_row = list(sample_row)
+ paraminfo = self._create_parameter_types_list(
+ sample_value,
+ param_info,
+ dummy_row,
+ col_index,
+ min_val=min_val,
+ max_val=max_val,
+ )
+ # Special handling for binary data in auto-detected types
+ if paraminfo.paramSQLType in (
+ ddbc_sql_const.SQL_BINARY.value,
+ ddbc_sql_const.SQL_VARBINARY.value,
+ ddbc_sql_const.SQL_LONGVARBINARY.value,
+ ):
+ # Find the maximum size needed for any row's binary data
+ max_binary_size = 0
+ for row in seq_of_parameters:
+ value = row[col_index]
+ if value is not None and isinstance(value, (bytes, bytearray)):
+ max_binary_size = max(max_binary_size, len(value))
+
+ # For SQL Server VARBINARY(MAX), we need to use large object binding
+ if max_binary_size > 8000:
+ paraminfo.paramSQLType = ddbc_sql_const.SQL_LONGVARBINARY.value
+ paraminfo.isDAE = True
+
+ # Update column_size to actual maximum size
+ # Always ensure at least a minimum size of 1 for empty strings
+ paraminfo.columnSize = max(max_binary_size, 1)
- columnwise_params = self._transpose_rowwise_to_columnwise(seq_of_parameters)
- log('info', "Executing batch query with %d parameter sets:\n%s",
- len(seq_of_parameters), "\n".join(f" {i+1}: {tuple(p) if isinstance(p, (list, tuple)) else p}" for i, p in enumerate(seq_of_parameters))
+ parameters_type.append(paraminfo)
+ if paraminfo.isDAE:
+ any_dae = True
+
+ if any_dae:
+ logger.debug(
+ "DAE parameters detected. Falling back to row-by-row execution with streaming.",
+ )
+ for row in seq_of_parameters:
+ self.execute(operation, row)
+ return
+
+ # Process parameters into column-wise format with possible type conversions
+ # First, convert any Decimal types as needed for NUMERIC/DECIMAL columns
+ processed_parameters = []
+ for row in seq_of_parameters:
+ processed_row = list(row)
+ for i, val in enumerate(processed_row):
+ if val is None:
+ continue
+ if (
+ isinstance(val, decimal.Decimal)
+ and parameters_type[i].paramSQLType == ddbc_sql_const.SQL_VARCHAR.value
+ ):
+ processed_row[i] = format(val, "f")
+ # Existing numeric conversion
+ elif parameters_type[i].paramSQLType in (
+ ddbc_sql_const.SQL_DECIMAL.value,
+ ddbc_sql_const.SQL_NUMERIC.value,
+ ) and not isinstance(val, decimal.Decimal):
+ try:
+ processed_row[i] = decimal.Decimal(str(val))
+ except Exception as e: # pylint: disable=broad-exception-caught
+ raise ValueError(
+ f"Failed to convert parameter at row {row}, column {i} to Decimal: {e}"
+ ) from e
+ processed_parameters.append(processed_row)
+
+ # Now transpose the processed parameters
+ columnwise_params, row_count = self._transpose_rowwise_to_columnwise(processed_parameters)
+
+ # Get encoding settings
+ encoding_settings = self._get_encoding_settings()
+
+ # Add debug logging
+ logger.debug(
+ "Executing batch query with %d parameter sets:\n%s",
+ len(seq_of_parameters),
+ "\n".join(
+ f" {i+1}: {tuple(p) if isinstance(p, (list, tuple)) else p}"
+ for i, p in enumerate(seq_of_parameters[:5])
+ ), # Limit to first 5 rows for large batches
)
- # Execute batched statement
ret = ddbc_bindings.SQLExecuteMany(
- self.hstmt,
- operation,
- columnwise_params,
- parameters_type,
- len(seq_of_parameters)
+ self.hstmt, operation, columnwise_params, parameters_type, row_count, encoding_settings
)
- check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, ret)
- self.rowcount = ddbc_bindings.DDBCSQLRowCount(self.hstmt)
- self.last_executed_stmt = operation
- self._initialize_description()
+ # Capture any diagnostic messages after execution
+ if self.hstmt:
+ self.messages.extend(ddbc_bindings.DDBCSQLGetAllDiagRecords(self.hstmt))
+
+ try:
+ check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, ret)
+ self.rowcount = ddbc_bindings.DDBCSQLRowCount(self.hstmt)
+ self.last_executed_stmt = operation
+ self._initialize_description()
+
+ if self.description:
+ self.rowcount = -1
+ self._reset_rownumber()
+ else:
+ self.rowcount = ddbc_bindings.DDBCSQLRowCount(self.hstmt)
+ self._clear_rownumber()
+ finally:
+ # Reset input sizes after execution
+ self._reset_inputsizes()
def fetchone(self) -> Union[None, Row]:
"""
Fetch the next row of a query result set.
-
+
Returns:
Single Row object or None if no more data is available.
"""
self._check_closed() # Check if the cursor is closed
+ char_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_CHAR.value)
+ wchar_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_WCHAR.value)
+
# Fetch raw data
row_data = []
- ret = ddbc_bindings.DDBCSQLFetchOne(self.hstmt, row_data)
-
- if ret == ddbc_sql_const.SQL_NO_DATA.value:
- return None
-
- # Create and return a Row object
- return Row(row_data, self.description)
+ try:
+ ret = ddbc_bindings.DDBCSQLFetchOne(
+ self.hstmt,
+ row_data,
+ char_decoding.get("encoding", "utf-8"),
+ wchar_decoding.get("encoding", "utf-16le"),
+ )
- def fetchmany(self, size: int = None) -> List[Row]:
+ if self.hstmt:
+ self.messages.extend(ddbc_bindings.DDBCSQLGetAllDiagRecords(self.hstmt))
+
+ if ret == ddbc_sql_const.SQL_NO_DATA.value:
+ # No more data available
+ if self._next_row_index == 0 and self.description is not None:
+ # This is an empty result set, set rowcount to 0
+ self.rowcount = 0
+ return None
+
+ # Update internal position after successful fetch
+ if self._skip_increment_for_next_fetch:
+ self._skip_increment_for_next_fetch = False
+ self._next_row_index += 1
+ else:
+ self._increment_rownumber()
+
+ self.rowcount = self._next_row_index
+
+ # Get column and converter maps
+ column_map, converter_map = self._get_column_and_converter_maps()
+ return Row(row_data, column_map, cursor=self, converter_map=converter_map)
+ except Exception as e:
+ # On error, don't increment rownumber - rethrow the error
+ raise e
+
+ def fetchmany(self, size: Optional[int] = None) -> List[Row]:
"""
Fetch the next set of rows of a query result.
-
+
Args:
size: Number of rows to fetch at a time.
-
+
Returns:
List of Row objects.
"""
self._check_closed() # Check if the cursor is closed
+ if not self._has_result_set and self.description:
+ self._reset_rownumber()
if size is None:
size = self.arraysize
if size <= 0:
return []
-
+
+ char_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_CHAR.value)
+ wchar_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_WCHAR.value)
+
# Fetch raw data
rows_data = []
- ret = ddbc_bindings.DDBCSQLFetchMany(self.hstmt, rows_data, size)
-
- # Convert raw data to Row objects
- return [Row(row_data, self.description) for row_data in rows_data]
+ try:
+ ret = ddbc_bindings.DDBCSQLFetchMany(
+ self.hstmt,
+ rows_data,
+ size,
+ char_decoding.get("encoding", "utf-8"),
+ wchar_decoding.get("encoding", "utf-16le"),
+ )
+
+ if self.hstmt:
+ self.messages.extend(ddbc_bindings.DDBCSQLGetAllDiagRecords(self.hstmt))
+
+ # Update rownumber for the number of rows actually fetched
+ if rows_data and self._has_result_set:
+ # advance counters by number of rows actually returned
+ self._next_row_index += len(rows_data)
+ self._rownumber = self._next_row_index - 1
+
+ # Centralize rowcount assignment after fetch
+ if len(rows_data) == 0 and self._next_row_index == 0:
+ self.rowcount = 0
+ else:
+ self.rowcount = self._next_row_index
+
+ # Get column and converter maps
+ column_map, converter_map = self._get_column_and_converter_maps()
+
+ # Convert raw data to Row objects
+ return [
+ Row(row_data, column_map, cursor=self, converter_map=converter_map)
+ for row_data in rows_data
+ ]
+ except Exception as e:
+ # On error, don't increment rownumber - rethrow the error
+ raise e
def fetchall(self) -> List[Row]:
"""
Fetch all (remaining) rows of a query result.
-
+
Returns:
List of Row objects.
"""
self._check_closed() # Check if the cursor is closed
+ if not self._has_result_set and self.description:
+ self._reset_rownumber()
+
+ char_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_CHAR.value)
+ wchar_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_WCHAR.value)
# Fetch raw data
rows_data = []
- ret = ddbc_bindings.DDBCSQLFetchAll(self.hstmt, rows_data)
-
- # Convert raw data to Row objects
- return [Row(row_data, self.description) for row_data in rows_data]
+ try:
+ ret = ddbc_bindings.DDBCSQLFetchAll(
+ self.hstmt,
+ rows_data,
+ char_decoding.get("encoding", "utf-8"),
+ wchar_decoding.get("encoding", "utf-16le"),
+ )
+
+ # Check for errors
+ check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, ret)
+
+ if self.hstmt:
+ self.messages.extend(ddbc_bindings.DDBCSQLGetAllDiagRecords(self.hstmt))
+
+ # Update rownumber for the number of rows actually fetched
+ if rows_data and self._has_result_set:
+ self._next_row_index += len(rows_data)
+ self._rownumber = self._next_row_index - 1
+
+ # Centralize rowcount assignment after fetch
+ if len(rows_data) == 0 and self._next_row_index == 0:
+ self.rowcount = 0
+ else:
+ self.rowcount = self._next_row_index
+
+ # Get column and converter maps
+ column_map, converter_map = self._get_column_and_converter_maps()
+
+ # Convert raw data to Row objects
+ return [
+ Row(row_data, column_map, cursor=self, converter_map=converter_map)
+ for row_data in rows_data
+ ]
+ except Exception as e:
+ # On error, don't increment rownumber - rethrow the error
+ raise e
def nextset(self) -> Union[bool, None]:
"""
@@ -780,24 +2407,605 @@ def nextset(self) -> Union[bool, None]:
Raises:
Error: If the previous call to execute did not produce any result set.
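+
+        Example (illustrative):
+            cursor.execute("SELECT 1; SELECT 2")
+            first = cursor.fetchall()    # rows from the first result set
+            cursor.nextset()             # True: a second result set exists
+            second = cursor.fetchall()   # rows from the second result set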
"""
+ logger.debug("nextset: Moving to next result set")
self._check_closed() # Check if the cursor is closed
+ # Clear messages per DBAPI
+ self.messages = []
+
+ # Clear cached column and converter maps for the new result set
+ self._cached_column_map = None
+ self._cached_converter_map = None
+
# Skip to the next result set
ret = ddbc_bindings.DDBCSQLMoreResults(self.hstmt)
check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, self.hstmt, ret)
+
if ret == ddbc_sql_const.SQL_NO_DATA.value:
+ logger.debug("nextset: No more result sets available")
+ self._clear_rownumber()
+ self.description = None
return False
+
+ self._reset_rownumber()
+
+ # Initialize description for the new result set
+ column_metadata = []
+ try:
+ ddbc_bindings.DDBCSQLDescribeCol(self.hstmt, column_metadata)
+ self._initialize_description(column_metadata)
+
+ # Pre-build column map and converter map for the new result set
+ if self.description:
+ self._cached_column_map = {
+ col_desc[0]: i for i, col_desc in enumerate(self.description)
+ }
+ self._cached_converter_map = self._build_converter_map()
+ except Exception as e: # pylint: disable=broad-exception-caught
+ # If describe fails, there might be no results in this result set
+ self.description = None
+
+ logger.debug(
+ "nextset: Moved to next result set - column_count=%d",
+ len(self.description) if self.description else 0,
+ )
return True
+ def _bulkcopy(
+ self, table_name: str, data: Iterable[Union[Tuple, List]], **kwargs
+ ): # pragma: no cover
+ """
+ Perform bulk copy operation for high-performance data loading.
+
+ Args:
+ table_name: Target table name (can include schema, e.g., 'dbo.MyTable').
+ The table must exist and the user must have INSERT permissions.
+
+ data: Iterable of tuples or lists containing row data to be inserted.
+
+ Data Format Requirements:
+ - Each element in the iterable represents one row
+ - Each row should be a tuple or list of column values
+ - Column order must match the target table's column order (by ordinal
+ position), unless column_mappings is specified
+ - The number of values in each row must match the number of columns
+ in the target table
+
+ **kwargs: Additional bulk copy options.
+
+ column_mappings (List[Tuple[int, str]], optional):
+ Maps source data column indices to target table column names.
+ Each tuple is (source_index, target_column_name) where:
+ - source_index: 0-based index of the column in the source data
+ - target_column_name: Name of the target column in the database table
+
+ When omitted: Columns are mapped by ordinal position (first data
+ column → first table column, second → second, etc.)
+
+ When specified: Only the mapped columns are inserted; unmapped
+ source columns are ignored, and unmapped target columns must
+ have default values or allow NULL.
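+
+            Example (illustrative; assumes dbo.MyTable has id and name columns):
+                cursor._bulkcopy(
+                    "dbo.MyTable",
+                    [(1, "a"), (2, "b")],
+                    column_mappings=[(0, "id"), (1, "name")],
+                )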
+
+ Returns:
+ Dictionary with bulk copy results including:
+ - rows_copied: Number of rows successfully copied
+ - batch_count: Number of batches processed
+ - elapsed_time: Time taken for the operation
+
+ Raises:
+ ImportError: If mssql_py_core library is not installed
+ TypeError: If data is None, not iterable, or is a string/bytes
+ ValueError: If table_name is empty or parameters are invalid
+ RuntimeError: If connection string is not available
+ """
+ try:
+ import mssql_py_core
+ except ImportError as exc:
+ raise ImportError(
+ "Bulk copy requires the mssql_py_core library which is not installed. "
+ "To install, run: pip install mssql_py_core "
+ ) from exc
+
+ # Validate inputs
+ if not table_name or not isinstance(table_name, str):
+ raise ValueError("table_name must be a non-empty string")
+
+ # Validate that data is iterable (but not a string or bytes, which are technically iterable)
+ if data is None:
+ raise TypeError("data must be an iterable of tuples or lists, got None")
+ if isinstance(data, (str, bytes)):
+ raise TypeError(
+ f"data must be an iterable of tuples or lists, got {type(data).__name__}. "
+ "Strings and bytes are not valid row collections."
+ )
+ if not hasattr(data, "__iter__"):
+ raise TypeError(
+ f"data must be an iterable of tuples or lists, got non-iterable {type(data).__name__}"
+ )
+
+ # Extract and validate kwargs with defaults
+ batch_size = kwargs.get("batch_size", None)
+ timeout = kwargs.get("timeout", 30)
+
+ # Validate batch_size type and value (only if explicitly provided)
+ if batch_size is not None:
+ if not isinstance(batch_size, (int, float)):
+ raise TypeError(
+ f"batch_size must be a positive integer, got {type(batch_size).__name__}"
+ )
+ if batch_size <= 0:
+ raise ValueError(f"batch_size must be positive, got {batch_size}")
+
+ # Validate timeout type and value
+ if not isinstance(timeout, (int, float)):
+ raise TypeError(f"timeout must be a positive number, got {type(timeout).__name__}")
+ if timeout <= 0:
+ raise ValueError(f"timeout must be positive, got {timeout}")
+
+ # Get and parse connection string
+ if not hasattr(self.connection, "connection_str"):
+ raise RuntimeError("Connection string not available for bulk copy")
+
+ # Use the proper connection string parser that handles braced values
+ from mssql_python.connection_string_parser import _ConnectionStringParser
+
+ parser = _ConnectionStringParser(validate_keywords=False)
+ params = parser._parse(self.connection.connection_str)
+
+ if not params.get("server"):
+ raise ValueError("SERVER parameter is required in connection string")
+
+ if not params.get("database"):
+ raise ValueError(
+ "DATABASE parameter is required in connection string for bulk copy. "
+ "Specify the target database explicitly to avoid accidentally writing to system databases."
+ )
+
+ # Build connection context for bulk copy library
+ # Note: Password is extracted separately to avoid storing it in the main context
+ # dict that could be accidentally logged or exposed in error messages.
+ trust_cert = params.get("trustservercertificate", "yes").lower() in ("yes", "true")
+
+ # Parse encryption setting from connection string
+ encrypt_param = params.get("encrypt")
+ if encrypt_param is not None:
+ encrypt_value = encrypt_param.strip().lower()
+ if encrypt_value in ("yes", "true", "mandatory", "required"):
+ encryption = "Required"
+ elif encrypt_value in ("no", "false", "optional"):
+ encryption = "Optional"
+ else:
+ # Pass through unrecognized values (e.g., "Strict") to the underlying driver
+ encryption = encrypt_param
+ else:
+ encryption = "Optional"
+
+ context = {
+ "server": params.get("server"),
+ "database": params.get("database"),
+ "user_name": params.get("uid", ""),
+ "trust_server_certificate": trust_cert,
+ "encryption": encryption,
+ }
+
+ # Extract password separately to avoid storing it in generic context that may be logged
+ password = params.get("pwd", "")
+ pycore_context = dict(context)
+ pycore_context["password"] = password
+
+ pycore_connection = None
+ pycore_cursor = None
+ try:
+ pycore_connection = mssql_py_core.PyCoreConnection(pycore_context)
+ pycore_cursor = pycore_connection.cursor()
+
+ result = pycore_cursor.bulkcopy(table_name, iter(data), **kwargs)
+
+ return result
+
+ except Exception as e:
+ # Log the error for debugging (without exposing credentials)
+ logger.debug(
+ "Bulk copy operation failed for table '%s': %s: %s",
+ table_name,
+ type(e).__name__,
+ str(e),
+ )
+ # Re-raise without exposing connection context in the error chain
+ # to prevent credential leakage in stack traces
+ raise type(e)(str(e)) from None
+
+ finally:
+ # Clear sensitive data to minimize memory exposure
+ password = ""
+ if pycore_context:
+ pycore_context["password"] = ""
+ pycore_context["user_name"] = ""
+ # Clean up bulk copy resources
+ for resource in (pycore_cursor, pycore_connection):
+ if resource and hasattr(resource, "close"):
+ try:
+ resource.close()
+ except Exception as cleanup_error:
+ # Log cleanup errors at debug level to aid troubleshooting
+ # without masking the original exception
+ logger.debug(
+ "Failed to close bulk copy resource %s: %s",
+ type(resource).__name__,
+ cleanup_error,
+ )
+
+ def __enter__(self):
+ """
+ Enter the runtime context for the cursor.
+
+ Returns:
+ The cursor instance itself.
+ """
+ self._check_closed()
+ return self
+
+ def __exit__(self, *args):
+ """Closes the cursor when exiting the context, ensuring proper resource cleanup."""
+ if not self.closed:
+ self.close()
+
+ def fetchval(self):
+ """
+ Fetch the first column of the first row if there are results.
+
+ This is a convenience method for queries that return a single value,
+ such as SELECT COUNT(*) FROM table, SELECT MAX(id) FROM table, etc.
+
+ Returns:
+ The value of the first column of the first row, or None if no rows
+ are available or the first column value is NULL.
+
+ Raises:
+ Exception: If the cursor is closed.
+
+ Example:
+ >>> count = cursor.execute('SELECT COUNT(*) FROM users').fetchval()
+ >>> max_id = cursor.execute('SELECT MAX(id) FROM users').fetchval()
+ >>> name = cursor.execute('SELECT name FROM users WHERE id = ?', user_id).fetchval()
+
+ Note:
+ This is a convenience extension beyond the DB-API 2.0 specification.
+ After calling fetchval(), the cursor position advances by one row,
+ just like fetchone().
+ """
+ logger.debug("fetchval: Fetching single value from first column")
+ self._check_closed() # Check if the cursor is closed
+
+ # Check if this is a result-producing statement
+ if not self.description:
+ # Non-result-set statement (INSERT, UPDATE, DELETE, etc.)
+ logger.debug("fetchval: No result set available (non-SELECT statement)")
+ return None
+
+ # Fetch the first row
+ row = self.fetchone()
+
+ if row is None:
+ logger.debug("fetchval: No value available (no rows)")
+ return None
+
+ logger.debug("fetchval: Value retrieved successfully")
+ return row[0]
+
+ def commit(self):
+ """
+ Commit all SQL statements executed on the connection that created this cursor.
+
+ This is a convenience method that calls commit() on the underlying connection.
+ It affects all cursors created by the same connection since the last commit/rollback.
+
+        The benefit is that code which only holds a cursor can commit without having
+        to track the connection object separately.
+
+ Raises:
+ Exception: If the cursor is closed or if the commit operation fails.
+
+ Example:
+ >>> cursor.execute("INSERT INTO users (name) VALUES (?)", "John")
+ >>> cursor.commit() # Commits the INSERT
+
+ Note:
+ This is equivalent to calling connection.commit() but provides convenience
+ for code that only has access to the cursor object.
+ """
+ self._check_closed() # Check if the cursor is closed
+
+ # Clear messages per DBAPI
+ self.messages = []
+
+ # Delegate to the connection's commit method
+ self._connection.commit()
+
+ def rollback(self):
+ """
+ Roll back all SQL statements executed on the connection that created this cursor.
+
+ This is a convenience method that calls rollback() on the underlying connection.
+ It affects all cursors created by the same connection since the last commit/rollback.
+
+        The benefit is that code which only holds a cursor can roll back without having
+        to track the connection object separately.
+
+ Raises:
+ Exception: If the cursor is closed or if the rollback operation fails.
+
+ Example:
+ >>> cursor.execute("INSERT INTO users (name) VALUES (?)", "John")
+ >>> cursor.rollback() # Rolls back the INSERT
+
+ Note:
+ This is equivalent to calling connection.rollback() but provides convenience
+ for code that only has access to the cursor object.
+ """
+ self._check_closed() # Check if the cursor is closed
+
+ # Clear messages per DBAPI
+ self.messages = []
+
+ # Delegate to the connection's rollback method
+ self._connection.rollback()
+
def __del__(self):
"""
Destructor to ensure the cursor is closed when it is no longer needed.
This is a safety net to ensure resources are cleaned up
even if close() was not called explicitly.
+ If the cursor is already closed, it will not raise an exception during cleanup.
"""
- if "_closed" not in self.__dict__ or not self._closed:
+ if "closed" not in self.__dict__ or not self.closed:
try:
self.close()
- except Exception as e:
+ except Exception as e: # pylint: disable=broad-exception-caught
# Don't raise an exception in __del__, just log it
- log('error', "Error during cursor cleanup in __del__: %s", e)
\ No newline at end of file
+ # If the interpreter is shutting down, logging may no longer be available
+ import sys
+
+ if sys and sys.is_finalizing():
+ # Suppress logging during interpreter shutdown
+ return
+ logger.debug("Exception during cursor cleanup in __del__: %s", e)
+
+ def scroll(
+ self, value: int, mode: str = "relative"
+ ) -> None: # pylint: disable=too-many-branches
+ """
+ Scroll within the current result set using SQLFetchScroll with SQL_FETCH_NEXT
+ (forward-only semantics):
+ - relative(N>0): consume N rows; rownumber = previous + N; the next fetch
+   returns the following row. Raises IndexError if the end of the result set
+   is reached.
+ - relative(0): no-op; the cursor position is unchanged.
+ - relative(N<0): raises NotSupportedError; backward scrolling is not
+   supported on a forward-only cursor.
+ - absolute mode: raises NotSupportedError; absolute positioning is not
+   supported on a forward-only cursor.
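+
+ Example:
+ >>> cursor.execute("SELECT id FROM users ORDER BY id")
+ >>> cursor.scroll(2)  # skip the first two rows
+ >>> row = cursor.fetchone()  # returns the third row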
+ """
+ logger.debug(
+ "scroll: Scrolling cursor - mode=%s, value=%d, current_rownumber=%d",
+ mode,
+ value,
+ self._rownumber,
+ )
+ self._check_closed()
+
+ # Clear messages per DBAPI
+ self.messages = []
+
+ if mode not in ("relative", "absolute"):
+ logger.error("scroll: Invalid mode - mode=%s", mode)
+ raise ProgrammingError(
+ "Invalid scroll mode",
+ f"mode must be 'relative' or 'absolute', got '{mode}'",
+ )
+ if not self._has_result_set:
+ logger.error("scroll: No active result set")
+ raise ProgrammingError(
+ "No active result set",
+ "Cannot scroll: no result set available. Execute a query first.",
+ )
+ if not isinstance(value, int):
+ logger.error("scroll: Invalid value type - type=%s", type(value).__name__)
+ raise ProgrammingError(
+ "Invalid scroll value type",
+ f"scroll value must be an integer, got {type(value).__name__}",
+ )
+
+ # Relative backward not supported
+ if mode == "relative" and value < 0:
+ logger.error("scroll: Backward scrolling not supported - value=%d", value)
+ raise NotSupportedError(
+ "Backward scrolling not supported",
+ f"Cannot move backward by {value} rows on a forward-only cursor",
+ )
+
+ row_data: list = []
+
+ # Absolute positioning not supported with forward-only cursors
+ if mode == "absolute":
+ raise NotSupportedError(
+ "Absolute positioning not supported",
+ "Forward-only cursors do not support absolute positioning",
+ )
+
+ try:
+ if mode == "relative":
+ if value == 0:
+ return
+
+ # For forward-only cursors, use multiple SQL_FETCH_NEXT calls
+ # This matches pyodbc's approach for skip operations
+ for _ in range(value):
+ ret = ddbc_bindings.DDBCSQLFetchScroll(
+ self.hstmt, ddbc_sql_const.SQL_FETCH_NEXT.value, 0, row_data
+ )
+ if ret == ddbc_sql_const.SQL_NO_DATA.value:
+ raise IndexError(
+ "Cannot scroll to specified position: end of result set reached"
+ )
+
+ # Update position tracking
+ self._rownumber = self._rownumber + value
+ self._next_row_index = self._rownumber + 1
+ logger.debug(
+ "scroll: Scroll complete - new_rownumber=%d, next_row_index=%d",
+ self._rownumber,
+ self._next_row_index,
+ )
+ return
+
+ except Exception as e: # pylint: disable=broad-exception-caught
+ if isinstance(e, (IndexError, NotSupportedError)):
+ raise
+ raise IndexError(f"Scroll operation failed: {e}") from e
+
+ def skip(self, count: int) -> None:
+ """
+ Skip the next count records in the query result set.
+
+ Args:
+ count: Number of records to skip.
+
+ Raises:
+ IndexError: If attempting to skip past the end of the result set.
+ ProgrammingError: If count is not an integer.
+ NotSupportedError: If attempting to skip backwards.
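+
+ Example:
+ >>> cursor.execute("SELECT id FROM users ORDER BY id")
+ >>> cursor.skip(2)  # discard the next two rows
+ >>> row = cursor.fetchone()  # returns the third row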
+ """
+ self._check_closed()
+
+ # Clear messages
+ self.messages = []
+
+ # Simply delegate to the scroll method with 'relative' mode
+ self.scroll(count, "relative")
+
+ def _execute_tables( # pylint: disable=too-many-arguments,too-many-positional-arguments
+ self,
+ stmt_handle,
+ catalog_name=None,
+ schema_name=None,
+ table_name=None,
+ table_type=None,
+ ):
+ """
+ Execute SQLTables ODBC function to retrieve table metadata.
+
+ Args:
+ stmt_handle: ODBC statement handle
+ catalog_name: The catalog name pattern
+ schema_name: The schema name pattern
+ table_name: The table name pattern
+ table_type: The table type filter
+ """
+ # Convert None values to empty strings for ODBC
+ catalog = "" if catalog_name is None else catalog_name
+ schema = "" if schema_name is None else schema_name
+ table = "" if table_name is None else table_name
+ types = "" if table_type is None else table_type
+
+ # Call the ODBC SQLTables function
+ retcode = ddbc_bindings.DDBCSQLTables(stmt_handle, catalog, schema, table, types)
+
+ # Check return code and handle errors
+ check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, stmt_handle, retcode)
+
+ # Capture any diagnostic messages
+ if stmt_handle:
+ self.messages.extend(ddbc_bindings.DDBCSQLGetAllDiagRecords(stmt_handle))
+
+ def tables(
+ self, table=None, catalog=None, schema=None, tableType=None
+ ): # pylint: disable=too-many-arguments,too-many-positional-arguments
+ """
+ Returns information about tables in the database that match the given criteria using
+ the SQLTables ODBC function.
+
+ Args:
+ table (str, optional): The table name pattern. Default is None (all tables).
+ catalog (str, optional): The catalog name. Default is None.
+ schema (str, optional): The schema name pattern. Default is None.
+ tableType (str or list, optional): The table type filter. Default is None.
+ Example: "TABLE" or ["TABLE", "VIEW"]
+
+ Returns:
+ Cursor: The cursor object itself for method chaining with fetch methods.
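+
+ Example:
+ >>> # Illustrative: list user tables and views in the dbo schema
+ >>> rows = cursor.tables(schema="dbo", tableType=["TABLE", "VIEW"]).fetchall()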
+ """
+ self._check_closed()
+ self._reset_cursor()
+
+ # Format table_type parameter - SQLTables expects comma-separated string
+ table_type_str = None
+ if tableType is not None:
+ if isinstance(tableType, (list, tuple)):
+ table_type_str = ",".join(tableType)
+ else:
+ table_type_str = str(tableType)
+
+ try:
+ # Call SQLTables via the helper method
+ self._execute_tables(
+ self.hstmt,
+ catalog_name=catalog,
+ schema_name=schema,
+ table_name=table,
+ table_type=table_type_str,
+ )
+
+ # Define fallback description for tables
+ fallback_description = [
+ ("table_cat", str, None, 128, 128, 0, True),
+ ("table_schem", str, None, 128, 128, 0, True),
+ ("table_name", str, None, 128, 128, 0, False),
+ ("table_type", str, None, 128, 128, 0, False),
+ ("remarks", str, None, 254, 254, 0, True),
+ ]
+
+ # Use the helper method to prepare the result set
+ return self._prepare_metadata_result_set(fallback_description=fallback_description)
+
+ except Exception as e: # pylint: disable=broad-exception-caught
+ # Log the error and re-raise
+ logger.error("Error executing tables query: %s", e)
+ raise
+
+ def callproc(
+ self, procname: str, parameters: Optional[Sequence[Any]] = None
+ ) -> Optional[Sequence[Any]]:
+ """
+ Call a stored database procedure with the given name.
+
+ Args:
+ procname: Name of the stored procedure to call
+ parameters: Optional sequence of parameters to pass to the procedure
+
+ Returns:
+ A sequence containing the result parameters (input parameters unchanged,
+ output parameters with their new values)
+
+ Raises:
+ NotSupportedError: This method is not yet implemented
+ """
+ raise NotSupportedError(
+ driver_error="callproc() is not yet implemented",
+ ddbc_error="Stored procedure calls are not currently supported",
+ )
+
+ def setoutputsize(self, size: int, column: Optional[int] = None) -> None:
+ """
+ Set a column buffer size for fetches of large columns.
+
+ This method is optional and is not implemented in this driver.
+
+ Args:
+ size: Maximum size of the column buffer
+ column: Optional column index (0-based) to set the size for
+
+ Note:
+ This method is a no-op in this implementation as buffer sizes
+ are managed automatically by the underlying driver.
+ """
+ # This is a no-op - buffer sizes are managed automatically
diff --git a/mssql_python/db_connection.py b/mssql_python/db_connection.py
index 9c688ac61..a6b8c614e 100644
--- a/mssql_python/db_connection.py
+++ b/mssql_python/db_connection.py
@@ -3,9 +3,19 @@
Licensed under the MIT license.
This module provides a way to create a new connection object to interact with the database.
"""
+
+from typing import Any, Dict, Optional, Union
+
from mssql_python.connection import Connection
-def connect(connection_str: str = "", autocommit: bool = True, attrs_before: dict = None, **kwargs) -> Connection:
+
+def connect(
+ connection_str: str = "",
+ autocommit: bool = False,
+ attrs_before: Optional[Dict[int, Union[int, str, bytes]]] = None,
+ timeout: int = 0,
+ **kwargs: Any,
+) -> Connection:
"""
Constructor for creating a connection to the database.
@@ -33,5 +43,7 @@ def connect(connection_str: str = "", autocommit: bool = True, attrs_before: dic
be used to perform database operations such as executing queries, committing
transactions, and closing the connection.
"""
- conn = Connection(connection_str, autocommit=autocommit, attrs_before=attrs_before, **kwargs)
+ conn = Connection(
+ connection_str, autocommit=autocommit, attrs_before=attrs_before, timeout=timeout, **kwargs
+ )
return conn
diff --git a/mssql_python/ddbc_bindings.py b/mssql_python/ddbc_bindings.py
index 1d4d32cb3..f8fef87d1 100644
--- a/mssql_python/ddbc_bindings.py
+++ b/mssql_python/ddbc_bindings.py
@@ -1,55 +1,76 @@
+"""
+Dynamic loading of platform-specific DDBC bindings for mssql-python.
+
+This module handles the runtime loading of the appropriate compiled extension
+module based on the current platform, architecture, and Python version.
+"""
+
import os
import importlib.util
import sys
import platform
-def normalize_architecture(platform_name, architecture):
+
+def normalize_architecture(platform_name_param, architecture_param):
"""
Normalize architecture names for the given platform.
-
+
Args:
- platform_name (str): Platform name ('windows', 'darwin', 'linux')
- architecture (str): Architecture string to normalize
-
+ platform_name_param (str): Platform name ('windows', 'darwin', 'linux')
+ architecture_param (str): Architecture string to normalize
+
Returns:
str: Normalized architecture name
-
+
Raises:
ImportError: If architecture is not supported for the given platform
OSError: If platform is not supported
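+
+ Example:
+ >>> normalize_architecture("windows", "AMD64")
+ 'x64'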
"""
- arch_lower = architecture.lower()
-
- if platform_name == "windows":
+ arch_lower = architecture_param.lower()
+
+ if platform_name_param == "windows":
arch_map = {
- "win64": "x64", "amd64": "x64", "x64": "x64",
- "win32": "x86", "x86": "x86",
- "arm64": "arm64"
+ "win64": "x64",
+ "amd64": "x64",
+ "x64": "x64",
+ "win32": "x86",
+ "x86": "x86",
+ "arm64": "arm64",
}
if arch_lower in arch_map:
return arch_map[arch_lower]
- else:
- supported = list(set(arch_map.keys()))
- raise ImportError(f"Unsupported architecture '{architecture}' for platform '{platform_name}'; expected one of {supported}")
-
- elif platform_name == "darwin":
+ supported = list(set(arch_map.keys()))
+ raise ImportError(
+ f"Unsupported architecture '{architecture_param}' for platform "
+ f"'{platform_name_param}'; expected one of {supported}"
+ )
+
+ if platform_name_param == "darwin":
# For macOS, return runtime architecture
return platform.machine().lower()
-
- elif platform_name == "linux":
+
+ if platform_name_param == "linux":
arch_map = {
- "x64": "x86_64", "amd64": "x86_64", "x86_64": "x86_64",
- "arm64": "arm64", "aarch64": "arm64"
+ "x64": "x86_64",
+ "amd64": "x86_64",
+ "x86_64": "x86_64",
+ "arm64": "arm64",
+ "aarch64": "arm64",
}
if arch_lower in arch_map:
return arch_map[arch_lower]
- else:
- supported = list(set(arch_map.keys()))
- raise ImportError(f"Unsupported architecture '{architecture}' for platform '{platform_name}'; expected one of {supported}")
-
- else:
- supported_platforms = ["windows", "darwin", "linux"]
- raise OSError(f"Unsupported platform '{platform_name}'; expected one of {supported_platforms}")
+ supported = list(set(arch_map.keys()))
+ raise ImportError(
+ f"Unsupported architecture '{architecture_param}' for platform "
+ f"'{platform_name_param}'; expected one of {supported}"
+ )
+
+ supported_platforms_list = ["windows", "darwin", "linux"]
+ raise OSError(
+ f"Unsupported platform '{platform_name_param}'; expected one of "
+ f"{supported_platforms_list}"
+ )
+
# Get current Python version and architecture
python_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
@@ -58,25 +79,28 @@ def normalize_architecture(platform_name, architecture):
raw_architecture = platform.machine().lower()
# Special handling for macOS universal2 binaries
-if platform_name == 'darwin':
+if platform_name == "darwin":
architecture = "universal2"
else:
architecture = normalize_architecture(platform_name, raw_architecture)
-
+
# Handle Windows-specific naming for binary files
- if platform_name == 'windows' and architecture == 'x64':
+ if platform_name == "windows" and architecture == "x64":
architecture = "amd64"
# Validate supported platforms
-if platform_name not in ['windows', 'darwin', 'linux']:
- supported_platforms = ['windows', 'darwin', 'linux']
- raise ImportError(f"Unsupported platform '{platform_name}' for mssql-python; expected one of {supported_platforms}")
+if platform_name not in ["windows", "darwin", "linux"]:
+ supported_platforms = ["windows", "darwin", "linux"]
+ raise ImportError(
+ f"Unsupported platform '{platform_name}' for mssql-python; expected one "
+ f"of {supported_platforms}"
+ )
# Determine extension based on platform
-if platform_name == 'windows':
- extension = '.pyd'
+if platform_name == "windows":
+ extension = ".pyd"
else: # macOS or Linux
- extension = '.so'
+ extension = ".so"
# Find the specifically matching module file
module_dir = os.path.dirname(__file__)
@@ -85,20 +109,28 @@ def normalize_architecture(platform_name, architecture):
if not os.path.exists(module_path):
# Fallback to searching for any matching module if the specific one isn't found
- module_files = [f for f in os.listdir(module_dir) if f.startswith('ddbc_bindings.') and f.endswith(extension)]
+ module_files = [
+ f
+ for f in os.listdir(module_dir)
+ if f.startswith("ddbc_bindings.") and f.endswith(extension)
+ ]
if not module_files:
- raise ImportError(f"No ddbc_bindings module found for {python_version}-{architecture} with extension {extension}")
+ raise ImportError(
+ f"No ddbc_bindings module found for {python_version}-{architecture} "
+ f"with extension {extension}"
+ )
module_path = os.path.join(module_dir, module_files[0])
- print(f"Warning: Using fallback module file {module_files[0]} instead of {expected_module}")
+ print(
+ f"Warning: Using fallback module file {module_files[0]} instead of {expected_module}"
+ )
+
# Use the original module name 'ddbc_bindings' that the C extension was compiled with
-name = "ddbc_bindings"
-spec = importlib.util.spec_from_file_location(name, module_path)
+module_name = "ddbc_bindings"
+spec = importlib.util.spec_from_file_location(module_name, module_path)
module = importlib.util.module_from_spec(spec)
-sys.modules[name] = module
+sys.modules[module_name] = module
spec.loader.exec_module(module)
# Copy all attributes from the loaded module to this module
for attr in dir(module):
- if not attr.startswith('__'):
- globals()[attr] = getattr(module, attr)
\ No newline at end of file
+ if not attr.startswith("__"):
+ globals()[attr] = getattr(module, attr)
diff --git a/mssql_python/exceptions.py b/mssql_python/exceptions.py
index 308a85690..f2285bce5 100644
--- a/mssql_python/exceptions.py
+++ b/mssql_python/exceptions.py
@@ -4,22 +4,47 @@
This module contains custom exception classes for the mssql_python package.
These classes are used to raise exceptions when an error occurs while executing a query.
"""
-from mssql_python.logging_config import get_logger
-logger = get_logger()
+from typing import Optional
+from mssql_python.logging import logger
+import builtins
-class Exception(Exception):
+class ConnectionStringParseError(builtins.Exception):
+ """
+ Exception raised when connection string parsing fails.
+
+ This exception is raised when the connection string parser encounters
+ syntax errors, unknown keywords, duplicate keywords, or other validation
+ failures. It collects all errors and reports them together.
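+
+ Example:
+ >>> err = ConnectionStringParseError(["Unknown keyword 'Srver'"])
+ >>> err.errors
+ ["Unknown keyword 'Srver'"]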
+ """
+
+ def __init__(self, errors: list) -> None:
+ """
+ Initialize the error with a list of validation errors.
+
+ Args:
+ errors: List of error messages describing what went wrong
+ """
+ self.errors = errors
+ message = "Connection string parsing failed:\n " + "\n ".join(errors)
+ super().__init__(message)
+
+
+class Exception(builtins.Exception):
"""
Base class for all DB API 2.0 exceptions.
"""
- def __init__(self, driver_error, ddbc_error) -> None:
+ def __init__(self, driver_error: str, ddbc_error: str) -> None:
self.driver_error = driver_error
self.ddbc_error = truncate_error_message(ddbc_error)
- self.message = (
- f"Driver Error: {self.driver_error}; DDBC Error: {self.ddbc_error}"
- )
+ if self.ddbc_error:
+ # Both driver and DDBC errors are present
+ self.message = f"Driver Error: {self.driver_error}; DDBC Error: {self.ddbc_error}"
+ else:
+ # Errors raised by the driver itself should not have a DDBC error message
+ self.message = f"Driver Error: {self.driver_error}"
super().__init__(self.message)
@@ -28,7 +53,7 @@ class Warning(Exception):
Exception raised for important warnings like data truncations while inserting, etc.
"""
- def __init__(self, driver_error, ddbc_error) -> None:
+ def __init__(self, driver_error: str, ddbc_error: str) -> None:
super().__init__(driver_error, ddbc_error)
@@ -37,7 +62,7 @@ class Error(Exception):
Base class for errors.
"""
- def __init__(self, driver_error, ddbc_error) -> None:
+ def __init__(self, driver_error: str, ddbc_error: str) -> None:
super().__init__(driver_error, ddbc_error)
@@ -47,7 +72,7 @@ class InterfaceError(Error):
interface rather than the database itself.
"""
- def __init__(self, driver_error, ddbc_error) -> None:
+ def __init__(self, driver_error: str, ddbc_error: str) -> None:
super().__init__(driver_error, ddbc_error)
@@ -56,7 +81,7 @@ class DatabaseError(Error):
Exception raised for errors that are related to the database.
"""
- def __init__(self, driver_error, ddbc_error) -> None:
+ def __init__(self, driver_error: str, ddbc_error: str) -> None:
super().__init__(driver_error, ddbc_error)
@@ -66,7 +91,7 @@ class DataError(DatabaseError):
processed data like division by zero, numeric value out of range, etc.
"""
- def __init__(self, driver_error, ddbc_error) -> None:
+ def __init__(self, driver_error: str, ddbc_error: str) -> None:
super().__init__(driver_error, ddbc_error)
@@ -76,7 +101,7 @@ class OperationalError(DatabaseError):
and not necessarily under the control of the programmer.
"""
- def __init__(self, driver_error, ddbc_error) -> None:
+ def __init__(self, driver_error: str, ddbc_error: str) -> None:
super().__init__(driver_error, ddbc_error)
@@ -86,7 +111,7 @@ class IntegrityError(DatabaseError):
e.g., a foreign key check fails.
"""
- def __init__(self, driver_error, ddbc_error) -> None:
+ def __init__(self, driver_error: str, ddbc_error: str) -> None:
super().__init__(driver_error, ddbc_error)
@@ -96,7 +121,7 @@ class InternalError(DatabaseError):
e.g., the cursor is not valid anymore, the transaction is out of sync, etc.
"""
- def __init__(self, driver_error, ddbc_error) -> None:
+ def __init__(self, driver_error: str, ddbc_error: str) -> None:
super().__init__(driver_error, ddbc_error)
@@ -107,7 +132,7 @@ class ProgrammingError(DatabaseError):
wrong number of parameters specified, etc.
"""
- def __init__(self, driver_error, ddbc_error) -> None:
+ def __init__(self, driver_error: str, ddbc_error: str) -> None:
super().__init__(driver_error, ddbc_error)
@@ -118,12 +143,12 @@ class NotSupportedError(DatabaseError):
on a connection that does not support transaction or has transactions turned off.
"""
- def __init__(self, driver_error, ddbc_error) -> None:
+ def __init__(self, driver_error: str, ddbc_error: str) -> None:
super().__init__(driver_error, ddbc_error)
# Mapping SQLSTATE codes to custom exception classes
-def sqlstate_to_exception(sqlstate: str, ddbc_error: str) -> Exception:
+def sqlstate_to_exception(sqlstate: str, ddbc_error: str) -> Optional[Exception]:
"""
Map an SQLSTATE code to a custom exception class.
This function maps an SQLSTATE code to a custom exception class based on the code.
@@ -135,69 +160,50 @@ def sqlstate_to_exception(sqlstate: str, ddbc_error: str) -> Exception:
mapping[str, Exception]: A mapping of SQLSTATE codes to custom exception classes.
"""
mapping = {
- "01000": Warning(
- driver_error="General warning",
- ddbc_error=ddbc_error
- ), # General warning
+ "01000": Warning(driver_error="General warning", ddbc_error=ddbc_error), # General warning
"01001": OperationalError(
- driver_error="Cursor operation conflict",
- ddbc_error=ddbc_error
+ driver_error="Cursor operation conflict", ddbc_error=ddbc_error
), # Cursor operation conflict
"01002": OperationalError(
- driver_error="Disconnect error",
- ddbc_error=ddbc_error
+ driver_error="Disconnect error", ddbc_error=ddbc_error
), # Disconnect error
"01003": DataError(
- driver_error="NULL value eliminated in set function",
- ddbc_error=ddbc_error
+ driver_error="NULL value eliminated in set function", ddbc_error=ddbc_error
), # NULL value eliminated in set function
"01004": DataError(
- driver_error="String data, right-truncated",
- ddbc_error=ddbc_error
+ driver_error="String data, right-truncated", ddbc_error=ddbc_error
), # String data, right-truncated
"01006": OperationalError(
- driver_error="Privilege not revoked",
- ddbc_error=ddbc_error
+ driver_error="Privilege not revoked", ddbc_error=ddbc_error
), # Privilege not revoked
"01007": OperationalError(
- driver_error="Privilege not granted",
- ddbc_error=ddbc_error
+ driver_error="Privilege not granted", ddbc_error=ddbc_error
), # Privilege not granted
"01S00": ProgrammingError(
- driver_error="Invalid connection string attribute",
- ddbc_error=ddbc_error
+ driver_error="Invalid connection string attribute", ddbc_error=ddbc_error
), # Invalid connection string attribute
- "01S01": DataError(
- driver_error="Error in row",
- ddbc_error=ddbc_error
- ), # Error in row
+ "01S01": DataError(driver_error="Error in row", ddbc_error=ddbc_error), # Error in row
"01S02": Warning(
- driver_error="Option value changed",
- ddbc_error=ddbc_error
+ driver_error="Option value changed", ddbc_error=ddbc_error
), # Option value changed
"01S06": OperationalError(
driver_error="Attempt to fetch before the result set returned the first rowset",
ddbc_error=ddbc_error,
), # Attempt to fetch before the result set returned the first rowset
"01S07": DataError(
- driver_error="Fractional truncation",
- ddbc_error=ddbc_error
+ driver_error="Fractional truncation", ddbc_error=ddbc_error
), # Fractional truncation
"01S08": OperationalError(
- driver_error="Error saving File DSN",
- ddbc_error=ddbc_error
+ driver_error="Error saving File DSN", ddbc_error=ddbc_error
), # Error saving File DSN
"01S09": ProgrammingError(
- driver_error="Invalid keyword",
- ddbc_error=ddbc_error
+ driver_error="Invalid keyword", ddbc_error=ddbc_error
), # Invalid keyword
"07001": ProgrammingError(
- driver_error="Wrong number of parameters",
- ddbc_error=ddbc_error
+ driver_error="Wrong number of parameters", ddbc_error=ddbc_error
), # Wrong number of parameters
"07002": ProgrammingError(
- driver_error="COUNT field incorrect",
- ddbc_error=ddbc_error
+ driver_error="COUNT field incorrect", ddbc_error=ddbc_error
), # COUNT field incorrect
"07005": ProgrammingError(
driver_error="Prepared statement not a cursor-specification",
@@ -208,36 +214,28 @@ def sqlstate_to_exception(sqlstate: str, ddbc_error: str) -> Exception:
ddbc_error=ddbc_error,
), # Restricted data type attribute violation
"07009": ProgrammingError(
- driver_error="Invalid descriptor index",
- ddbc_error=ddbc_error
+ driver_error="Invalid descriptor index", ddbc_error=ddbc_error
), # Invalid descriptor index
"07S01": ProgrammingError(
- driver_error="Invalid use of default parameter",
- ddbc_error=ddbc_error
+ driver_error="Invalid use of default parameter", ddbc_error=ddbc_error
), # Invalid use of default parameter
"08001": OperationalError(
- driver_error="Client unable to establish connection",
- ddbc_error=ddbc_error
+ driver_error="Client unable to establish connection", ddbc_error=ddbc_error
), # Client unable to establish connection
"08002": OperationalError(
- driver_error="Connection name in use",
- ddbc_error=ddbc_error
+ driver_error="Connection name in use", ddbc_error=ddbc_error
), # Connection name in use
"08003": OperationalError(
- driver_error="Connection not open",
- ddbc_error=ddbc_error
+ driver_error="Connection not open", ddbc_error=ddbc_error
), # Connection not open
"08004": OperationalError(
- driver_error="Server rejected the connection",
- ddbc_error=ddbc_error
+ driver_error="Server rejected the connection", ddbc_error=ddbc_error
), # Server rejected the connection
"08007": OperationalError(
- driver_error="Connection failure during transaction",
- ddbc_error=ddbc_error
+ driver_error="Connection failure during transaction", ddbc_error=ddbc_error
), # Connection failure during transaction
"08S01": OperationalError(
- driver_error="Communication link failure",
- ddbc_error=ddbc_error
+ driver_error="Communication link failure", ddbc_error=ddbc_error
), # Communication link failure
"21S01": ProgrammingError(
driver_error="Insert value list does not match column list",
@@ -248,188 +246,145 @@ def sqlstate_to_exception(sqlstate: str, ddbc_error: str) -> Exception:
ddbc_error=ddbc_error,
), # Degree of derived table does not match column list
"22001": DataError(
- driver_error="String data, right-truncated",
- ddbc_error=ddbc_error
+ driver_error="String data, right-truncated", ddbc_error=ddbc_error
), # String data, right-truncated
"22002": DataError(
driver_error="Indicator variable required but not supplied",
ddbc_error=ddbc_error,
), # Indicator variable required but not supplied
"22003": DataError(
- driver_error="Numeric value out of range",
- ddbc_error=ddbc_error
+ driver_error="Numeric value out of range", ddbc_error=ddbc_error
), # Numeric value out of range
"22007": DataError(
- driver_error="Invalid datetime format",
- ddbc_error=ddbc_error
+ driver_error="Invalid datetime format", ddbc_error=ddbc_error
), # Invalid datetime format
"22008": DataError(
- driver_error="Datetime field overflow",
- ddbc_error=ddbc_error
+ driver_error="Datetime field overflow", ddbc_error=ddbc_error
), # Datetime field overflow
"22012": DataError(
- driver_error="Division by zero",
- ddbc_error=ddbc_error
+ driver_error="Division by zero", ddbc_error=ddbc_error
), # Division by zero
"22015": DataError(
- driver_error="Interval field overflow",
- ddbc_error=ddbc_error
+ driver_error="Interval field overflow", ddbc_error=ddbc_error
), # Interval field overflow
"22018": DataError(
driver_error="Invalid character value for cast specification",
ddbc_error=ddbc_error,
), # Invalid character value for cast specification
"22019": ProgrammingError(
- driver_error="Invalid escape character",
- ddbc_error=ddbc_error
+ driver_error="Invalid escape character", ddbc_error=ddbc_error
), # Invalid escape character
"22025": ProgrammingError(
- driver_error="Invalid escape sequence",
- ddbc_error=ddbc_error
+ driver_error="Invalid escape sequence", ddbc_error=ddbc_error
), # Invalid escape sequence
"22026": DataError(
- driver_error="String data, length mismatch",
- ddbc_error=ddbc_error
+ driver_error="String data, length mismatch", ddbc_error=ddbc_error
), # String data, length mismatch
"23000": IntegrityError(
- driver_error="Integrity constraint violation",
- ddbc_error=ddbc_error
+ driver_error="Integrity constraint violation", ddbc_error=ddbc_error
), # Integrity constraint violation
"24000": ProgrammingError(
- driver_error="Invalid cursor state",
- ddbc_error=ddbc_error
+ driver_error="Invalid cursor state", ddbc_error=ddbc_error
), # Invalid cursor state
"25000": OperationalError(
- driver_error="Invalid transaction state",
- ddbc_error=ddbc_error
+ driver_error="Invalid transaction state", ddbc_error=ddbc_error
), # Invalid transaction state
"25S01": OperationalError(
- driver_error="Transaction state",
- ddbc_error=ddbc_error
+ driver_error="Transaction state", ddbc_error=ddbc_error
), # Transaction state
"25S02": OperationalError(
- driver_error="Transaction is still active",
- ddbc_error=ddbc_error
+ driver_error="Transaction is still active", ddbc_error=ddbc_error
), # Transaction is still active
"25S03": OperationalError(
- driver_error="Transaction is rolled back",
- ddbc_error=ddbc_error
+ driver_error="Transaction is rolled back", ddbc_error=ddbc_error
), # Transaction is rolled back
"28000": OperationalError(
- driver_error="Invalid authorization specification",
- ddbc_error=ddbc_error
+ driver_error="Invalid authorization specification", ddbc_error=ddbc_error
), # Invalid authorization specification
"34000": ProgrammingError(
- driver_error="Invalid cursor name",
- ddbc_error=ddbc_error
+ driver_error="Invalid cursor name", ddbc_error=ddbc_error
), # Invalid cursor name
"3C000": ProgrammingError(
- driver_error="Duplicate cursor name",
- ddbc_error=ddbc_error
+ driver_error="Duplicate cursor name", ddbc_error=ddbc_error
), # Duplicate cursor name
"3D000": ProgrammingError(
- driver_error="Invalid catalog name",
- ddbc_error=ddbc_error
+ driver_error="Invalid catalog name", ddbc_error=ddbc_error
), # Invalid catalog name
"3F000": ProgrammingError(
- driver_error="Invalid schema name",
- ddbc_error=ddbc_error
+ driver_error="Invalid schema name", ddbc_error=ddbc_error
), # Invalid schema name
"40001": OperationalError(
- driver_error="Serialization failure",
- ddbc_error=ddbc_error
+ driver_error="Serialization failure", ddbc_error=ddbc_error
), # Serialization failure
"40002": IntegrityError(
- driver_error="Integrity constraint violation",
- ddbc_error=ddbc_error
+ driver_error="Integrity constraint violation", ddbc_error=ddbc_error
), # Integrity constraint violation
"40003": OperationalError(
- driver_error="Statement completion unknown",
- ddbc_error=ddbc_error
+ driver_error="Statement completion unknown", ddbc_error=ddbc_error
), # Statement completion unknown
"42000": ProgrammingError(
- driver_error="Syntax error or access violation",
- ddbc_error=ddbc_error
+ driver_error="Syntax error or access violation", ddbc_error=ddbc_error
), # Syntax error or access violation
"42S01": ProgrammingError(
- driver_error="Base table or view already exists",
- ddbc_error=ddbc_error
+ driver_error="Base table or view already exists", ddbc_error=ddbc_error
), # Base table or view already exists
"42S02": ProgrammingError(
- driver_error="Base table or view not found",
- ddbc_error=ddbc_error
+ driver_error="Base table or view not found", ddbc_error=ddbc_error
), # Base table or view not found
"42S11": ProgrammingError(
- driver_error="Index already exists",
- ddbc_error=ddbc_error
+ driver_error="Index already exists", ddbc_error=ddbc_error
), # Index already exists
"42S12": ProgrammingError(
- driver_error="Index not found",
- ddbc_error=ddbc_error
+ driver_error="Index not found", ddbc_error=ddbc_error
), # Index not found
"42S21": ProgrammingError(
- driver_error="Column already exists",
- ddbc_error=ddbc_error
+ driver_error="Column already exists", ddbc_error=ddbc_error
), # Column already exists
"42S22": ProgrammingError(
- driver_error="Column not found",
- ddbc_error=ddbc_error
+ driver_error="Column not found", ddbc_error=ddbc_error
), # Column not found
"44000": IntegrityError(
- driver_error="WITH CHECK OPTION violation",
- ddbc_error=ddbc_error
+ driver_error="WITH CHECK OPTION violation", ddbc_error=ddbc_error
), # WITH CHECK OPTION violation
"HY000": OperationalError(
- driver_error="General error",
- ddbc_error=ddbc_error
+ driver_error="General error", ddbc_error=ddbc_error
), # General error
"HY001": OperationalError(
- driver_error="Memory allocation error",
- ddbc_error=ddbc_error
+ driver_error="Memory allocation error", ddbc_error=ddbc_error
), # Memory allocation error
"HY003": ProgrammingError(
- driver_error="Invalid application buffer type",
- ddbc_error=ddbc_error
+ driver_error="Invalid application buffer type", ddbc_error=ddbc_error
), # Invalid application buffer type
"HY004": ProgrammingError(
- driver_error="Invalid SQL data type",
- ddbc_error=ddbc_error
+ driver_error="Invalid SQL data type", ddbc_error=ddbc_error
), # Invalid SQL data type
"HY007": ProgrammingError(
- driver_error="Associated statement is not prepared",
- ddbc_error=ddbc_error
+ driver_error="Associated statement is not prepared", ddbc_error=ddbc_error
), # Associated statement is not prepared
"HY008": OperationalError(
- driver_error="Operation canceled",
- ddbc_error=ddbc_error
+ driver_error="Operation canceled", ddbc_error=ddbc_error
), # Operation canceled
"HY009": ProgrammingError(
- driver_error="Invalid use of null pointer",
- ddbc_error=ddbc_error
+ driver_error="Invalid use of null pointer", ddbc_error=ddbc_error
), # Invalid use of null pointer
"HY010": ProgrammingError(
- driver_error="Function sequence error",
- ddbc_error=ddbc_error
+ driver_error="Function sequence error", ddbc_error=ddbc_error
), # Function sequence error
"HY011": ProgrammingError(
- driver_error="Attribute cannot be set now",
- ddbc_error=ddbc_error
+ driver_error="Attribute cannot be set now", ddbc_error=ddbc_error
), # Attribute cannot be set now
"HY012": ProgrammingError(
- driver_error="Invalid transaction operation code",
- ddbc_error=ddbc_error
+ driver_error="Invalid transaction operation code", ddbc_error=ddbc_error
), # Invalid transaction operation code
"HY013": OperationalError(
- driver_error="Memory management error",
- ddbc_error=ddbc_error
+ driver_error="Memory management error", ddbc_error=ddbc_error
), # Memory management error
"HY014": OperationalError(
driver_error="Limit on the number of handles exceeded",
ddbc_error=ddbc_error,
), # Limit on the number of handles exceeded
"HY015": ProgrammingError(
- driver_error="No cursor name available",
- ddbc_error=ddbc_error
+ driver_error="No cursor name available", ddbc_error=ddbc_error
), # No cursor name available
"HY016": ProgrammingError(
driver_error="Cannot modify an implementation row descriptor",
@@ -440,120 +395,93 @@ def sqlstate_to_exception(sqlstate: str, ddbc_error: str) -> Exception:
ddbc_error=ddbc_error,
), # Invalid use of an automatically allocated descriptor handle
"HY018": OperationalError(
- driver_error="Server declined cancel request",
- ddbc_error=ddbc_error
+ driver_error="Server declined cancel request", ddbc_error=ddbc_error
), # Server declined cancel request
"HY019": DataError(
driver_error="Non-character and non-binary data sent in pieces",
ddbc_error=ddbc_error,
), # Non-character and non-binary data sent in pieces
"HY020": DataError(
- driver_error="Attempt to concatenate a null value",
- ddbc_error=ddbc_error
+ driver_error="Attempt to concatenate a null value", ddbc_error=ddbc_error
), # Attempt to concatenate a null value
"HY021": ProgrammingError(
- driver_error="Inconsistent descriptor information",
- ddbc_error=ddbc_error
+ driver_error="Inconsistent descriptor information", ddbc_error=ddbc_error
), # Inconsistent descriptor information
"HY024": ProgrammingError(
- driver_error="Invalid attribute value",
- ddbc_error=ddbc_error
+ driver_error="Invalid attribute value", ddbc_error=ddbc_error
), # Invalid attribute value
"HY090": ProgrammingError(
- driver_error="Invalid string or buffer length",
- ddbc_error=ddbc_error
+ driver_error="Invalid string or buffer length", ddbc_error=ddbc_error
), # Invalid string or buffer length
"HY091": ProgrammingError(
- driver_error="Invalid descriptor field identifier",
- ddbc_error=ddbc_error
+ driver_error="Invalid descriptor field identifier", ddbc_error=ddbc_error
), # Invalid descriptor field identifier
"HY092": ProgrammingError(
- driver_error="Invalid attribute/option identifier",
- ddbc_error=ddbc_error
+ driver_error="Invalid attribute/option identifier", ddbc_error=ddbc_error
), # Invalid attribute/option identifier
"HY095": ProgrammingError(
- driver_error="Function type out of range",
- ddbc_error=ddbc_error
+ driver_error="Function type out of range", ddbc_error=ddbc_error
), # Function type out of range
"HY096": ProgrammingError(
- driver_error="Invalid information type",
- ddbc_error=ddbc_error
+ driver_error="Invalid information type", ddbc_error=ddbc_error
), # Invalid information type
"HY097": ProgrammingError(
- driver_error="Column type out of range",
- ddbc_error=ddbc_error
+ driver_error="Column type out of range", ddbc_error=ddbc_error
), # Column type out of range
"HY098": ProgrammingError(
- driver_error="Scope type out of range",
- ddbc_error=ddbc_error
+ driver_error="Scope type out of range", ddbc_error=ddbc_error
), # Scope type out of range
"HY099": ProgrammingError(
- driver_error="Nullable type out of range",
- ddbc_error=ddbc_error
+ driver_error="Nullable type out of range", ddbc_error=ddbc_error
), # Nullable type out of range
"HY100": ProgrammingError(
- driver_error="Uniqueness option type out of range",
- ddbc_error=ddbc_error
+ driver_error="Uniqueness option type out of range", ddbc_error=ddbc_error
), # Uniqueness option type out of range
"HY101": ProgrammingError(
- driver_error="Accuracy option type out of range",
- ddbc_error=ddbc_error
+ driver_error="Accuracy option type out of range", ddbc_error=ddbc_error
), # Accuracy option type out of range
"HY103": ProgrammingError(
- driver_error="Invalid retrieval code",
- ddbc_error=ddbc_error
+ driver_error="Invalid retrieval code", ddbc_error=ddbc_error
), # Invalid retrieval code
"HY104": ProgrammingError(
- driver_error="Invalid precision or scale value",
- ddbc_error=ddbc_error
+ driver_error="Invalid precision or scale value", ddbc_error=ddbc_error
), # Invalid precision or scale value
"HY105": ProgrammingError(
- driver_error="Invalid parameter type",
- ddbc_error=ddbc_error
+ driver_error="Invalid parameter type", ddbc_error=ddbc_error
), # Invalid parameter type
"HY106": ProgrammingError(
- driver_error="Fetch type out of range",
- ddbc_error=ddbc_error
+ driver_error="Fetch type out of range", ddbc_error=ddbc_error
), # Fetch type out of range
"HY107": ProgrammingError(
- driver_error="Row value out of range",
- ddbc_error=ddbc_error
+ driver_error="Row value out of range", ddbc_error=ddbc_error
), # Row value out of range
"HY109": ProgrammingError(
- driver_error="Invalid cursor position",
- ddbc_error=ddbc_error
+ driver_error="Invalid cursor position", ddbc_error=ddbc_error
), # Invalid cursor position
"HY110": ProgrammingError(
- driver_error="Invalid driver completion",
- ddbc_error=ddbc_error
+ driver_error="Invalid driver completion", ddbc_error=ddbc_error
), # Invalid driver completion
"HY111": ProgrammingError(
- driver_error="Invalid bookmark value",
- ddbc_error=ddbc_error
+ driver_error="Invalid bookmark value", ddbc_error=ddbc_error
), # Invalid bookmark value
"HYC00": NotSupportedError(
- driver_error="Optional feature not implemented",
- ddbc_error=ddbc_error
+ driver_error="Optional feature not implemented", ddbc_error=ddbc_error
), # Optional feature not implemented
"HYT00": OperationalError(
- driver_error="Timeout expired",
- ddbc_error=ddbc_error
+ driver_error="Timeout expired", ddbc_error=ddbc_error
), # Timeout expired
"HYT01": OperationalError(
- driver_error="Connection timeout expired",
- ddbc_error=ddbc_error
+ driver_error="Connection timeout expired", ddbc_error=ddbc_error
), # Connection timeout expired
"IM001": NotSupportedError(
- driver_error="Driver does not support this function",
- ddbc_error=ddbc_error
+ driver_error="Driver does not support this function", ddbc_error=ddbc_error
), # Driver does not support this function
"IM002": OperationalError(
driver_error="Data source name not found and no default driver specified",
ddbc_error=ddbc_error,
), # Data source name not found and no default driver specified
"IM003": OperationalError(
- driver_error="Specified driver could not be loaded",
- ddbc_error=ddbc_error
+ driver_error="Specified driver could not be loaded", ddbc_error=ddbc_error
), # Specified driver could not be loaded
"IM004": OperationalError(
driver_error="Driver's SQLAllocHandle on SQL_HANDLE_ENV failed",
@@ -564,44 +492,35 @@ def sqlstate_to_exception(sqlstate: str, ddbc_error: str) -> Exception:
ddbc_error=ddbc_error,
), # Driver's SQLAllocHandle on SQL_HANDLE_DBC failed
"IM006": OperationalError(
- driver_error="Driver's SQLSetConnectAttr failed",
- ddbc_error=ddbc_error
+ driver_error="Driver's SQLSetConnectAttr failed", ddbc_error=ddbc_error
), # Driver's SQLSetConnectAttr failed
"IM007": OperationalError(
driver_error="No data source or driver specified; dialog prohibited",
ddbc_error=ddbc_error,
), # No data source or driver specified; dialog prohibited
"IM008": OperationalError(
- driver_error="Dialog failed",
- ddbc_error=ddbc_error
+ driver_error="Dialog failed", ddbc_error=ddbc_error
), # Dialog failed
"IM009": OperationalError(
- driver_error="Unable to load translation DLL",
- ddbc_error=ddbc_error
+ driver_error="Unable to load translation DLL", ddbc_error=ddbc_error
), # Unable to load translation DLL
"IM010": OperationalError(
- driver_error="Data source name too long",
- ddbc_error=ddbc_error
+ driver_error="Data source name too long", ddbc_error=ddbc_error
), # Data source name too long
"IM011": OperationalError(
- driver_error="Driver name too long",
- ddbc_error=ddbc_error
+ driver_error="Driver name too long", ddbc_error=ddbc_error
), # Driver name too long
"IM012": OperationalError(
- driver_error="DRIVER keyword syntax error",
- ddbc_error=ddbc_error
+ driver_error="DRIVER keyword syntax error", ddbc_error=ddbc_error
), # DRIVER keyword syntax error
"IM013": OperationalError(
- driver_error="Trace file error",
- ddbc_error=ddbc_error
+ driver_error="Trace file error", ddbc_error=ddbc_error
), # Trace file error
"IM014": OperationalError(
- driver_error="Invalid name of File DSN",
- ddbc_error=ddbc_error
+ driver_error="Invalid name of File DSN", ddbc_error=ddbc_error
), # Invalid name of File DSN
"IM015": OperationalError(
- driver_error="Corrupt file data source",
- ddbc_error=ddbc_error
+ driver_error="Corrupt file data source", ddbc_error=ddbc_error
), # Corrupt file data source
}
return mapping.get(sqlstate, None)
@@ -621,8 +540,7 @@ def truncate_error_message(error_message: str) -> str:
string_third = string_second[string_second.index("]") + 1 :]
return string_first + string_third
except Exception as e:
- if logger:
- logger.error("Error while truncating error message: %s",e)
+ logger.warning("Error while truncating error message: %s", e)
return error_message
@@ -641,10 +559,10 @@ def raise_exception(sqlstate: str, ddbc_error: str) -> None:
"""
exception_class = sqlstate_to_exception(sqlstate, ddbc_error)
if exception_class:
- if logger:
- logger.error(exception_class)
+ logger.error("Raising exception: %s", exception_class)
raise exception_class
+ logger.error("Unknown SQLSTATE %s, raising DatabaseError", sqlstate)
raise DatabaseError(
driver_error=f"An error occurred with SQLSTATE code: {sqlstate}",
+ ddbc_error=ddbc_error if ddbc_error else "Unknown DDBC error",
+ ddbc_error=f"{ddbc_error}" if ddbc_error else "Unknown DDBC error",
)
diff --git a/mssql_python/helpers.py b/mssql_python/helpers.py
index 267ede75c..4d785b48c 100644
--- a/mssql_python/helpers.py
+++ b/mssql_python/helpers.py
@@ -4,62 +4,19 @@
This module provides helper functions for the mssql_python package.
"""
+import re
+import threading
+import locale
+from typing import Any, Union, Tuple, Optional
from mssql_python import ddbc_bindings
from mssql_python.exceptions import raise_exception
-from mssql_python.logging_config import get_logger
-import platform
-from pathlib import Path
-from mssql_python.ddbc_bindings import normalize_architecture
+from mssql_python.logging import logger
+from mssql_python.constants import ConstantsDDBC
-logger = get_logger()
+# normalize_architecture import removed as it's unused
-def add_driver_to_connection_str(connection_str):
- """
- Add the DDBC driver to the connection string if not present.
-
- Args:
- connection_str (str): The original connection string.
-
- Returns:
- str: The connection string with the DDBC driver added.
-
- Raises:
- Exception: If the connection string is invalid.
- """
- driver_name = "Driver={ODBC Driver 18 for SQL Server}"
- try:
- # Strip any leading or trailing whitespace from the connection string
- connection_str = connection_str.strip()
- connection_str = add_driver_name_to_app_parameter(connection_str)
-
- # Split the connection string into individual attributes
- connection_attributes = connection_str.split(";")
- final_connection_attributes = []
-
- # Iterate through the attributes and exclude any existing driver attribute
- for attribute in connection_attributes:
- if attribute.lower().split("=")[0] == "driver":
- continue
- final_connection_attributes.append(attribute)
-
- # Join the remaining attributes back into a connection string
- connection_str = ";".join(final_connection_attributes)
-
- # Insert the driver attribute at the beginning of the connection string
- final_connection_attributes.insert(0, driver_name)
- connection_str = ";".join(final_connection_attributes)
-
- except Exception as e:
- raise Exception(
- "Invalid connection string, Please follow the format: "
- "Server=server_name;Database=database_name;UID=user_name;PWD=password"
- ) from e
-
- return connection_str
-
-
-def check_error(handle_type, handle, ret):
+def check_error(handle_type: int, handle: Any, ret: int) -> None:
"""
Check for errors and raise an exception if an error is found.
@@ -72,143 +29,261 @@ def check_error(handle_type, handle, ret):
RuntimeError: If an error is found.
"""
if ret < 0:
+ logger.debug(
+ "check_error: Error detected - handle_type=%d, return_code=%d", handle_type, ret
+ )
error_info = ddbc_bindings.DDBCSQLCheckError(handle_type, handle, ret)
- if logger:
- logger.error("Error: %s", error_info.ddbcErrorMsg)
+ logger.error("Error: %s", error_info.ddbcErrorMsg)
+ logger.debug("check_error: SQL state=%s", error_info.sqlState)
raise_exception(error_info.sqlState, error_info.ddbcErrorMsg)
-def add_driver_name_to_app_parameter(connection_string):
+def sanitize_connection_string(conn_str: str) -> str:
"""
- Modifies the input connection string by appending the APP name.
-
+ Sanitize the connection string by removing sensitive information.
Args:
- connection_string (str): The input connection string.
-
+ conn_str (str): The connection string to sanitize.
Returns:
- str: The modified connection string.
+ str: The sanitized connection string.
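+
+ Example:
+ >>> sanitize_connection_string("Server=myserver;Uid=me;Pwd=secret;")
+ 'Server=myserver;Uid=me;Pwd=***;'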
"""
- # Split the input string into key-value pairs
- parameters = connection_string.split(";")
-
- # Initialize variables
- app_found = False
- modified_parameters = []
-
- # Iterate through the key-value pairs
- for param in parameters:
- if param.lower().startswith("app="):
- # Overwrite the value with 'MSSQL-Python'
- app_found = True
- key, _ = param.split("=", 1)
- modified_parameters.append(f"{key}=MSSQL-Python")
- else:
- # Keep other parameters as is
- modified_parameters.append(param)
-
- # If APP key is not found, append it
- if not app_found:
- modified_parameters.append("APP=MSSQL-Python")
-
- # Join the parameters back into a connection string
- return ";".join(modified_parameters) + ";"
+ logger.debug(
+ "sanitize_connection_string: Sanitizing connection string (length=%d)", len(conn_str)
+ )
+ # Remove sensitive information (the Pwd value) from the connection string
+ # Replace Pwd=...; or Pwd=... (end of string) with Pwd=***;
+ sanitized = re.sub(r"(Pwd\s*=\s*)[^;]*", r"\1***", conn_str, flags=re.IGNORECASE)
+ logger.debug("sanitize_connection_string: Password fields masked")
+ return sanitized
-def detect_linux_distro():
+def sanitize_user_input(user_input: str, max_length: int = 50) -> str:
"""
- Detect Linux distribution for driver path selection.
+ Sanitize user input for safe logging by removing control characters,
+ limiting length, and ensuring safe characters only.
+
+ Args:
+ user_input (str): The user input to sanitize.
+ max_length (int): Maximum length of the sanitized output.
Returns:
- str: Distribution name ('debian_ubuntu', 'rhel', 'alpine', etc.)
+ str: The sanitized string safe for logging.
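+
+ Example:
+ >>> sanitize_user_input("utf-8; DROP TABLE users")
+ 'utf-8DROPTABLEusers'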
"""
- import os
-
- distro_name = "debian_ubuntu" # default
-
- try:
- if os.path.exists("/etc/os-release"):
- with open("/etc/os-release", "r") as f:
- content = f.read()
- for line in content.split("\n"):
- if line.startswith("ID="):
- distro_id = line.split("=", 1)[1].strip('"\'')
- if distro_id in ["ubuntu", "debian"]:
- distro_name = "debian_ubuntu"
- elif distro_id in ["rhel", "centos", "fedora"]:
- distro_name = "rhel"
- elif distro_id == "alpine":
- distro_name = "alpine"
- else:
- distro_name = distro_id # use as-is
- break
- except Exception:
- pass # use default
-
- return distro_name
-
-def get_driver_path(module_dir, architecture):
+ logger.debug(
+ "sanitize_user_input: Sanitizing input (type=%s, length=%d)",
+ type(user_input).__name__,
+ len(user_input) if isinstance(user_input, str) else 0,
+ )
+ if not isinstance(user_input, str):
+ logger.debug("sanitize_user_input: Non-string input detected")
+ return ""
+
+ # Remove control characters and non-printable characters
+ # Allow alphanumeric, dash, underscore, and dot (common in encoding names)
+ sanitized = re.sub(r"[^\w\-\.]", "", user_input)
+
+ # Limit length to prevent log flooding
+ was_truncated = False
+ if len(sanitized) > max_length:
+ sanitized = sanitized[:max_length] + "..."
+ was_truncated = True
+
+ # Return placeholder if nothing remains after sanitization
+ result = sanitized if sanitized else ""
+ logger.debug(
+ "sanitize_user_input: Result length=%d, truncated=%s", len(result), str(was_truncated)
+ )
+ return result
+
+
+def validate_attribute_value(
+ attribute: Union[int, str],
+ value: Union[int, str, bytes, bytearray],
+ is_connected: bool = True,
+ sanitize_logs: bool = True,
+ max_log_length: int = 50,
+) -> Tuple[bool, Optional[str], str, str]:
"""
- Get the platform-specific ODBC driver path.
+ Validates attribute and value pairs for connection attributes.
+
+ Performs basic type checking and validation of ODBC connection attributes.
Args:
- module_dir (str): Base module directory
- architecture (str): Target architecture (x64, arm64, x86, etc.)
+ attribute (int): The connection attribute to validate (SQL_ATTR_*)
+ value: The value to set for the attribute (int, str, bytes, or bytearray)
+ is_connected (bool): Whether the connection is already established
+ sanitize_logs (bool): Whether to include sanitized versions for logging
+ max_log_length (int): Maximum length of sanitized output for logging
Returns:
- str: Full path to the ODBC driver file
-
- Raises:
- RuntimeError: If driver not found or unsupported platform
+ tuple: (is_valid, error_message, sanitized_attribute, sanitized_value)
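+
+ Example:
+ >>> ok, err, attr_log, val_log = validate_attribute_value(
+ ...     ConstantsDDBC.SQL_ATTR_LOGIN_TIMEOUT.value, 30, is_connected=False
+ ... )
+ >>> ok, err
+ (True, None)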
"""
-
- platform_name = platform.system().lower()
- normalized_arch = normalize_architecture(platform_name, architecture)
-
- if platform_name == "windows":
- driver_path = Path(module_dir) / "libs" / "windows" / normalized_arch / "msodbcsql18.dll"
-
- elif platform_name == "darwin":
- driver_path = Path(module_dir) / "libs" / "macos" / normalized_arch / "lib" / "libmsodbcsql.18.dylib"
-
- elif platform_name == "linux":
- distro_name = detect_linux_distro()
- driver_path = Path(module_dir) / "libs" / "linux" / distro_name / normalized_arch / "lib" / "libmsodbcsql-18.5.so.1.1"
+ logger.debug(
+ "validate_attribute_value: Validating attribute=%s, value_type=%s, is_connected=%s",
+ str(attribute),
+ type(value).__name__,
+ str(is_connected),
+ )
+
+ # Sanitize a value for logging
+ def _sanitize_for_logging(input_val: Any, max_length: int = max_log_length) -> str:
+ if not isinstance(input_val, str):
+ try:
+ input_val = str(input_val)
+ except (TypeError, ValueError):
+ return ""
+
+ # Allow alphanumeric, dash, underscore, and dot
+ sanitized = re.sub(r"[^\w\-\.]", "", input_val)
+
+ # Limit length
+ if len(sanitized) > max_length:
+ sanitized = sanitized[:max_length] + "..."
+
+ return sanitized if sanitized else ""
+
+ # Create sanitized versions for logging
+ sanitized_attr = _sanitize_for_logging(attribute) if sanitize_logs else str(attribute)
+ sanitized_val = _sanitize_for_logging(value) if sanitize_logs else str(value)
+
+ # Basic attribute validation - must be an integer
+ if not isinstance(attribute, int):
+ logger.debug(
+ "validate_attribute_value: Attribute not an integer - type=%s", type(attribute).__name__
+ )
+ return (
+ False,
+ f"Attribute must be an integer, got {type(attribute).__name__}",
+ sanitized_attr,
+ sanitized_val,
+ )
+
+ # Define driver-level attributes that are supported
+ supported_attributes = [
+ ConstantsDDBC.SQL_ATTR_ACCESS_MODE.value,
+ ConstantsDDBC.SQL_ATTR_CONNECTION_TIMEOUT.value,
+ ConstantsDDBC.SQL_ATTR_CURRENT_CATALOG.value,
+ ConstantsDDBC.SQL_ATTR_LOGIN_TIMEOUT.value,
+ ConstantsDDBC.SQL_ATTR_PACKET_SIZE.value,
+ ConstantsDDBC.SQL_ATTR_TXN_ISOLATION.value,
+ ]
+
+ # Check if attribute is supported
+ if attribute not in supported_attributes:
+ logger.debug("validate_attribute_value: Unsupported attribute - attr=%d", attribute)
+ return (
+ False,
+ f"Unsupported attribute: {attribute}",
+ sanitized_attr,
+ sanitized_val,
+ )
+
+ # Check timing constraints for these specific attributes
+ before_only_attributes = [
+ ConstantsDDBC.SQL_ATTR_LOGIN_TIMEOUT.value,
+ ConstantsDDBC.SQL_ATTR_PACKET_SIZE.value,
+ ]
+
+ # Check if attribute can be set at the current connection state
+ if is_connected and attribute in before_only_attributes:
+ logger.debug(
+ "validate_attribute_value: Timing violation - attr=%d cannot be set after connection",
+ attribute,
+ )
+ return (
+ False,
+ (
+ f"Attribute {attribute} must be set before connection establishment. "
+ "Use the attrs_before parameter when creating the connection."
+ ),
+ sanitized_attr,
+ sanitized_val,
+ )
+
+ # Basic value type validation
+ if isinstance(value, int):
+ # For integer values, check if negative (login timeout can be -1 for default)
+ if value < 0 and attribute != ConstantsDDBC.SQL_ATTR_LOGIN_TIMEOUT.value:
+ return (
+ False,
+ f"Integer value cannot be negative: {value}",
+ sanitized_attr,
+ sanitized_val,
+ )
+
+ elif isinstance(value, str):
+ # Basic string length check
+ max_string_size = 8192 # 8KB maximum
+ if len(value) > max_string_size:
+ return (
+ False,
+ f"String value too large: {len(value)} bytes (max {max_string_size})",
+ sanitized_attr,
+ sanitized_val,
+ )
+
+ elif isinstance(value, (bytes, bytearray)):
+ # Basic binary length check
+ max_binary_size = 32768 # 32KB maximum
+ if len(value) > max_binary_size:
+ return (
+ False,
+ f"Binary value too large: {len(value)} bytes (max {max_binary_size})",
+ sanitized_attr,
+ sanitized_val,
+ )
else:
- raise RuntimeError(f"Unsupported platform: {platform_name}")
-
- driver_path_str = str(driver_path)
+ # Reject unsupported value types
+ return (
+ False,
+ f"Unsupported attribute value type: {type(value).__name__}",
+ sanitized_attr,
+ sanitized_val,
+ )
+
+ # All basic validations passed
+ logger.debug(
+ "validate_attribute_value: Validation passed - attr=%d, value_type=%s",
+ attribute,
+ type(value).__name__,
+ )
+ return True, None, sanitized_attr, sanitized_val
+
+
+# Settings functionality moved here to avoid circular imports
+
+# Initialize the locale setting only once at module import time
+# This avoids thread-safety issues with locale
+_default_decimal_separator: str = "."
+try:
+ # Get the locale setting once during module initialization
+ locale_separator = locale.localeconv()["decimal_point"]
+ if locale_separator and len(locale_separator) == 1:
+ _default_decimal_separator = locale_separator
+except (AttributeError, KeyError, TypeError, ValueError):
+ pass # Keep the default "." if locale access fails
+
+
+class Settings:
+ """
+ Settings class for mssql_python package configuration.
- # Check if file exists
- if not driver_path.exists():
- raise RuntimeError(f"ODBC driver not found at: {driver_path_str}")
+ This class holds global settings that affect the behavior of the package,
+ including lowercase column names and the decimal separator.
+ """
- return driver_path_str
+ def __init__(self) -> None:
+ self.lowercase: bool = False
+ # Use the pre-determined separator - no locale access here
+ self.decimal_separator: str = _default_decimal_separator
-def sanitize_connection_string(conn_str: str) -> str:
- """
- Sanitize the connection string by removing sensitive information.
- Args:
- conn_str (str): The connection string to sanitize.
- Returns:
- str: The sanitized connection string.
- """
- # Remove sensitive information from the connection string, Pwd section
- # Replace Pwd=...; or Pwd=... (end of string) with Pwd=***;
- import re
- return re.sub(r"(Pwd\s*=\s*)[^;]*", r"\1***", conn_str, flags=re.IGNORECASE)
+# Global settings instance
+_settings: Settings = Settings()
+_settings_lock: threading.Lock = threading.Lock()
-def log(level: str, message: str, *args) -> None:
- """
- Universal logging helper that gets a fresh logger instance.
-
- Args:
- level: Log level ('debug', 'info', 'warning', 'error')
- message: Log message with optional format placeholders
- *args: Arguments for message formatting
- """
- logger = get_logger()
- if logger:
- getattr(logger, level)(message, *args)
\ No newline at end of file
+def get_settings() -> Settings:
+ """Return the global settings object"""
+ with _settings_lock:
+ return _settings
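A minimal, illustrative sketch of reading the relocated settings (import path assumed from this hunk; direct attribute mutation stands in for whatever module-level wrappers the package exposes):

    from mssql_python.helpers import get_settings  # import path assumed

    settings = get_settings()
    print(settings.lowercase)           # False by default
    print(settings.decimal_separator)   # "." unless the locale supplied a single character

    settings.lowercase = True           # illustrative: opt in to lowercase column names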
diff --git a/mssql_python/libs/linux/alpine/arm64/lib/MICROSOFT_ODBC_DRIVER_FOR_SQL_SERVER_LICENSE.txt b/mssql_python/libs/linux/alpine/arm64/lib/MICROSOFT_ODBC_DRIVER_FOR_SQL_SERVER_LICENSE.txt
new file mode 100644
index 000000000..ebd7b3151
--- /dev/null
+++ b/mssql_python/libs/linux/alpine/arm64/lib/MICROSOFT_ODBC_DRIVER_FOR_SQL_SERVER_LICENSE.txt
@@ -0,0 +1,76 @@
+MICROSOFT SOFTWARE LICENSE TERMS
+MICROSOFT ODBC DRIVER 18 FOR SQL SERVER
+
+These license terms are an agreement between you and Microsoft Corporation (or one of its affiliates). They apply to the software named above and any Microsoft services or software updates (except to the extent such services or updates are accompanied by new or additional terms, in which case those different terms apply prospectively and do not alter your or Microsoft’s rights relating to pre-updated software or services). IF YOU COMPLY WITH THESE LICENSE TERMS, YOU HAVE THE RIGHTS BELOW. BY USING THE SOFTWARE, YOU ACCEPT THESE TERMS.
+
+1. INSTALLATION AND USE RIGHTS.
+
+ a) General. You may install and use any number of copies of the software to develop and test your applications.
+ b) Third Party Software. The software may include third party applications that Microsoft, not the third party, licenses to you under this agreement. Any included notices for third party applications are for your information only.
+
+2. DISTRIBUTABLE CODE. The software may contain code you are permitted to distribute (i.e. make available for third parties) in applications you develop, as described in this Section.
+
+ a) Distribution Rights. The code and test files described below are distributable if included with the software.
+
+ i. REDIST.TXT Files. You may copy and distribute the object code form of code listed on the REDIST list in the software, if any, or listed at REDIST (https://aka.ms/odbc18eularedist);
+ ii. Image Library. You may copy and distribute images, graphics, and animations in the Image Library as described in the software documentation;
+ iii. Sample Code, Templates, and Styles. You may copy, modify, and distribute the source and object code form of code marked as “sample”, “template”, “simple styles”, and “sketch styles”; and
+ iv. Third Party Distribution. You may permit distributors of your applications to copy and distribute any of this distributable code you elect to distribute with your applications.
+
+ b) Distribution Requirements. For any code you distribute, you must:
+
+ i. add significant primary functionality to it in your applications;
+ ii. require distributors and external end users to agree to terms that protect it and Microsoft at least as much as this agreement; and
+ iii. indemnify, defend, and hold harmless Microsoft from any claims, including attorneys’ fees, related to the distribution or use of your applications, except to the extent that any claim is based solely on the unmodified distributable code.
+
+ c) Distribution Restrictions. You may not:
+
+ i. use Microsoft’s trademarks or trade dress in your application in any way that suggests your application comes from or is endorsed by Microsoft; or
+ ii. modify or distribute the source code of any distributable code so that any part of it becomes subject to any license that requires that the distributable code, any other part of the software, or any of Microsoft’s other intellectual property be disclosed or distributed in source code form, or that others have the right to modify it.
+
+3. DATA COLLECTION. Some features in the software may enable collection of data from users of your applications that access or use the software. If you use these features to enable data collection in your applications, you must comply with applicable law, including getting any required user consent, and maintain a prominent privacy policy that accurately informs users about how you use, collect, and share their data. You agree to comply with all applicable provisions of the Microsoft Privacy Statement at [https://go.microsoft.com/fwlink/?LinkId=521839].
+
+4. SCOPE OF LICENSE. The software is licensed, not sold. Microsoft reserves all other rights. Unless applicable law gives you more rights despite this limitation, you will not (and have no right to):
+
+ d) use the software in any way that is against the law or to create or propagate malware; or
+ e) share, publish, distribute, or lend the software (except for any distributable code, subject to the terms above), provide the software as a stand-alone hosted solution for others to use, or transfer the software or this agreement to any third party.
+
+5. EXPORT RESTRICTIONS. You must comply with all domestic and international export laws and regulations that apply to the software, which include restrictions on destinations, end users, and end use. For further information on export restrictions, visit http://aka.ms/exporting.
+
+6. SUPPORT SERVICES. Microsoft is not obligated under this agreement to provide any support services for the software. Any support provided is “as is”, “with all faults”, and without warranty of any kind.
+
+7. UPDATES. The software may periodically check for updates, and download and install them for you. You may obtain updates only from Microsoft or authorized sources. Microsoft may need to update your system to provide you with updates. You agree to receive these automatic updates without any additional notice. Updates may not include or support all existing software features, services, or peripheral devices.
+
+8. ENTIRE AGREEMENT. This agreement, and any other terms Microsoft may provide for supplements, updates, or third-party applications, is the entire agreement for the software.
+
+9. APPLICABLE LAW AND PLACE TO RESOLVE DISPUTES. If you acquired the software in the United States or Canada, the laws of the state or province where you live (or, if a business, where your principal place of business is located) govern the interpretation of this agreement, claims for its breach, and all other claims (including consumer protection, unfair competition, and tort claims), regardless of conflict of laws principles. If you acquired the software in any other country, its laws apply. If U.S. federal jurisdiction exists, you and Microsoft consent to exclusive jurisdiction and venue in the federal court in King County, Washington for all disputes heard in court. If not, you and Microsoft consent to exclusive jurisdiction and venue in the Superior Court of King County, Washington for all disputes heard in court.
+
+10. CONSUMER RIGHTS; REGIONAL VARIATIONS. This agreement describes certain legal rights. You may have other rights, including consumer rights, under the laws of your state or country. Separate and apart from your relationship with Microsoft, you may also have rights with respect to the party from which you acquired the software. This agreement does not change those other rights if the laws of your state or country do not permit it to do so. For example, if you acquired the software in one of the below regions, or mandatory country law applies, then the following provisions apply to you:
+
+ a) Australia. You have statutory guarantees under the Australian Consumer Law and nothing in this agreement is intended to affect those rights.
+ b) Canada. If you acquired this software in Canada, you may stop receiving updates by turning off the automatic update feature, disconnecting your device from the Internet (if and when you re-connect to the Internet, however, the software will resume checking for and installing updates), or uninstalling the software. The product documentation, if any, may also specify how to turn off updates for your specific device or software.
+ c) Germany and Austria.
+
+ i. Warranty. The properly licensed software will perform substantially as described in any Microsoft materials that accompany the software. However, Microsoft gives no contractual guarantee in relation to the licensed software.
+ ii. Limitation of Liability. In case of intentional conduct, gross negligence, claims based on the Product Liability Act, as well as, in case of death or personal or physical injury, Microsoft is liable according to the statutory law.
+
+ Subject to the foregoing clause ii., Microsoft will only be liable for slight negligence if Microsoft is in breach of such material contractual obligations, the fulfillment of which facilitate the due performance of this agreement, the breach of which would endanger the purpose of this agreement and the compliance with which a party may constantly trust in (so-called "cardinal obligations"). In other cases of slight negligence, Microsoft will not be liable for slight negligence.
+
+11. DISCLAIMER OF WARRANTY. THE SOFTWARE IS LICENSED “AS IS.” YOU BEAR THE RISK OF USING IT. MICROSOFT GIVES NO EXPRESS WARRANTIES, GUARANTEES, OR CONDITIONS. TO THE EXTENT PERMITTED UNDER APPLICABLE LAWS, MICROSOFT EXCLUDES ALL IMPLIED WARRANTIES, INCLUDING MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.
+
+12. LIMITATION ON AND EXCLUSION OF DAMAGES. IF YOU HAVE ANY BASIS FOR RECOVERING DAMAGES DESPITE THE PRECEDING DISCLAIMER OF WARRANTY, YOU CAN RECOVER FROM MICROSOFT AND ITS SUPPLIERS ONLY DIRECT DAMAGES UP TO U.S. $5.00. YOU CANNOT RECOVER ANY OTHER DAMAGES, INCLUDING CONSEQUENTIAL, LOST PROFITS, SPECIAL, INDIRECT, OR INCIDENTAL DAMAGES.
+
+ This limitation applies to (a) anything related to the software, services, content (including code) on third party Internet sites, or third party applications; and (b) claims for breach of contract, warranty, guarantee, or condition; strict liability, negligence, or other tort; or any other claim; in each case to the extent permitted by applicable law.
+ It also applies even if Microsoft knew or should have known about the possibility of the damages. The above limitation or exclusion may not apply to you because your state, province, or country may not allow the exclusion or limitation of incidental, consequential, or other damages.
+ Please note: As this software is distributed in Canada, some of the clauses in this agreement are provided below in French.
+ Remarque: Ce logiciel étant distribué au Canada, certaines des clauses dans ce contrat sont fournies ci-dessous en français.
+
+ EXONÉRATION DE GARANTIE. Le logiciel visé par une licence est offert « tel quel ». Toute utilisation de ce logiciel est à votre seule risque et péril. Microsoft n’accorde aucune autre garantie expresse. Vous pouvez bénéficier de droits additionnels en vertu du droit local sur la protection des consommateurs, que ce contrat ne peut modifier. La ou elles sont permises par le droit locale, les garanties implicites de qualité marchande, d’adéquation à un usage particulier et d’absence de contrefaçon sont exclues.
+ LIMITATION DES DOMMAGES-INTÉRÊTS ET EXCLUSION DE RESPONSABILITÉ POUR LES DOMMAGES. Vous pouvez obtenir de Microsoft et de ses fournisseurs une indemnisation en cas de dommages directs uniquement à hauteur de 5,00 $ US. Vous ne pouvez prétendre à aucune indemnisation pour les autres dommages, y compris les dommages spéciaux, indirects ou accessoires et pertes de bénéfices.
+
+ Cette limitation concerne:
+ • tout ce qui est relié au logiciel, aux services ou au contenu (y compris le code) figurant sur des sites Internet tiers ou dans des programmes tiers; et
+ • les réclamations au titre de violation de contrat ou de garantie, ou au titre de responsabilité stricte, de négligence ou d’une autre faute dans la limite autorisée par la loi en vigueur.
+
+ Elle s’applique également, même si Microsoft connaissait ou devrait connaître l’éventualité d’un tel dommage. Si votre pays n’autorise pas l’exclusion ou la limitation de responsabilité pour les dommages indirects, accessoires ou de quelque nature que ce soit, il se peut que la limitation ou l’exclusion ci-dessus ne s’appliquera pas à votre égard.
+ EFFET JURIDIQUE. Le présent contrat décrit certains droits juridiques. Vous pourriez avoir d’autres droits prévus par les lois de votre pays. Le présent contrat ne modifie pas les droits que vous confèrent les lois de votre pays si celles-ci ne le permettent pas.
diff --git a/mssql_python/libs/linux/alpine/arm64/lib/libmsodbcsql-18.5.so.1.1 b/mssql_python/libs/linux/alpine/arm64/lib/libmsodbcsql-18.5.so.1.1
new file mode 100755
index 000000000..d88498315
Binary files /dev/null and b/mssql_python/libs/linux/alpine/arm64/lib/libmsodbcsql-18.5.so.1.1 differ
diff --git a/mssql_python/libs/linux/alpine/arm64/lib/libodbcinst.so.2 b/mssql_python/libs/linux/alpine/arm64/lib/libodbcinst.so.2
new file mode 100755
index 000000000..62a79a366
Binary files /dev/null and b/mssql_python/libs/linux/alpine/arm64/lib/libodbcinst.so.2 differ
diff --git a/mssql_python/libs/linux/alpine/arm64/share/resources/en_US/msodbcsqlr18.rll b/mssql_python/libs/linux/alpine/arm64/share/resources/en_US/msodbcsqlr18.rll
new file mode 100644
index 000000000..0f69236ee
Binary files /dev/null and b/mssql_python/libs/linux/alpine/arm64/share/resources/en_US/msodbcsqlr18.rll differ
diff --git a/mssql_python/libs/linux/alpine/x86_64/lib/MICROSOFT_ODBC_DRIVER_FOR_SQL_SERVER_LICENSE.txt b/mssql_python/libs/linux/alpine/x86_64/lib/MICROSOFT_ODBC_DRIVER_FOR_SQL_SERVER_LICENSE.txt
new file mode 100644
index 000000000..ebd7b3151
--- /dev/null
+++ b/mssql_python/libs/linux/alpine/x86_64/lib/MICROSOFT_ODBC_DRIVER_FOR_SQL_SERVER_LICENSE.txt
@@ -0,0 +1,76 @@
+MICROSOFT SOFTWARE LICENSE TERMS
+MICROSOFT ODBC DRIVER 18 FOR SQL SERVER
+
+These license terms are an agreement between you and Microsoft Corporation (or one of its affiliates). They apply to the software named above and any Microsoft services or software updates (except to the extent such services or updates are accompanied by new or additional terms, in which case those different terms apply prospectively and do not alter your or Microsoft’s rights relating to pre-updated software or services). IF YOU COMPLY WITH THESE LICENSE TERMS, YOU HAVE THE RIGHTS BELOW. BY USING THE SOFTWARE, YOU ACCEPT THESE TERMS.
+
+1. INSTALLATION AND USE RIGHTS.
+
+ a) General. You may install and use any number of copies of the software to develop and test your applications.
+ b) Third Party Software. The software may include third party applications that Microsoft, not the third party, licenses to you under this agreement. Any included notices for third party applications are for your information only.
+
+2. DISTRIBUTABLE CODE. The software may contain code you are permitted to distribute (i.e. make available for third parties) in applications you develop, as described in this Section.
+
+ a) Distribution Rights. The code and test files described below are distributable if included with the software.
+
+ i. REDIST.TXT Files. You may copy and distribute the object code form of code listed on the REDIST list in the software, if any, or listed at REDIST (https://aka.ms/odbc18eularedist);
+ ii. Image Library. You may copy and distribute images, graphics, and animations in the Image Library as described in the software documentation;
+ iii. Sample Code, Templates, and Styles. You may copy, modify, and distribute the source and object code form of code marked as “sample”, “template”, “simple styles”, and “sketch styles”; and
+ iv. Third Party Distribution. You may permit distributors of your applications to copy and distribute any of this distributable code you elect to distribute with your applications.
+
+ b) Distribution Requirements. For any code you distribute, you must:
+
+ i. add significant primary functionality to it in your applications;
+ ii. require distributors and external end users to agree to terms that protect it and Microsoft at least as much as this agreement; and
+ iii. indemnify, defend, and hold harmless Microsoft from any claims, including attorneys’ fees, related to the distribution or use of your applications, except to the extent that any claim is based solely on the unmodified distributable code.
+
+ c) Distribution Restrictions. You may not:
+
+ i. use Microsoft’s trademarks or trade dress in your application in any way that suggests your application comes from or is endorsed by Microsoft; or
+ ii. modify or distribute the source code of any distributable code so that any part of it becomes subject to any license that requires that the distributable code, any other part of the software, or any of Microsoft’s other intellectual property be disclosed or distributed in source code form, or that others have the right to modify it.
+
+3. DATA COLLECTION. Some features in the software may enable collection of data from users of your applications that access or use the software. If you use these features to enable data collection in your applications, you must comply with applicable law, including getting any required user consent, and maintain a prominent privacy policy that accurately informs users about how you use, collect, and share their data. You agree to comply with all applicable provisions of the Microsoft Privacy Statement at [https://go.microsoft.com/fwlink/?LinkId=521839].
+
+4. SCOPE OF LICENSE. The software is licensed, not sold. Microsoft reserves all other rights. Unless applicable law gives you more rights despite this limitation, you will not (and have no right to):
+
+ d) use the software in any way that is against the law or to create or propagate malware; or
+ e) share, publish, distribute, or lend the software (except for any distributable code, subject to the terms above), provide the software as a stand-alone hosted solution for others to use, or transfer the software or this agreement to any third party.
+
+5. EXPORT RESTRICTIONS. You must comply with all domestic and international export laws and regulations that apply to the software, which include restrictions on destinations, end users, and end use. For further information on export restrictions, visit http://aka.ms/exporting.
+
+6. SUPPORT SERVICES. Microsoft is not obligated under this agreement to provide any support services for the software. Any support provided is “as is”, “with all faults”, and without warranty of any kind.
+
+7. UPDATES. The software may periodically check for updates, and download and install them for you. You may obtain updates only from Microsoft or authorized sources. Microsoft may need to update your system to provide you with updates. You agree to receive these automatic updates without any additional notice. Updates may not include or support all existing software features, services, or peripheral devices.
+
+8. ENTIRE AGREEMENT. This agreement, and any other terms Microsoft may provide for supplements, updates, or third-party applications, is the entire agreement for the software.
+
+9. APPLICABLE LAW AND PLACE TO RESOLVE DISPUTES. If you acquired the software in the United States or Canada, the laws of the state or province where you live (or, if a business, where your principal place of business is located) govern the interpretation of this agreement, claims for its breach, and all other claims (including consumer protection, unfair competition, and tort claims), regardless of conflict of laws principles. If you acquired the software in any other country, its laws apply. If U.S. federal jurisdiction exists, you and Microsoft consent to exclusive jurisdiction and venue in the federal court in King County, Washington for all disputes heard in court. If not, you and Microsoft consent to exclusive jurisdiction and venue in the Superior Court of King County, Washington for all disputes heard in court.
+
+10. CONSUMER RIGHTS; REGIONAL VARIATIONS. This agreement describes certain legal rights. You may have other rights, including consumer rights, under the laws of your state or country. Separate and apart from your relationship with Microsoft, you may also have rights with respect to the party from which you acquired the software. This agreement does not change those other rights if the laws of your state or country do not permit it to do so. For example, if you acquired the software in one of the below regions, or mandatory country law applies, then the following provisions apply to you:
+
+ a) Australia. You have statutory guarantees under the Australian Consumer Law and nothing in this agreement is intended to affect those rights.
+ b) Canada. If you acquired this software in Canada, you may stop receiving updates by turning off the automatic update feature, disconnecting your device from the Internet (if and when you re-connect to the Internet, however, the software will resume checking for and installing updates), or uninstalling the software. The product documentation, if any, may also specify how to turn off updates for your specific device or software.
+ c) Germany and Austria.
+
+ i. Warranty. The properly licensed software will perform substantially as described in any Microsoft materials that accompany the software. However, Microsoft gives no contractual guarantee in relation to the licensed software.
+ ii. Limitation of Liability. In case of intentional conduct, gross negligence, claims based on the Product Liability Act, as well as, in case of death or personal or physical injury, Microsoft is liable according to the statutory law.
+
+ Subject to the foregoing clause ii., Microsoft will only be liable for slight negligence if Microsoft is in breach of such material contractual obligations, the fulfillment of which facilitate the due performance of this agreement, the breach of which would endanger the purpose of this agreement and the compliance with which a party may constantly trust in (so-called "cardinal obligations"). In other cases of slight negligence, Microsoft will not be liable for slight negligence.
+
+11. DISCLAIMER OF WARRANTY. THE SOFTWARE IS LICENSED “AS IS.” YOU BEAR THE RISK OF USING IT. MICROSOFT GIVES NO EXPRESS WARRANTIES, GUARANTEES, OR CONDITIONS. TO THE EXTENT PERMITTED UNDER APPLICABLE LAWS, MICROSOFT EXCLUDES ALL IMPLIED WARRANTIES, INCLUDING MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.
+
+12. LIMITATION ON AND EXCLUSION OF DAMAGES. IF YOU HAVE ANY BASIS FOR RECOVERING DAMAGES DESPITE THE PRECEDING DISCLAIMER OF WARRANTY, YOU CAN RECOVER FROM MICROSOFT AND ITS SUPPLIERS ONLY DIRECT DAMAGES UP TO U.S. $5.00. YOU CANNOT RECOVER ANY OTHER DAMAGES, INCLUDING CONSEQUENTIAL, LOST PROFITS, SPECIAL, INDIRECT, OR INCIDENTAL DAMAGES.
+
+ This limitation applies to (a) anything related to the software, services, content (including code) on third party Internet sites, or third party applications; and (b) claims for breach of contract, warranty, guarantee, or condition; strict liability, negligence, or other tort; or any other claim; in each case to the extent permitted by applicable law.
+ It also applies even if Microsoft knew or should have known about the possibility of the damages. The above limitation or exclusion may not apply to you because your state, province, or country may not allow the exclusion or limitation of incidental, consequential, or other damages.
+ Please note: As this software is distributed in Canada, some of the clauses in this agreement are provided below in French.
+ Remarque: Ce logiciel étant distribué au Canada, certaines des clauses dans ce contrat sont fournies ci-dessous en français.
+
+ EXONÉRATION DE GARANTIE. Le logiciel visé par une licence est offert « tel quel ». Toute utilisation de ce logiciel est à votre seule risque et péril. Microsoft n’accorde aucune autre garantie expresse. Vous pouvez bénéficier de droits additionnels en vertu du droit local sur la protection des consommateurs, que ce contrat ne peut modifier. La ou elles sont permises par le droit locale, les garanties implicites de qualité marchande, d’adéquation à un usage particulier et d’absence de contrefaçon sont exclues.
+ LIMITATION DES DOMMAGES-INTÉRÊTS ET EXCLUSION DE RESPONSABILITÉ POUR LES DOMMAGES. Vous pouvez obtenir de Microsoft et de ses fournisseurs une indemnisation en cas de dommages directs uniquement à hauteur de 5,00 $ US. Vous ne pouvez prétendre à aucune indemnisation pour les autres dommages, y compris les dommages spéciaux, indirects ou accessoires et pertes de bénéfices.
+
+ Cette limitation concerne:
+ • tout ce qui est relié au logiciel, aux services ou au contenu (y compris le code) figurant sur des sites Internet tiers ou dans des programmes tiers; et
+ • les réclamations au titre de violation de contrat ou de garantie, ou au titre de responsabilité stricte, de négligence ou d’une autre faute dans la limite autorisée par la loi en vigueur.
+
+ Elle s’applique également, même si Microsoft connaissait ou devrait connaître l’éventualité d’un tel dommage. Si votre pays n’autorise pas l’exclusion ou la limitation de responsabilité pour les dommages indirects, accessoires ou de quelque nature que ce soit, il se peut que la limitation ou l’exclusion ci-dessus ne s’appliquera pas à votre égard.
+ EFFET JURIDIQUE. Le présent contrat décrit certains droits juridiques. Vous pourriez avoir d’autres droits prévus par les lois de votre pays. Le présent contrat ne modifie pas les droits que vous confèrent les lois de votre pays si celles-ci ne le permettent pas.
diff --git a/mssql_python/libs/linux/alpine/x86_64/lib/libmsodbcsql-18.5.so.1.1 b/mssql_python/libs/linux/alpine/x86_64/lib/libmsodbcsql-18.5.so.1.1
new file mode 100755
index 000000000..9ec7372c2
Binary files /dev/null and b/mssql_python/libs/linux/alpine/x86_64/lib/libmsodbcsql-18.5.so.1.1 differ
diff --git a/mssql_python/libs/linux/alpine/x86_64/lib/libodbcinst.so.2 b/mssql_python/libs/linux/alpine/x86_64/lib/libodbcinst.so.2
new file mode 100755
index 000000000..ceecc8c80
Binary files /dev/null and b/mssql_python/libs/linux/alpine/x86_64/lib/libodbcinst.so.2 differ
diff --git a/mssql_python/libs/linux/alpine/x86_64/share/resources/en_US/msodbcsqlr18.rll b/mssql_python/libs/linux/alpine/x86_64/share/resources/en_US/msodbcsqlr18.rll
new file mode 100644
index 000000000..0f69236ee
Binary files /dev/null and b/mssql_python/libs/linux/alpine/x86_64/share/resources/en_US/msodbcsqlr18.rll differ
diff --git a/mssql_python/libs/linux/suse/x86_64/lib/libmsodbcsql-18.5.so.1.1 b/mssql_python/libs/linux/suse/x86_64/lib/libmsodbcsql-18.5.so.1.1
new file mode 100755
index 000000000..589787d48
Binary files /dev/null and b/mssql_python/libs/linux/suse/x86_64/lib/libmsodbcsql-18.5.so.1.1 differ
diff --git a/mssql_python/libs/linux/suse/x86_64/lib/libodbcinst.so.2 b/mssql_python/libs/linux/suse/x86_64/lib/libodbcinst.so.2
new file mode 100755
index 000000000..ad6d9db01
Binary files /dev/null and b/mssql_python/libs/linux/suse/x86_64/lib/libodbcinst.so.2 differ
diff --git a/mssql_python/libs/linux/suse/x86_64/share/resources/en_US/msodbcsqlr18.rll b/mssql_python/libs/linux/suse/x86_64/share/resources/en_US/msodbcsqlr18.rll
new file mode 100755
index 000000000..0f69236ee
Binary files /dev/null and b/mssql_python/libs/linux/suse/x86_64/share/resources/en_US/msodbcsqlr18.rll differ
diff --git a/mssql_python/logging.py b/mssql_python/logging.py
new file mode 100644
index 000000000..2cb9361f5
--- /dev/null
+++ b/mssql_python/logging.py
@@ -0,0 +1,609 @@
+"""
+Copyright (c) Microsoft Corporation.
+Licensed under the MIT license.
+
+Enhanced logging module for mssql_python with JDBC-style logging levels.
+This module provides fine-grained logging control with zero overhead when disabled.
+"""
+
+import logging
+from logging.handlers import RotatingFileHandler
+import os
+import sys
+import threading
+import datetime
+import re
+import platform
+import atexit
+from typing import Optional
+
+# Single DEBUG level - all or nothing philosophy
+# If you need logging, you need to see everything
+DEBUG = logging.DEBUG # 10
+
+# Output destination constants
+STDOUT = "stdout" # Log to stdout only
+FILE = "file" # Log to file only (default)
+BOTH = "both" # Log to both file and stdout
+
+# Allowed log file extensions
+ALLOWED_LOG_EXTENSIONS = {".txt", ".log", ".csv"}
+
+
+class ThreadIDFilter(logging.Filter):
+ """Filter that adds thread_id to all log records."""
+
+ def filter(self, record):
+ """Add thread_id (OS native) attribute to log record."""
+ # Use OS native thread ID for debugging compatibility
+ try:
+ thread_id = threading.get_native_id()
+ except AttributeError:
+ # Fallback for Python < 3.8
+ thread_id = threading.current_thread().ident
+ record.thread_id = thread_id
+ return True
+
+
+class MSSQLLogger:
+ """
+ Singleton logger for mssql_python with single DEBUG level.
+
+ Philosophy: All or nothing - if you enable logging, you see EVERYTHING.
+ Logging is a troubleshooting tool, not a production feature.
+
+ Features:
+ - Single DEBUG level (no categorization)
+ - Automatic file rotation (512MB, 5 backups)
+ - Password sanitization
+ - Thread ID attached to every log record (via ThreadIDFilter)
+ - Thread-safe operation
+ - Zero overhead when disabled (level check only)
+
+ ⚠️ Performance Warning: Logging adds ~2-5% overhead. Only enable when troubleshooting.
+ """
+
+ _instance: Optional["MSSQLLogger"] = None
+ _lock = threading.Lock()
+ _init_lock = threading.Lock() # Separate lock for initialization
+
+ def __new__(cls) -> "MSSQLLogger":
+ """Ensure singleton pattern"""
+ if cls._instance is None:
+ with cls._lock:
+ if cls._instance is None:
+ cls._instance = super(MSSQLLogger, cls).__new__(cls)
+ return cls._instance
+
+ def __init__(self):
+ """Initialize the logger (only once) - thread-safe"""
+ # Use separate lock for initialization check to prevent race condition
+ # This ensures hasattr check and assignment are atomic
+ with self._init_lock:
+ # Skip if already initialized
+ if hasattr(self, "_initialized"):
+ return
+
+ self._initialized = True
+
+ # Create the underlying Python logger
+ self._logger = logging.getLogger("mssql_python")
+ self._logger.setLevel(logging.CRITICAL) # Disabled by default
+ self._logger.propagate = False # Don't propagate to root logger
+
+ # Add thread ID filter (injects thread_id into every log record)
+ self._logger.addFilter(ThreadIDFilter())
+
+ # Output mode and handlers
+ self._output_mode = FILE # Default to file only
+ self._file_handler = None
+ self._stdout_handler = None
+ self._log_file = None
+ self._custom_log_path = None # Custom log file path (if specified)
+ self._handlers_initialized = False
+ self._handler_lock = threading.RLock() # Reentrant lock for handler operations
+ self._cleanup_registered = False # Track if atexit cleanup is registered
+
+ # Don't setup handlers yet - do it lazily when setLevel is called
+ # This prevents creating log files when user changes output mode before enabling logging
+
+ def _setup_handlers(self):
+ """
+ Setup handlers based on output mode.
+ Creates file handler and/or stdout handler as needed.
+ Thread-safe: Protects against concurrent handler removal during logging.
+ """
+ # Lock prevents race condition where one thread logs while another removes handlers
+ with self._handler_lock:
+ # Acquire locks on all existing handlers before closing
+ # This ensures no thread is mid-write when we close
+ old_handlers = self._logger.handlers[:]
+ for handler in old_handlers:
+ handler.acquire()
+
+ try:
+ # Flush and close each handler while holding its lock
+ for handler in old_handlers:
+ try:
+ handler.flush() # Flush BEFORE close
+ except:
+ pass # Ignore flush errors
+ handler.close()
+ self._logger.removeHandler(handler)
+ finally:
+ # Release locks on old handlers
+ for handler in old_handlers:
+ try:
+ handler.release()
+ except:
+ pass # Handler might already be closed
+
+ self._file_handler = None
+ self._stdout_handler = None
+
+ # Create CSV formatter
+ # Custom formatter to extract source from message and format as CSV
+ class CSVFormatter(logging.Formatter):
+ def format(self, record):
+ # Extract source from message (e.g., [Python] or [DDBC])
+ msg = record.getMessage()
+ if msg.startswith("[") and "]" in msg:
+ end_bracket = msg.index("]")
+ source = msg[1:end_bracket]
+ message = msg[end_bracket + 2 :].strip() # Skip '] '
+ else:
+ source = "Unknown"
+ message = msg
+
+ # Format timestamp with milliseconds using period separator
+ timestamp = self.formatTime(record, "%Y-%m-%d %H:%M:%S")
+ timestamp_with_ms = f"{timestamp}.{int(record.msecs):03d}"
+
+ # Get thread ID
+ thread_id = getattr(record, "thread_id", 0)
+
+ # Build CSV row
+ location = f"{record.filename}:{record.lineno}"
+ csv_row = f"{timestamp_with_ms}, {thread_id}, {record.levelname}, {location}, {source}, {message}"
+
+ return csv_row
+
+ formatter = CSVFormatter()
+
+ # Override format to use milliseconds with period separator
+ formatter.default_msec_format = "%s.%03d"
+
+ # Setup file handler if needed
+ if self._output_mode in (FILE, BOTH):
+ # Use custom path or auto-generate
+ if self._custom_log_path:
+ self._log_file = self._custom_log_path
+ # Ensure directory exists for custom path
+ log_dir = os.path.dirname(self._custom_log_path)
+ if log_dir and not os.path.exists(log_dir):
+ os.makedirs(log_dir, exist_ok=True)
+ else:
+ # Create log file in mssql_python_logs folder
+ log_dir = os.path.join(os.getcwd(), "mssql_python_logs")
+ if not os.path.exists(log_dir):
+ os.makedirs(log_dir, exist_ok=True)
+
+ timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+ pid = os.getpid()
+ self._log_file = os.path.join(log_dir, f"mssql_python_trace_{timestamp}_{pid}.log")
+
+ # Create rotating file handler (512MB, 5 backups)
+ # Use UTF-8 encoding for unicode support on all platforms
+ self._file_handler = RotatingFileHandler(
+ self._log_file, maxBytes=512 * 1024 * 1024, backupCount=5, encoding="utf-8" # 512MB
+ )
+ self._file_handler.setFormatter(formatter)
+ self._logger.addHandler(self._file_handler)
+
+ # Write CSV header to new log file
+ self._write_log_header()
+ else:
+ # No file logging - clear the log file path
+ self._log_file = None
+
+ # Setup stdout handler if needed
+ if self._output_mode in (STDOUT, BOTH):
+ import sys
+
+ self._stdout_handler = logging.StreamHandler(sys.stdout)
+ self._stdout_handler.setFormatter(formatter)
+ self._logger.addHandler(self._stdout_handler)
+
+ def _reconfigure_handlers(self):
+ """
+ Reconfigure handlers when output mode changes.
+ Closes existing handlers and creates new ones based on current output mode.
+ """
+ self._setup_handlers()
+
+ def _cleanup_handlers(self):
+ """
+ Cleanup all handlers on process exit.
+ Registered with atexit to ensure proper file handle cleanup.
+
+ Thread-safe: Protects against concurrent logging during cleanup.
+
+ Note on RotatingFileHandler:
+ - File rotation (at 512MB) is already thread-safe
+ - doRollover() is called within emit() which holds handler.lock
+ - No additional synchronization needed for rotation
+ """
+ with self._handler_lock:
+ handlers = self._logger.handlers[:]
+ for handler in handlers:
+ handler.acquire()
+
+ try:
+ for handler in handlers:
+ try:
+ handler.flush()
+ handler.close()
+ except:
+ pass # Ignore errors during cleanup
+ self._logger.removeHandler(handler)
+ finally:
+ for handler in handlers:
+ try:
+ handler.release()
+ except:
+ pass
+
+ def _validate_log_file_extension(self, file_path: str) -> None:
+ """
+ Validate that the log file has an allowed extension.
+
+ Args:
+ file_path: Path to the log file
+
+ Raises:
+ ValueError: If the file extension is not allowed
+ """
+ _, ext = os.path.splitext(file_path)
+ ext_lower = ext.lower()
+
+ if ext_lower not in ALLOWED_LOG_EXTENSIONS:
+ allowed = ", ".join(sorted(ALLOWED_LOG_EXTENSIONS))
+ raise ValueError(
+ f"Invalid log file extension '{ext}'. " f"Allowed extensions: {allowed}"
+ )
+
+ def _write_log_header(self):
+ """
+ Write CSV header and metadata to the log file.
+ Called once when log file is created.
+ """
+ if not self._log_file or not self._file_handler:
+ return
+
+ try:
+ # Get script name from sys.argv or __main__
+ script_name = os.path.basename(sys.argv[0]) if sys.argv else ""
+
+ # Get Python version
+ python_version = platform.python_version()
+
+ # Get driver version (try to import from package)
+ try:
+ from mssql_python import __version__
+
+ driver_version = __version__
+ except:
+ driver_version = "unknown"
+
+ # Get current time
+ start_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+ # Get PID
+ pid = os.getpid()
+
+ # Get OS info
+ os_info = platform.platform()
+
+ # Build header comment line
+ header_line = f"# MSSQL-Python Driver Log | Script: {script_name} | PID: {pid} | Log Level: DEBUG | Python: {python_version} | Driver: {driver_version} | Start: {start_time} | OS: {os_info}\n"
+
+ # CSV column headers
+ csv_header = "Timestamp, ThreadID, Level, Location, Source, Message\n"
+
+ # Write directly to file (bypass formatter)
+ with open(self._log_file, "a", encoding="utf-8") as f:
+ f.write(header_line)
+ f.write(csv_header)
+
+ except Exception as e:
+ # Notify on stderr so user knows why header is missing
+ try:
+ sys.stderr.write(
+ f"[MSSQL-Python] Warning: Failed to write log header to {self._log_file}: {type(e).__name__}\n"
+ )
+ sys.stderr.flush()
+ except:
+ pass # Even stderr notification failed
+ # Don't crash - logging continues without header
+
+ def _log(self, level: int, msg: str, add_prefix: bool = True, *args, **kwargs):
+ """
+ Internal logging method with exception safety.
+
+ Args:
+ level: Log level (DEBUG, INFO, WARNING, ERROR)
+ msg: Message format string
+ add_prefix: Whether to add [Python] prefix (default True)
+ *args: Arguments for message formatting
+ **kwargs: Additional keyword arguments
+
+ Note:
+ Callers are responsible for sanitizing sensitive data (passwords,
+ tokens, etc.) before logging. Use helpers.sanitize_connection_string()
+ for connection strings.
+
+ Exception Safety:
+ NEVER crashes the application. Catches all exceptions:
+ - TypeError/ValueError: Bad format string or args
+ - IOError/OSError: Disk full, permission denied
+ - UnicodeEncodeError: Encoding issues
+
+ On critical failures (ERROR level), attempts stderr fallback.
+ All other failures are silently ignored to prevent app crashes.
+ """
+ try:
+ # Fast level check (zero overhead if disabled)
+ if not self._logger.isEnabledFor(level):
+ return
+
+ # Add prefix if requested (only after level check)
+ if add_prefix:
+ msg = f"[Python] {msg}"
+
+ # Format message with args if provided
+ if args:
+ msg = msg % args
+
+ # Log the message (no args since already formatted)
+ self._logger.log(level, msg, **kwargs)
+ except Exception:
+ # Last resort: Try stderr fallback for any logging failure
+ # This helps diagnose critical issues (disk full, permission denied, etc.)
+ try:
+ import sys
+
+ level_name = logging.getLevelName(level)
+ sys.stderr.write(
+ f"[MSSQL-Python Logging Failed - {level_name}] {msg if 'msg' in locals() else 'Unable to format message'}\n"
+ )
+ sys.stderr.flush()
+ except:
+ pass # Even stderr failed - give up silently
+
+ # Convenience methods for logging
+
+ def debug(self, msg: str, *args, **kwargs):
+ """Log at DEBUG level (all diagnostic messages)"""
+ self._log(logging.DEBUG, msg, True, *args, **kwargs)
+
+ def info(self, msg: str, *args, **kwargs):
+ """Log at INFO level"""
+ self._log(logging.INFO, msg, True, *args, **kwargs)
+
+ def warning(self, msg: str, *args, **kwargs):
+ """Log at WARNING level"""
+ self._log(logging.WARNING, msg, True, *args, **kwargs)
+
+ def error(self, msg: str, *args, **kwargs):
+ """Log at ERROR level"""
+ self._log(logging.ERROR, msg, True, *args, **kwargs)
+
+ # Level control
+
+ def _setLevel(
+ self, level: int, output: Optional[str] = None, log_file_path: Optional[str] = None
+ ):
+ """
+ Internal method to set logging level (use setup_logging() instead).
+
+ Args:
+ level: Logging level (typically DEBUG)
+ output: Optional output mode (FILE, STDOUT, BOTH)
+ log_file_path: Optional custom path for log file
+
+ Raises:
+ ValueError: If output mode is invalid
+ """
+ # Validate and set output mode if specified
+ if output is not None:
+ if output not in (FILE, STDOUT, BOTH):
+ raise ValueError(
+ f"Invalid output mode: {output}. " f"Must be one of: {FILE}, {STDOUT}, {BOTH}"
+ )
+ self._output_mode = output
+
+ # Store custom log file path if provided
+ if log_file_path is not None:
+ self._validate_log_file_extension(log_file_path)
+ self._custom_log_path = log_file_path
+
+ # Setup handlers if not yet initialized or if output mode/path changed
+ # Handler setup is protected by _handler_lock inside _setup_handlers()
+ if not self._handlers_initialized or output is not None or log_file_path is not None:
+ self._setup_handlers()
+ self._handlers_initialized = True
+
+ # Register atexit cleanup on first handler setup
+ if not self._cleanup_registered:
+ atexit.register(self._cleanup_handlers)
+ self._cleanup_registered = True
+
+ # Set level (atomic operation, no lock needed)
+ self._logger.setLevel(level)
+
+ # Notify C++ bridge of level change
+ self._notify_cpp_level_change(level)
+
+ def getLevel(self) -> int:
+ """
+ Get the current logging level.
+
+ Returns:
+ int: Current log level
+ """
+ return self._logger.level
+
+ def isEnabledFor(self, level: int) -> bool:
+ """
+ Check if a given log level is enabled.
+
+ Args:
+ level: Log level to check
+
+ Returns:
+ bool: True if the level is enabled
+ """
+ return self._logger.isEnabledFor(level)
+
+ # Handler management
+
+ def addHandler(self, handler: logging.Handler):
+ """Add a handler to the logger (thread-safe)"""
+ with self._handler_lock:
+ self._logger.addHandler(handler)
+
+ def removeHandler(self, handler: logging.Handler):
+ """Remove a handler from the logger (thread-safe)"""
+ with self._handler_lock:
+ self._logger.removeHandler(handler)
+
+ @property
+ def handlers(self) -> list:
+ """Get list of handlers attached to the logger (thread-safe)"""
+ with self._handler_lock:
+ return self._logger.handlers[:] # Return copy to prevent external modification
+
+ def reset_handlers(self):
+ """
+ Reset/recreate handlers.
+ Useful when log file has been deleted or needs to be recreated.
+ """
+ self._setup_handlers()
+
+ def _notify_cpp_level_change(self, level: int):
+ """
+ Notify C++ bridge that log level has changed.
+ This updates the cached level in C++ for fast checks.
+
+ Args:
+ level: New log level
+ """
+ try:
+ # Import here to avoid circular dependency
+ from . import ddbc_bindings
+
+ if hasattr(ddbc_bindings, "update_log_level"):
+ ddbc_bindings.update_log_level(level)
+ except (ImportError, AttributeError):
+ # C++ bindings not available or not yet initialized
+ pass
+
+ # Properties
+
+ @property
+ def output(self) -> str:
+ """Get the current output mode"""
+ return self._output_mode
+
+ @output.setter
+ def output(self, mode: str):
+ """
+ Set the output mode.
+
+ Args:
+ mode: Output mode (FILE, STDOUT, or BOTH)
+
+ Raises:
+ ValueError: If mode is not a valid OutputMode value
+ """
+ if mode not in (FILE, STDOUT, BOTH):
+ raise ValueError(
+ f"Invalid output mode: {mode}. " f"Must be one of: {FILE}, {STDOUT}, {BOTH}"
+ )
+ self._output_mode = mode
+
+ # Only reconfigure if handlers were already initialized
+ if self._handlers_initialized:
+ self._reconfigure_handlers()
+
+ @property
+ def log_file(self) -> Optional[str]:
+ """Get the current log file path (None if file output is disabled)"""
+ return self._log_file
+
+ @property
+ def level(self) -> int:
+ """Get the current logging level"""
+ return self._logger.level
+
+
+# ============================================================================
+# Module-level exports (Primary API)
+# ============================================================================
+
+# Singleton logger instance
+logger = MSSQLLogger()
+
+# Expose the underlying Python logger for use in application code
+# This allows applications to access the same logger used by the driver
+# Usage: from mssql_python.logging import driver_logger
+driver_logger = logger._logger
+
+# ============================================================================
+# Primary API - setup_logging()
+# ============================================================================
+
+
+def setup_logging(output: str = "file", log_file_path: Optional[str] = None):
+ """
+ Enable DEBUG logging for troubleshooting.
+
+ ⚠️ PERFORMANCE WARNING: Logging adds ~2-5% overhead.
+ Only enable when investigating issues. Do NOT enable in production without reason.
+
+ Philosophy: All or nothing - if you need logging, you need to see EVERYTHING.
+ Logging is a troubleshooting tool, not a production monitoring solution.
+
+ Args:
+ output: Where to send logs (default: 'file')
+ Options: 'file', 'stdout', 'both'
+ log_file_path: Optional custom path for log file
+ Must have extension: .txt, .log, or .csv
+ If not specified, auto-generates in ./mssql_python_logs/
+
+ Examples:
+ import mssql_python
+
+ # File only (default, in mssql_python_logs folder)
+ mssql_python.setup_logging()
+
+ # Stdout only (for CI/CD)
+ mssql_python.setup_logging(output='stdout')
+
+ # Both file and stdout (for development)
+ mssql_python.setup_logging(output='both')
+
+ # Custom log file path (must use .txt, .log, or .csv extension)
+ mssql_python.setup_logging(log_file_path="/var/log/myapp.log")
+ mssql_python.setup_logging(log_file_path="/tmp/debug.txt")
+ mssql_python.setup_logging(log_file_path="/tmp/data.csv")
+
+ # Custom path with both outputs
+ mssql_python.setup_logging(output='both', log_file_path="/tmp/debug.log")
+
+ Future Enhancement:
+ For performance analysis, use the universal profiler (coming soon)
+ instead of logging. Logging is not designed for performance measurement.
+ """
+ logger._setLevel(logging.DEBUG, output, log_file_path)
+ return logger
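A short, illustrative end-to-end sketch using only names defined in this module (the log file path is an example; any handler an application already uses would work the same way via addHandler):

    import logging
    from mssql_python.logging import setup_logging, logger, driver_logger

    setup_logging(output="both", log_file_path="./mssql_python_logs/debug.log")
    print(logger.log_file)                       # active file path, or None if file output is off

    logger.addHandler(logging.StreamHandler())   # route driver records to an extra handler
    driver_logger.debug("application-side message through the driver's logger")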
diff --git a/mssql_python/logging_config.py b/mssql_python/logging_config.py
deleted file mode 100644
index 2e9eaaeaf..000000000
--- a/mssql_python/logging_config.py
+++ /dev/null
@@ -1,164 +0,0 @@
-"""
-Copyright (c) Microsoft Corporation.
-Licensed under the MIT license.
-This module provides logging configuration for the mssql_python package.
-"""
-
-import logging
-from logging.handlers import RotatingFileHandler
-import os
-import sys
-import datetime
-
-
-class LoggingManager:
- """
- Singleton class to manage logging configuration for the mssql_python package.
- This class provides a centralized way to manage logging configuration and replaces
- the previous approach using global variables.
- """
- _instance = None
- _initialized = False
- _logger = None
- _log_file = None
-
- def __new__(cls):
- if cls._instance is None:
- cls._instance = super(LoggingManager, cls).__new__(cls)
- return cls._instance
-
- def __init__(self):
- if not self._initialized:
- self._initialized = True
- self._enabled = False
-
- @classmethod
- def is_logging_enabled(cls):
- """Class method to check if logging is enabled for backward compatibility"""
- if cls._instance is None:
- return False
- return cls._instance._enabled
-
- @property
- def enabled(self):
- """Check if logging is enabled"""
- return self._enabled
-
- @property
- def log_file(self):
- """Get the current log file path"""
- return self._log_file
-
- def setup(self, mode="file", log_level=logging.DEBUG):
- """
- Set up logging configuration.
-
- This method configures the logging settings for the application.
- It sets the log level, format, and log file location.
-
- Args:
- mode (str): The logging mode ('file' or 'stdout').
- log_level (int): The logging level (default: logging.DEBUG).
- """
- # Enable logging
- self._enabled = True
-
- # Create a logger for mssql_python module
- # Use a consistent logger name to ensure we're using the same logger throughout
- self._logger = logging.getLogger("mssql_python")
- self._logger.setLevel(log_level)
-
- # Configure the root logger to ensure all messages are captured
- root_logger = logging.getLogger()
- root_logger.setLevel(log_level)
-
- # Make sure the logger propagates to the root logger
- self._logger.propagate = True
-
- # Clear any existing handlers to avoid duplicates during re-initialization
- if self._logger.handlers:
- self._logger.handlers.clear()
-
- # Construct the path to the log file
- # Directory for log files - currentdir/logs
- current_dir = os.path.dirname(os.path.abspath(__file__))
- log_dir = os.path.join(current_dir, 'logs')
- # exist_ok=True allows the directory to be created if it doesn't exist
- os.makedirs(log_dir, exist_ok=True)
-
- # Generate timestamp-based filename for better sorting and organization
- timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
- self._log_file = os.path.join(log_dir, f'mssql_python_trace_{timestamp}_{os.getpid()}.log')
-
- # Create a log handler to log to driver specific file
- # By default we only want to log to a file, max size 500MB, and keep 5 backups
- file_handler = RotatingFileHandler(self._log_file, maxBytes=512*1024*1024, backupCount=5)
- file_handler.setLevel(log_level)
-
- # Create a custom formatter that adds [Python Layer log] prefix only to non-DDBC messages
- class PythonLayerFormatter(logging.Formatter):
- def format(self, record):
- message = record.getMessage()
- # Don't add [Python Layer log] prefix if the message already has [DDBC Bindings log] or [Python Layer log]
- if "[DDBC Bindings log]" not in message and "[Python Layer log]" not in message:
- # Create a copy of the record to avoid modifying the original
- new_record = logging.makeLogRecord(record.__dict__)
- new_record.msg = f"[Python Layer log] {record.msg}"
- return super().format(new_record)
- return super().format(record)
-
- # Use our custom formatter
- formatter = PythonLayerFormatter('%(asctime)s - %(levelname)s - %(filename)s - %(message)s')
- file_handler.setFormatter(formatter)
- self._logger.addHandler(file_handler)
-
- if mode == 'stdout':
- # If the mode is stdout, then we want to log to the console as well
- stdout_handler = logging.StreamHandler(sys.stdout)
- stdout_handler.setLevel(log_level)
- # Use the same smart formatter
- stdout_handler.setFormatter(formatter)
- self._logger.addHandler(stdout_handler)
- elif mode != 'file':
- raise ValueError(f'Invalid logging mode: {mode}')
-
- return self._logger
-
- def get_logger(self):
- """
- Get the logger instance.
-
- Returns:
- logging.Logger: The logger instance, or None if logging is not enabled.
- """
- if not self.enabled:
- # If logging is not enabled, return None
- return None
- return self._logger
-
-
-# Create a singleton instance
-_manager = LoggingManager()
-
-def setup_logging(mode="file", log_level=logging.DEBUG):
- """
- Set up logging configuration.
-
- This is a wrapper around the LoggingManager.setup method for backward compatibility.
-
- Args:
- mode (str): The logging mode ('file' or 'stdout').
- log_level (int): The logging level (default: logging.DEBUG).
- """
- return _manager.setup(mode, log_level)
-
-def get_logger():
- """
- Get the logger instance.
-
- This is a wrapper around the LoggingManager.get_logger method for backward compatibility.
-
- Returns:
- logging.Logger: The logger instance.
- """
- return _manager.get_logger()
\ No newline at end of file
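For readers tracking the API change, a before/after sketch based only on the two modules in this diff (any compatibility re-exports in the package __init__ are outside this section):

    # Before: mssql_python/logging_config.py (removed above)
    from mssql_python.logging_config import setup_logging, get_logger
    setup_logging(mode="stdout")
    log = get_logger()

    # After: mssql_python/logging.py (added earlier in this diff)
    from mssql_python.logging import setup_logging
    log = setup_logging(output="stdout")    # returns the singleton MSSQLLogger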
diff --git a/mssql_python/mssql_python.pyi b/mssql_python/mssql_python.pyi
index 9f41d58dd..dd3fd96a0 100644
--- a/mssql_python/mssql_python.pyi
+++ b/mssql_python/mssql_python.pyi
@@ -1,192 +1,363 @@
"""
Copyright (c) Microsoft Corporation.
Licensed under the MIT license.
+Type stubs for mssql_python package - based on actual public API
"""
-from typing import Final, Union
+from typing import Any, Dict, List, Optional, Union, Tuple, Sequence, Callable, Iterator
import datetime
+import logging
+
+# GLOBALS - DB-API 2.0 Required Module Globals
+# https://www.python.org/dev/peps/pep-0249/#module-interface
+apilevel: str # "2.0"
+paramstyle: str # "qmark"
+threadsafety: int # 1
+
+# Module Settings - Properties that can be get/set at module level
+lowercase: bool # Controls column name case behavior
+native_uuid: bool # Controls UUID type handling
+
+# Settings Class
+class Settings:
+ lowercase: bool
+ decimal_separator: str
+ native_uuid: bool
+ def __init__(self) -> None: ...
+
+# Module-level Configuration Functions
+def get_settings() -> Settings: ...
+def setDecimalSeparator(separator: str) -> None: ...
+def getDecimalSeparator() -> str: ...
+def pooling(max_size: int = 100, idle_timeout: int = 600, enabled: bool = True) -> None: ...
+def get_info_constants() -> Dict[str, int]: ...
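+# Illustrative calls (hypothetical values):
+#   mssql_python.setDecimalSeparator(",")
+#   mssql_python.pooling(max_size=10, idle_timeout=120, enabled=True)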
-# GLOBALS
-# Read-Only
-apilevel: Final[str] = "2.0"
-paramstyle: Final[str] = "pyformat"
-threadsafety: Final[int] = 1
+# Logging Functions
+def setup_logging(mode: str = "file", log_level: int = logging.DEBUG) -> None: ...
+def get_logger() -> Optional[logging.Logger]: ...
-# Type Objects
+# DB-API 2.0 Type Objects
# https://www.python.org/dev/peps/pep-0249/#type-objects
class STRING:
- """
- This type object is used to describe columns in a database that are string-based (e.g. CHAR).
- """
+ """Type object for string-based database columns (e.g. CHAR, VARCHAR)."""
- def __init__(self) -> None: ...
+ ...
class BINARY:
- """
- This type object is used to describe (long)
- binary columns in a database (e.g. LONG, RAW, BLOBs).
- """
+ """Type object for binary database columns (e.g. BINARY, VARBINARY)."""
- def __init__(self) -> None: ...
+ ...
class NUMBER:
- """
- This type object is used to describe numeric columns in a database.
- """
+ """Type object for numeric database columns (e.g. INT, DECIMAL)."""
- def __init__(self) -> None: ...
+ ...
class DATETIME:
- """
- This type object is used to describe date/time columns in a database.
- """
+ """Type object for date/time database columns (e.g. DATE, TIMESTAMP)."""
- def __init__(self) -> None: ...
+ ...
class ROWID:
- """
- This type object is used to describe the “Row ID” column in a database.
- """
+ """Type object for row identifier columns."""
- def __init__(self) -> None: ...
+ ...
-# Type Constructors
+# DB-API 2.0 Type Constructors
+# https://www.python.org/dev/peps/pep-0249/#type-constructors
def Date(year: int, month: int, day: int) -> datetime.date: ...
def Time(hour: int, minute: int, second: int) -> datetime.time: ...
def Timestamp(
- year: int, month: int, day: int, hour: int, minute: int, second: int, microsecond: int
+ year: int,
+ month: int,
+ day: int,
+ hour: int,
+ minute: int,
+ second: int,
+ microsecond: int,
) -> datetime.datetime: ...
def DateFromTicks(ticks: int) -> datetime.date: ...
def TimeFromTicks(ticks: int) -> datetime.time: ...
def TimestampFromTicks(ticks: int) -> datetime.datetime: ...
-def Binary(string: str) -> bytes: ...
+def Binary(value: Union[str, bytes, bytearray]) -> bytes: ...
-# Exceptions
+# DB-API 2.0 Exception Hierarchy
# https://www.python.org/dev/peps/pep-0249/#exceptions
-class Warning(Exception): ...
-class Error(Exception): ...
-class InterfaceError(Error): ...
-class DatabaseError(Error): ...
-class DataError(DatabaseError): ...
-class OperationalError(DatabaseError): ...
-class IntegrityError(DatabaseError): ...
-class InternalError(DatabaseError): ...
-class ProgrammingError(DatabaseError): ...
-class NotSupportedError(DatabaseError): ...
-
-# Connection Objects
-class Connection:
- """
- Connection object for interacting with the database.
+class Warning(Exception):
+ def __init__(self, driver_error: str, ddbc_error: str) -> None: ...
+ driver_error: str
+ ddbc_error: str
+ message: str
+
+class Error(Exception):
+ def __init__(self, driver_error: str, ddbc_error: str) -> None: ...
+ driver_error: str
+ ddbc_error: str
+ message: str
+
+class InterfaceError(Error):
+ def __init__(self, driver_error: str, ddbc_error: str) -> None: ...
+
+class DatabaseError(Error):
+ def __init__(self, driver_error: str, ddbc_error: str) -> None: ...
+
+class DataError(DatabaseError):
+ def __init__(self, driver_error: str, ddbc_error: str) -> None: ...
+
+class OperationalError(DatabaseError):
+ def __init__(self, driver_error: str, ddbc_error: str) -> None: ...
+
+class IntegrityError(DatabaseError):
+ def __init__(self, driver_error: str, ddbc_error: str) -> None: ...
- https://www.python.org/dev/peps/pep-0249/#connection-objects
+class InternalError(DatabaseError):
+ def __init__(self, driver_error: str, ddbc_error: str) -> None: ...
- This class should not be instantiated directly, instead call global connect() method to
- create a Connection object.
+class ProgrammingError(DatabaseError):
+ def __init__(self, driver_error: str, ddbc_error: str) -> None: ...
+
+class NotSupportedError(DatabaseError):
+ def __init__(self, driver_error: str, ddbc_error: str) -> None: ...
+
+# Row Object
+class Row:
"""
+ Represents a database result row.
- def cursor(self) -> "Cursor":
- """
- Return a new Cursor object using the connection.
- """
- ...
-
- def commit(self) -> None:
- """
- Commit the current transaction.
- """
- ...
-
- def rollback(self) -> None:
- """
- Roll back the current transaction.
- """
- ...
-
- def close(self) -> None:
- """
- Close the connection now.
- """
- ...
-
-# Cursor Objects
-class Cursor:
+ Supports both index-based and name-based column access.
"""
- Cursor object for executing SQL queries and fetching results.
- https://www.python.org/dev/peps/pep-0249/#cursor-objects
+ def __init__(
+ self,
+ cursor: "Cursor",
+ description: List[
+ Tuple[
+ str,
+ Any,
+ Optional[int],
+ Optional[int],
+ Optional[int],
+ Optional[int],
+ Optional[bool],
+ ]
+ ],
+ values: List[Any],
+ column_map: Optional[Dict[str, int]] = None,
+ settings_snapshot: Optional[Dict[str, Any]] = None,
+ ) -> None: ...
+ def __getitem__(self, index: int) -> Any: ...
+ def __getattr__(self, name: str) -> Any: ...
+ def __eq__(self, other: Any) -> bool: ...
+ def __len__(self) -> int: ...
+ def __iter__(self) -> Iterator[Any]: ...
+ def __str__(self) -> str: ...
+ def __repr__(self) -> str: ...
+
+# DB-API 2.0 Cursor Object
+# https://www.python.org/dev/peps/pep-0249/#cursor-objects
+class Cursor:
+ """
+ Database cursor for executing SQL operations and fetching results.
- This class should not be instantiated directly, instead call cursor() from a Connection
- object to create a Cursor object.
+ This class should not be instantiated directly. Use Connection.cursor() instead.
"""
+ # DB-API 2.0 Required Attributes
+ description: Optional[
+ List[
+ Tuple[
+ str,
+ Any,
+ Optional[int],
+ Optional[int],
+ Optional[int],
+ Optional[int],
+ Optional[bool],
+ ]
+ ]
+ ]
+ rowcount: int
+ arraysize: int
+
+ # Extension Attributes
+ closed: bool
+ messages: List[str]
+
+ @property
+ def rownumber(self) -> int: ...
+ @property
+ def connection(self) -> "Connection": ...
+ def __init__(self, connection: "Connection", timeout: int = 0) -> None: ...
+
+ # DB-API 2.0 Required Methods
def callproc(
- self, procname: str, parameters: Union[None, list] = None
- ) -> Union[None, list]:
- """
- Call a stored database procedure with the given name.
- """
- ...
-
- def close(self) -> None:
- """
- Close the cursor now.
- """
- ...
-
+ self, procname: str, parameters: Optional[Sequence[Any]] = None
+ ) -> Optional[Sequence[Any]]: ...
+ def close(self) -> None: ...
def execute(
- self, operation: str, parameters: Union[None, list, dict] = None
- ) -> None:
- """
- Prepare and execute a database operation (query or command).
- """
- ...
-
- def executemany(self, operation: str, seq_of_parameters: list) -> None:
- """
- Prepare a database operation and execute it against all parameter sequences.
- """
- ...
-
- def fetchone(self) -> Union[None, tuple]:
- """
- Fetch the next row of a query result set.
- """
- ...
-
- def fetchmany(self, size: int = None) -> list:
- """
- Fetch the next set of rows of a query result.
- """
- ...
-
- def fetchall(self) -> list:
- """
- Fetch all (remaining) rows of a query result.
- """
- ...
-
- def nextset(self) -> Union[None, bool]:
- """
- Skip to the next available result set.
- """
- ...
-
- def setinputsizes(self, sizes: list) -> None:
- """
- Predefine memory areas for the operation’s parameters.
- """
- ...
-
- def setoutputsize(self, size: int, column: int = None) -> None:
- """
- Set a column buffer size for fetches of large columns.
- """
- ...
-
-# Module Functions
-def connect(connection_str: str) -> Connection:
+ self,
+ operation: str,
+ *parameters: Any,
+ use_prepare: bool = True,
+ reset_cursor: bool = True,
+ ) -> "Cursor": ...
+ def executemany(self, operation: str, seq_of_parameters: List[Sequence[Any]]) -> None: ...
+ def fetchone(self) -> Optional[Row]: ...
+ def fetchmany(self, size: Optional[int] = None) -> List[Row]: ...
+ def fetchall(self) -> List[Row]: ...
+ def nextset(self) -> Optional[bool]: ...
+ def setinputsizes(self, sizes: List[Union[int, Tuple[Any, ...]]]) -> None: ...
+ def setoutputsize(self, size: int, column: Optional[int] = None) -> None: ...
+
+# DB-API 2.0 Connection Object
+# https://www.python.org/dev/peps/pep-0249/#connection-objects
+class Connection:
"""
- Constructor for creating a connection to the database.
+ Database connection object.
+
+ This class should not be instantiated directly. Use the connect() function instead.
"""
- ...
+
+ # DB-API 2.0 Exception Attributes
+ Warning: type[Warning]
+ Error: type[Error]
+ InterfaceError: type[InterfaceError]
+ DatabaseError: type[DatabaseError]
+ DataError: type[DataError]
+ OperationalError: type[OperationalError]
+ IntegrityError: type[IntegrityError]
+ InternalError: type[InternalError]
+ ProgrammingError: type[ProgrammingError]
+ NotSupportedError: type[NotSupportedError]
+
+ # Connection Properties
+ @property
+ def timeout(self) -> int: ...
+ @timeout.setter
+ def timeout(self, value: int) -> None: ...
+ @property
+ def autocommit(self) -> bool: ...
+ @autocommit.setter
+ def autocommit(self, value: bool) -> None: ...
+ @property
+ def searchescape(self) -> str: ...
+ def __init__(
+ self,
+ connection_str: str = "",
+ autocommit: bool = False,
+ attrs_before: Optional[Dict[int, Union[int, str, bytes]]] = None,
+ timeout: int = 0,
+ **kwargs: Any,
+ ) -> None: ...
+
+ # DB-API 2.0 Required Methods
+ def cursor(self) -> Cursor: ...
+ def commit(self) -> None: ...
+ def rollback(self) -> None: ...
+ def close(self) -> None: ...
+
+ # Extension Methods
+ def setautocommit(self, value: bool = False) -> None: ...
+ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = None) -> None: ...
+ def getencoding(self) -> Dict[str, Union[str, int]]: ...
+ def setdecoding(
+ self, sqltype: int, encoding: Optional[str] = None, ctype: Optional[int] = None
+ ) -> None: ...
+ def getdecoding(self, sqltype: int) -> Dict[str, Union[str, int]]: ...
+ def set_attr(self, attribute: int, value: Union[int, str, bytes, bytearray]) -> None: ...
+ def add_output_converter(self, sqltype: int, func: Callable[[Any], Any]) -> None: ...
+ def get_output_converter(self, sqltype: Union[int, type]) -> Optional[Callable[[Any], Any]]: ...
+ def remove_output_converter(self, sqltype: Union[int, type]) -> None: ...
+ def clear_output_converters(self) -> None: ...
+ def execute(self, sql: str, *args: Any) -> Cursor: ...
+ def batch_execute(
+ self,
+ statements: List[str],
+ params: Optional[List[Union[None, Any, Tuple[Any, ...], List[Any]]]] = None,
+ reuse_cursor: Optional[Cursor] = None,
+ auto_close: bool = False,
+ ) -> Tuple[List[Union[List[Row], int]], Cursor]: ...
+ def getinfo(self, info_type: int) -> Union[str, int, bool, None]: ...
+
+ # Context Manager Support
+ def __enter__(self) -> "Connection": ...
+ def __exit__(self, *args: Any) -> None: ...
+
+# Module Connection Function
+def connect(
+ connection_str: str = "",
+ autocommit: bool = False,
+ attrs_before: Optional[Dict[int, Union[int, str, bytes]]] = None,
+ timeout: int = 0,
+ **kwargs: Any,
+) -> Connection: ...
+
+# SQL Type Constants
+SQL_CHAR: int
+SQL_VARCHAR: int
+SQL_LONGVARCHAR: int
+SQL_WCHAR: int
+SQL_WVARCHAR: int
+SQL_WLONGVARCHAR: int
+SQL_DECIMAL: int
+SQL_NUMERIC: int
+SQL_BIT: int
+SQL_TINYINT: int
+SQL_SMALLINT: int
+SQL_INTEGER: int
+SQL_BIGINT: int
+SQL_REAL: int
+SQL_FLOAT: int
+SQL_DOUBLE: int
+SQL_BINARY: int
+SQL_VARBINARY: int
+SQL_LONGVARBINARY: int
+SQL_DATE: int
+SQL_TIME: int
+SQL_TIMESTAMP: int
+SQL_WMETADATA: int
+
+# Connection Attribute Constants
+SQL_ATTR_ACCESS_MODE: int
+SQL_ATTR_CONNECTION_TIMEOUT: int
+SQL_ATTR_CURRENT_CATALOG: int
+SQL_ATTR_LOGIN_TIMEOUT: int
+SQL_ATTR_PACKET_SIZE: int
+SQL_ATTR_TXN_ISOLATION: int
+
+# Transaction Isolation Level Constants
+SQL_TXN_READ_UNCOMMITTED: int
+SQL_TXN_READ_COMMITTED: int
+SQL_TXN_REPEATABLE_READ: int
+SQL_TXN_SERIALIZABLE: int
+
+# Access Mode Constants
+SQL_MODE_READ_WRITE: int
+SQL_MODE_READ_ONLY: int
+
+# GetInfo Constants for Connection.getinfo()
+SQL_DRIVER_NAME: int
+SQL_DRIVER_VER: int
+SQL_DRIVER_ODBC_VER: int
+SQL_DATA_SOURCE_NAME: int
+SQL_DATABASE_NAME: int
+SQL_SERVER_NAME: int
+SQL_USER_NAME: int
+SQL_SQL_CONFORMANCE: int
+SQL_KEYWORDS: int
+SQL_IDENTIFIER_QUOTE_CHAR: int
+SQL_SEARCH_PATTERN_ESCAPE: int
+SQL_CATALOG_TERM: int
+SQL_SCHEMA_TERM: int
+SQL_TABLE_TERM: int
+SQL_PROCEDURE_TERM: int
+SQL_TXN_CAPABLE: int
+SQL_DEFAULT_TXN_ISOLATION: int
+SQL_NUMERIC_FUNCTIONS: int
+SQL_STRING_FUNCTIONS: int
+SQL_DATETIME_FUNCTIONS: int
+SQL_MAX_COLUMN_NAME_LEN: int
+SQL_MAX_TABLE_NAME_LEN: int
+SQL_MAX_SCHEMA_NAME_LEN: int
+SQL_MAX_CATALOG_NAME_LEN: int
+SQL_MAX_IDENTIFIER_LEN: int
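For orientation, here is a minimal usage sketch of the public surface declared in the stub above. The connection string, database objects, and values are illustrative placeholders; exact behavior depends on the installed ODBC driver and the target server.

```python
import mssql_python

# Placeholder connection string - adjust server, database, and authentication.
conn = mssql_python.connect(
    "Server=localhost;Database=master;Trusted_Connection=yes;",
    autocommit=False,
)
cursor = conn.cursor()

# qmark-style (positional) parameters
cursor.execute("SELECT name FROM sys.databases WHERE database_id = ?", 1)
row = cursor.fetchone()
if row is not None:
    print(row[0], row.name)  # Row allows both index- and name-based access

conn.commit()
cursor.close()
conn.close()
```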
diff --git a/mssql_python/msvcp140.dll b/mssql_python/msvcp140.dll
deleted file mode 100644
index 0a9b13d75..000000000
Binary files a/mssql_python/msvcp140.dll and /dev/null differ
diff --git a/mssql_python/parameter_helper.py b/mssql_python/parameter_helper.py
new file mode 100644
index 000000000..77cd2259f
--- /dev/null
+++ b/mssql_python/parameter_helper.py
@@ -0,0 +1,348 @@
+"""
+Copyright (c) Microsoft Corporation.
+Licensed under the MIT license.
+
+Parameter style conversion helpers for mssql-python.
+
+Supports both qmark (?) and pyformat (%(name)s) parameter styles.
+Uses a simple character-scanning approach; it does NOT parse SQL contexts.
+
+Reference: https://www.python.org/dev/peps/pep-0249/#paramstyle
+"""
+
+from typing import Dict, List, Tuple, Any, Union
+from mssql_python.logging import logger
+
+# Distinctive marker for escaped percent signs during pyformat conversion
+# Uses a unique prefix/suffix that's extremely unlikely to appear in real SQL
+_ESCAPED_PERCENT_MARKER = "__MSSQL_PYFORMAT_ESCAPED_PERCENT_PLACEHOLDER__"
+
+
+def parse_pyformat_params(sql: str) -> List[str]:
+ """
+ Extract %(name)s parameter names from SQL string.
+
+ Uses a simple character-scanning approach - it does NOT parse SQL contexts
+ (strings, comments, identifiers). This means %(name)s patterns inside SQL
+ string literals or comments WILL be detected as parameters.
+
+ Args:
+ sql: SQL query string with %(name)s placeholders
+
+ Returns:
+ List of parameter names in order of appearance (with duplicates if reused)
+
+ Examples:
+ >>> parse_pyformat_params("SELECT * FROM users WHERE id = %(id)s")
+ ['id']
+
+ >>> parse_pyformat_params("WHERE name = %(name)s OR email = %(name)s")
+ ['name', 'name']
+
+ >>> parse_pyformat_params("SELECT * FROM %(table)s WHERE id = %(id)s")
+ ['table', 'id']
+ """
+ logger.debug(
+ "parse_pyformat_params: Starting parse - sql_length=%d, sql_preview=%s",
+ len(sql),
+ sql[:100] if len(sql) > 100 else sql,
+ )
+ params = []
+ i = 0
+ length = len(sql)
+
+ while i < length:
+ # Look for %(
+ if i + 2 < length and sql[i] == "%" and sql[i + 1] == "(":
+ # Find the closing )
+ j = i + 2
+ while j < length and sql[j] != ")":
+ j += 1
+
+ # Check if we found ) and it's followed by 's'
+ if j < length and sql[j] == ")":
+ if j + 1 < length and sql[j + 1] == "s":
+ # Extract parameter name
+ param_name = sql[i + 2 : j]
+ params.append(param_name)
+ logger.debug(
+ "parse_pyformat_params: Found parameter '%s' at position %d",
+ param_name,
+ i,
+ )
+ i = j + 2
+ continue
+
+ i += 1
+
+ logger.debug(
+ "parse_pyformat_params: Completed - found %d parameters: %s",
+ len(params),
+ params,
+ )
+ return params
+
+
+def convert_pyformat_to_qmark(sql: str, param_dict: Dict[str, Any]) -> Tuple[str, Tuple[Any, ...]]:
+ """
+ Convert pyformat-style query to qmark-style for ODBC execution.
+
+ Validates that all required parameters are present and builds a positional
+ parameter tuple. Supports parameter reuse (same parameter appearing multiple times).
+
+ Args:
+ sql: SQL query with %(name)s placeholders
+ param_dict: Dictionary of parameter values
+
+ Returns:
+ Tuple of (rewritten_sql_with_?, positional_params_tuple)
+
+ Raises:
+ KeyError: If required parameter is missing from param_dict
+
+ Examples:
+ >>> convert_pyformat_to_qmark(
+ ... "SELECT * FROM users WHERE id = %(id)s",
+ ... {"id": 42}
+ ... )
+ ("SELECT * FROM users WHERE id = ?", (42,))
+
+ >>> convert_pyformat_to_qmark(
+ ... "WHERE name = %(name)s OR email = %(name)s",
+ ... {"name": "alice"}
+ ... )
+ ("WHERE name = ? OR email = ?", ("alice", "alice"))
+ """
+ logger.debug(
+ "convert_pyformat_to_qmark: Starting conversion - sql_length=%d, param_count=%d",
+ len(sql),
+ len(param_dict),
+ )
+ logger.debug(
+ "convert_pyformat_to_qmark: SQL preview: %s",
+ sql[:200] if len(sql) > 200 else sql,
+ )
+ logger.debug(
+ "convert_pyformat_to_qmark: Parameters provided: %s",
+ list(param_dict.keys()),
+ )
+
+ # Support %% escaping - replace %% with a placeholder before parsing
+ # This allows users to have literal % in their SQL
+ escaped_sql = sql.replace("%%", _ESCAPED_PERCENT_MARKER)
+
+ if "%%" in sql:
+ logger.debug(
+ "convert_pyformat_to_qmark: Detected %d escaped percent sequences (%%%%)",
+ sql.count("%%"),
+ )
+
+ # Extract parameter names in order
+ param_names = parse_pyformat_params(escaped_sql)
+
+ if not param_names:
+ logger.debug(
+ "convert_pyformat_to_qmark: No pyformat parameters found - returning SQL as-is"
+ )
+ # No parameters found - restore escaped %% and return as-is
+ restored_sql = escaped_sql.replace(_ESCAPED_PERCENT_MARKER, "%")
+ return restored_sql, ()
+
+ logger.debug(
+ "convert_pyformat_to_qmark: Extracted %d parameter references (with duplicates): %s",
+ len(param_names),
+ param_names,
+ )
+ logger.debug(
+ "convert_pyformat_to_qmark: Unique parameters needed: %s",
+ sorted(set(param_names)),
+ )
+
+ # Validate all required parameters are present
+ missing = set(param_names) - set(param_dict.keys())
+ if missing:
+ # Provide helpful error message
+ missing_list = sorted(missing)
+ required_list = sorted(set(param_names))
+ provided_list = sorted(param_dict.keys())
+
+ logger.error(
+ "convert_pyformat_to_qmark: Missing parameters - required=%s, provided=%s, missing=%s",
+ required_list,
+ provided_list,
+ missing_list,
+ )
+
+ error_msg = (
+ f"Missing required parameter(s): {', '.join(repr(p) for p in missing_list)}. "
+ f"Query requires: {required_list}, provided: {provided_list}"
+ )
+ raise KeyError(error_msg)
+
+ # Build positional parameter tuple (with duplicates if param reused)
+ positional_params = tuple(param_dict[name] for name in param_names)
+
+ logger.debug(
+ "convert_pyformat_to_qmark: Built positional params tuple - length=%d",
+ len(positional_params),
+ )
+
+ # Replace %(name)s with ? using simple string replacement
+ # We replace each unique parameter name to avoid issues with overlapping names
+ rewritten_sql = escaped_sql
+ unique_params = set(param_names)
+ logger.debug(
+ "convert_pyformat_to_qmark: Replacing %d unique parameter placeholders with ?",
+ len(unique_params),
+ )
+
+ for param_name in unique_params: # Use set to avoid duplicate replacements
+ pattern = f"%({param_name})s"
+ occurrences = rewritten_sql.count(pattern)
+ rewritten_sql = rewritten_sql.replace(pattern, "?")
+ logger.debug(
+ "convert_pyformat_to_qmark: Replaced parameter '%s' (%d occurrences)",
+ param_name,
+ occurrences,
+ )
+
+ # Restore escaped %% back to %
+ if _ESCAPED_PERCENT_MARKER in rewritten_sql:
+ marker_count = rewritten_sql.count(_ESCAPED_PERCENT_MARKER)
+ rewritten_sql = rewritten_sql.replace(_ESCAPED_PERCENT_MARKER, "%")
+ logger.debug(
+ "convert_pyformat_to_qmark: Restored %d escaped percent markers to %%",
+ marker_count,
+ )
+
+ logger.debug(
+ "convert_pyformat_to_qmark: Conversion complete - result_sql_length=%d, param_count=%d",
+ len(rewritten_sql),
+ len(positional_params),
+ )
+ logger.debug(
+ "convert_pyformat_to_qmark: Result SQL preview: %s",
+ rewritten_sql[:200] if len(rewritten_sql) > 200 else rewritten_sql,
+ )
+
+ logger.debug(
+ "Converted pyformat to qmark: params=%s, positional=%s",
+ list(param_dict.keys()),
+ positional_params,
+ )
+
+ return rewritten_sql, positional_params
+
+
+def detect_and_convert_parameters(
+ sql: str, parameters: Union[None, Tuple, List, Dict]
+) -> Tuple[str, Union[None, Tuple, List]]:
+ """
+ Auto-detect parameter style and convert to qmark if needed.
+
+ Detects parameter style based on the type of parameters:
+ - None: No parameters
+ - Tuple/List: qmark style (?) - pass through unchanged
+ - Dict: pyformat style (%(name)s) - convert to qmark
+
+ Args:
+ sql: SQL query string
+ parameters: Parameters in any supported format
+
+ Returns:
+ Tuple of (sql, parameters) where parameters are in qmark format
+
+ Raises:
+ TypeError: If parameters type doesn't match placeholders in SQL
+ KeyError: If required pyformat parameter is missing
+
+ Examples:
+ >>> detect_and_convert_parameters(
+ ... "SELECT * FROM users WHERE id = ?",
+ ... (42,)
+ ... )
+ ("SELECT * FROM users WHERE id = ?", (42,))
+
+ >>> detect_and_convert_parameters(
+ ... "SELECT * FROM users WHERE id = %(id)s",
+ ... {"id": 42}
+ ... )
+ ("SELECT * FROM users WHERE id = ?", (42,))
+ """
+ logger.debug(
+ "detect_and_convert_parameters: Starting - sql_length=%d, parameters_type=%s",
+ len(sql),
+ type(parameters).__name__ if parameters is not None else "None",
+ )
+
+ # No parameters
+ if parameters is None:
+ logger.debug("detect_and_convert_parameters: No parameters provided - returning as-is")
+ return sql, None
+
+ # Qmark style - tuple or list
+ if isinstance(parameters, (tuple, list)):
+ logger.debug(
+ "detect_and_convert_parameters: Detected qmark-style parameters (%s) - count=%d",
+ type(parameters).__name__,
+ len(parameters),
+ )
+
+ # Check if SQL has pyformat placeholders
+ param_names = parse_pyformat_params(sql)
+ if param_names:
+ logger.error(
+ "detect_and_convert_parameters: Parameter style mismatch - SQL has pyformat placeholders %s but received %s",
+ param_names,
+ type(parameters).__name__,
+ )
+ # SQL has %(name)s but user passed tuple/list
+ raise TypeError(
+ f"Parameter style mismatch: query uses named placeholders (%(name)s), "
+ f"but {type(parameters).__name__} was provided. "
+ f"Use dict for named parameters. Example: "
+ f'cursor.execute(sql, {{"param1": value1, "param2": value2}})'
+ )
+
+ # Valid qmark style - pass through
+ logger.debug("detect_and_convert_parameters: Valid qmark style - passing through unchanged")
+ return sql, parameters
+
+ # Pyformat style - dict
+ if isinstance(parameters, dict):
+ logger.debug(
+ "detect_and_convert_parameters: Detected pyformat-style parameters (dict) - count=%d, keys=%s",
+ len(parameters),
+ list(parameters.keys()),
+ )
+
+ # Check if SQL appears to have qmark placeholders
+ if "?" in sql and not parse_pyformat_params(sql):
+ logger.error(
+ "detect_and_convert_parameters: Parameter style mismatch - SQL has ? placeholders but received dict"
+ )
+ # SQL has ? but user passed dict and no %(name)s found
+ raise TypeError(
+ f"Parameter style mismatch: query uses positional placeholders (?), "
+ f"but dict was provided. "
+ f"Use tuple/list for positional parameters. Example: "
+ f"cursor.execute(sql, (value1, value2))"
+ )
+
+ logger.debug("detect_and_convert_parameters: Valid pyformat style - converting to qmark")
+ # Convert pyformat to qmark
+ converted_sql, qmark_params = convert_pyformat_to_qmark(sql, parameters)
+ logger.debug(
+ "detect_and_convert_parameters: Conversion complete - qmark_param_count=%d",
+ len(qmark_params) if qmark_params else 0,
+ )
+ return converted_sql, qmark_params
+
+ # Unsupported type
+ logger.error(
+ "detect_and_convert_parameters: Unsupported parameter type - %s",
+ type(parameters).__name__,
+ )
+ raise TypeError(
+ f"Parameters must be tuple, list, dict, or None. " f"Got {type(parameters).__name__}"
+ )
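As a quick illustration of the helpers above (assuming the package is importable in your environment), the following exercises named-parameter reuse, %% escaping, and qmark pass-through; the SQL text and values are illustrative.

```python
from mssql_python.parameter_helper import (
    convert_pyformat_to_qmark,
    detect_and_convert_parameters,
)

# Named-parameter reuse: one dict entry can feed several placeholders.
sql, params = convert_pyformat_to_qmark(
    "SELECT * FROM users WHERE name = %(name)s OR alias = %(name)s",
    {"name": "alice"},
)
# sql    -> "SELECT * FROM users WHERE name = ? OR alias = ?"
# params -> ("alice", "alice")

# %% escaping: literal percent signs survive the conversion.
sql, params = convert_pyformat_to_qmark(
    "SELECT * FROM users WHERE note LIKE '%%done%%' AND id = %(id)s",
    {"id": 7},
)
# sql    -> "SELECT * FROM users WHERE note LIKE '%done%' AND id = ?"
# params -> (7,)

# qmark input is passed through unchanged.
sql, params = detect_and_convert_parameters("SELECT * FROM users WHERE id = ?", (42,))
# sql    -> "SELECT * FROM users WHERE id = ?", params -> (42,)
```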
diff --git a/mssql_python/pooling.py b/mssql_python/pooling.py
index 3658242a2..a2811d9f1 100644
--- a/mssql_python/pooling.py
+++ b/mssql_python/pooling.py
@@ -1,47 +1,139 @@
-# mssql_python/pooling.py
+"""
+Copyright (c) Microsoft Corporation.
+Licensed under the MIT license.
+This module provides connection pooling functionality for the mssql_python package.
+"""
+
import atexit
-from mssql_python import ddbc_bindings
import threading
+from typing import Dict
+
+from mssql_python import ddbc_bindings
+from mssql_python.logging import logger
+
class PoolingManager:
- _enabled = False
- _initialized = False
- _lock = threading.Lock()
- _config = {
- "max_size": 100,
- "idle_timeout": 600
- }
+ """
+ Manages connection pooling for the mssql_python package.
+
+ This class provides thread-safe connection pooling functionality using the
+ underlying DDBC bindings. It follows a singleton pattern with class-level
+ state management.
+ """
+
+ _enabled: bool = False
+ _initialized: bool = False
+ _pools_closed: bool = False # Track if pools have been closed
+ _lock: threading.Lock = threading.Lock()
+ _config: Dict[str, int] = {"max_size": 100, "idle_timeout": 600}
@classmethod
- def enable(cls, max_size=100, idle_timeout=600):
+ def enable(cls, max_size: int = 100, idle_timeout: int = 600) -> None:
+ """
+ Enable connection pooling with specified parameters.
+
+ Args:
+ max_size: Maximum number of connections in the pool (default: 100)
+ idle_timeout: Timeout in seconds for idle connections (default: 600)
+
+ Raises:
+ ValueError: If parameters are invalid (max_size <= 0 or idle_timeout < 0)
+ """
+ logger.debug(
+ "PoolingManager.enable: Attempting to enable pooling - max_size=%d, idle_timeout=%d",
+ max_size,
+ idle_timeout,
+ )
with cls._lock:
if cls._enabled:
+ logger.debug("PoolingManager.enable: Pooling already enabled, skipping")
return
if max_size <= 0 or idle_timeout < 0:
+ logger.error(
+ "PoolingManager.enable: Invalid parameters - max_size=%d, idle_timeout=%d",
+ max_size,
+ idle_timeout,
+ )
raise ValueError("Invalid pooling parameters")
+ logger.info(
+ "PoolingManager.enable: Enabling connection pooling - max_size=%d, idle_timeout=%d seconds",
+ max_size,
+ idle_timeout,
+ )
ddbc_bindings.enable_pooling(max_size, idle_timeout)
cls._config["max_size"] = max_size
cls._config["idle_timeout"] = idle_timeout
cls._enabled = True
cls._initialized = True
+ logger.info("PoolingManager.enable: Connection pooling enabled successfully")
@classmethod
- def disable(cls):
+ def disable(cls) -> None:
+ """
+ Disable connection pooling and clean up resources.
+
+ This method safely disables pooling and closes existing connections.
+ It can be called multiple times safely.
+ """
+ logger.debug("PoolingManager.disable: Attempting to disable pooling")
with cls._lock:
+ if (
+ cls._enabled and not cls._pools_closed
+ ): # Only cleanup if enabled and not already closed
+ logger.info("PoolingManager.disable: Closing connection pools")
+ ddbc_bindings.close_pooling()
+ logger.info("PoolingManager.disable: Connection pools closed successfully")
+ else:
+ logger.debug("PoolingManager.disable: Pooling already disabled or closed")
+ cls._pools_closed = True
cls._enabled = False
cls._initialized = True
@classmethod
- def is_enabled(cls):
+ def is_enabled(cls) -> bool:
+ """
+ Check if connection pooling is currently enabled.
+
+ Returns:
+ bool: True if pooling is enabled, False otherwise
+ """
return cls._enabled
@classmethod
- def is_initialized(cls):
+ def is_initialized(cls) -> bool:
+ """
+ Check if the pooling manager has been initialized.
+
+ Returns:
+ bool: True if initialized (either enabled or disabled), False otherwise
+ """
return cls._initialized
-
+
+ @classmethod
+ def _reset_for_testing(cls) -> None:
+ """Reset pooling state - for testing purposes only"""
+ with cls._lock:
+ cls._enabled = False
+ cls._initialized = False
+ cls._pools_closed = False
+
+
@atexit.register
def shutdown_pooling():
- if PoolingManager.is_enabled():
- ddbc_bindings.close_pooling()
+ """
+ Shut down connection pooling during application exit.
+
+ This function is registered with atexit to ensure proper cleanup of
+ connection pools when the application terminates.
+ """
+ logger.debug("shutdown_pooling: atexit cleanup triggered")
+ with PoolingManager._lock:
+ if PoolingManager._enabled and not PoolingManager._pools_closed:
+ logger.info("shutdown_pooling: Closing connection pools during application exit")
+ ddbc_bindings.close_pooling()
+ PoolingManager._pools_closed = True
+ logger.info("shutdown_pooling: Connection pools closed successfully")
+ else:
+ logger.debug("shutdown_pooling: No active pools to close")
diff --git a/mssql_python/pybind/CMakeLists.txt b/mssql_python/pybind/CMakeLists.txt
index 489dfd459..458933185 100644
--- a/mssql_python/pybind/CMakeLists.txt
+++ b/mssql_python/pybind/CMakeLists.txt
@@ -5,10 +5,41 @@ project(ddbc_bindings)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
+# Enable verbose output to see actual compiler/linker commands
+set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "Verbose output" FORCE)
+
+# Warn about, and treat as errors, any use of deprecated CMake behavior
+set(CMAKE_ERROR_DEPRECATED TRUE)
+set(CMAKE_WARN_DEPRECATED TRUE)
+
if (MSVC)
+ # Security compiler options for OneBranch compliance
+ message(STATUS "Applying MSVC security compiler options for OneBranch compliance")
+
+ add_compile_options(
+ /GS # Buffer security check - detects buffer overruns
+ /guard:cf # Control Flow Guard - protects against control flow hijacking
+ )
+
+ add_link_options(
+ /DYNAMICBASE # ASLR - Address Space Layout Randomization
+ /NXCOMPAT # DEP - Data Execution Prevention
+ /GUARD:CF # Control Flow Guard (linker)
+ )
+
+ # SAFESEH only for x86 (32-bit) builds
+ if(CMAKE_SIZEOF_VOID_P EQUAL 4) # 32-bit
+ message(STATUS "Applying /SAFESEH for 32-bit build")
+ add_link_options(/SAFESEH) # Safe Structured Exception Handling
+ else()
+ message(STATUS "Skipping /SAFESEH (not applicable for 64-bit builds)")
+ endif()
+
# Enable PDB generation for all target types
add_compile_options("$<$:/Zi>")
add_link_options("$<$:/DEBUG /OPT:REF /OPT:ICF>")
+
+ message(STATUS "Security flags applied: /GS /guard:cf /DYNAMICBASE /NXCOMPAT /GUARD:CF")
endif()
# Detect platform
@@ -186,8 +217,8 @@ message(STATUS "Final Python library directory: ${PYTHON_LIB_DIR}")
set(DDBC_SOURCE "ddbc_bindings.cpp")
message(STATUS "Using standard source file: ${DDBC_SOURCE}")
-# Include connection module for Windows
-add_library(ddbc_bindings MODULE ${DDBC_SOURCE} connection/connection.cpp connection/connection_pool.cpp)
+# Include connection module and logger bridge
+add_library(ddbc_bindings MODULE ${DDBC_SOURCE} connection/connection.cpp connection/connection_pool.cpp logger_bridge.cpp)
# Set the output name to include Python version and architecture
# Use appropriate file extension based on platform
@@ -275,6 +306,21 @@ if(MSVC)
target_compile_options(ddbc_bindings PRIVATE /W4 /WX)
endif()
+# Add warning flags for GCC/Clang on Linux and macOS
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+ target_compile_options(ddbc_bindings PRIVATE
+ -Werror # Treat warnings as errors
+ -Wattributes # Enable attribute warnings (cross-compiler)
+ )
+
+ # GCC-specific warning flags
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+ target_compile_options(ddbc_bindings PRIVATE
+ -Wint-to-pointer-cast # GCC-specific warning for integer-to-pointer casts
+ )
+ endif()
+endif()
+
# Add macOS-specific string conversion fix
if(APPLE)
message(STATUS "Enabling macOS string conversion fix")
diff --git a/mssql_python/pybind/README.md b/mssql_python/pybind/README.md
index faf0fbe66..f9cd28f70 100644
--- a/mssql_python/pybind/README.md
+++ b/mssql_python/pybind/README.md
@@ -99,7 +99,12 @@ mssql_python/
│ ├── debian_ubuntu/
│ │ ├── x86_64/lib/
│ │ └── arm64/lib/
-│ └── rhel/
+│ ├── rhel/
+│ │ ├── x86_64/lib/
+│ │ └── arm64/lib/
+│ ├── suse/
+│ │ └── x86_64/lib/ # ARM64 not supported by Microsoft
+│ └── alpine/
│ ├── x86_64/lib/
│ └── arm64/lib/
└── ddbc_bindings.cp{python_version}-{architecture}.{extension}
@@ -152,6 +157,20 @@ Linux builds support multiple distributions:
- `libmsodbcsql-18.5.so.1.1` - Main driver
- `libodbcinst.so.2` - Installer library
+**SUSE/openSUSE x86_64:**
+- `libmsodbcsql-18.5.so.1.1` - Main driver
+- `libodbcinst.so.2` - Installer library
+
+> **Note:** SUSE/openSUSE ARM64 is not supported by Microsoft ODBC Driver 18
+
+**Alpine x86_64:**
+- `libmsodbcsql-18.5.so.1.1` - Main driver
+- `libodbcinst.so.2` - Installer library
+
+**Alpine ARM64:**
+- `libmsodbcsql-18.5.so.1.1` - Main driver
+- `libodbcinst.so.2` - Installer library
+
## **Python Extension Modules**
Your build system generates architecture-specific Python extension modules:
diff --git a/mssql_python/pybind/build.sh b/mssql_python/pybind/build.sh
index dbd1e6c39..811777285 100755
--- a/mssql_python/pybind/build.sh
+++ b/mssql_python/pybind/build.sh
@@ -26,6 +26,13 @@ else
exit 1
fi
+# Check for coverage mode and set flags accordingly
+COVERAGE_MODE=false
+if [[ "${1:-}" == "codecov" || "${1:-}" == "--coverage" ]]; then
+ COVERAGE_MODE=true
+ echo "[MODE] Enabling Clang coverage instrumentation"
+fi
+
# Get Python version from active interpreter
PYTAG=$(python -c "import sys; print(f'{sys.version_info.major}{sys.version_info.minor}')")
@@ -47,20 +54,30 @@ if [ -d "build" ]; then
echo "Build directory removed."
fi
-# Create build directory for universal binary
+# Create build directory
BUILD_DIR="${SOURCE_DIR}/build"
mkdir -p "${BUILD_DIR}"
cd "${BUILD_DIR}"
echo "[DIAGNOSTIC] Changed to build directory: ${BUILD_DIR}"
-# Configure CMake (architecture settings handled in CMakeLists.txt)
+# Configure CMake (with Clang coverage instrumentation on Linux only - codecov is not supported for macOS)
echo "[DIAGNOSTIC] Running CMake configure"
-if [[ "$OS" == "macOS" ]]; then
- echo "[DIAGNOSTIC] Configuring for macOS (universal2 is set automatically)"
- cmake -DMACOS_STRING_FIX=ON "${SOURCE_DIR}"
+if [[ "$COVERAGE_MODE" == "true" && "$OS" == "Linux" ]]; then
+ echo "[ACTION] Configuring for Linux with Clang coverage instrumentation"
+ cmake -DARCHITECTURE="$DETECTED_ARCH" \
+ -DCMAKE_C_COMPILER=clang \
+ -DCMAKE_CXX_COMPILER=clang++ \
+ -DCMAKE_CXX_FLAGS="-fprofile-instr-generate -fcoverage-mapping" \
+ -DCMAKE_C_FLAGS="-fprofile-instr-generate -fcoverage-mapping" \
+ "${SOURCE_DIR}"
else
- echo "[DIAGNOSTIC] Configuring for Linux with architecture: $DETECTED_ARCH"
- cmake -DARCHITECTURE="$DETECTED_ARCH" "${SOURCE_DIR}"
+ if [[ "$OS" == "macOS" ]]; then
+ echo "[ACTION] Configuring for macOS (default build)"
+ cmake -DMACOS_STRING_FIX=ON "${SOURCE_DIR}"
+ else
+ echo "[ACTION] Configuring for Linux with architecture: $DETECTED_ARCH"
+ cmake -DARCHITECTURE="$DETECTED_ARCH" "${SOURCE_DIR}"
+ fi
fi
# Check if CMake configuration succeeded
@@ -101,6 +118,21 @@ else
else
echo "[WARNING] macOS dylib configuration encountered issues"
fi
+
+ # Codesign the Python extension module (.so file) to prevent SIP crashes
+ echo "[ACTION] Codesigning Python extension module..."
+ SO_FILE="$PARENT_DIR/"*.so
+ for so in $SO_FILE; do
+ if [ -f "$so" ]; then
+ echo " Signing: $so"
+ codesign -s - -f "$so" 2>/dev/null
+ if [ $? -eq 0 ]; then
+ echo "[SUCCESS] Python extension codesigned: $so"
+ else
+ echo "[WARNING] Failed to codesign: $so"
+ fi
+ fi
+ done
fi
else
echo "[ERROR] Failed to copy .so file"
diff --git a/mssql_python/pybind/connection/connection.cpp b/mssql_python/pybind/connection/connection.cpp
index a5c5f37f0..32ed55075 100644
--- a/mssql_python/pybind/connection/connection.cpp
+++ b/mssql_python/pybind/connection/connection.cpp
@@ -1,15 +1,21 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
-// INFO|TODO - Note that is file is Windows specific right now. Making it arch agnostic will be
-// taken up in future
-
-#include "connection.h"
-#include "connection_pool.h"
-#include
+#include "connection/connection.h"
+#include "connection/connection_pool.h"
+#include
+#include
#include
+#include
+#include
+#include
+#include
-#define SQL_COPT_SS_ACCESS_TOKEN 1256 // Custom attribute ID for access token
+#define SQL_COPT_SS_ACCESS_TOKEN 1256 // Custom attribute ID for access token
+#define SQL_MAX_SMALL_INT 32767 // Maximum value for SQLSMALLINT
+
+// Logging uses LOG() macro for all diagnostic output
+#include "logger_bridge.hpp"
static SqlHandlePtr getEnvHandle() {
static SqlHandlePtr envHandle = []() -> SqlHandlePtr {
@@ -23,7 +29,8 @@ static SqlHandlePtr getEnvHandle() {
if (!SQL_SUCCEEDED(ret)) {
ThrowStdException("Failed to allocate environment handle");
}
- ret = SQLSetEnvAttr_ptr(env, SQL_ATTR_ODBC_VERSION, (void*)SQL_OV_ODBC3_80, 0);
+ ret = SQLSetEnvAttr_ptr(env, SQL_ATTR_ODBC_VERSION,
+ reinterpret_cast(SQL_OV_ODBC3_80), 0);
if (!SQL_SUCCEEDED(ret)) {
ThrowStdException("Failed to set environment attributes");
}
@@ -44,14 +51,14 @@ Connection::Connection(const std::wstring& conn_str, bool use_pool)
}
Connection::~Connection() {
- disconnect(); // fallback if user forgets to disconnect
+ disconnect(); // fallback if user forgets to disconnect
}
// Allocates connection handle
void Connection::allocateDbcHandle() {
auto _envHandle = getEnvHandle();
SQLHANDLE dbc = nullptr;
- LOG("Allocate SQL Connection Handle");
+ LOG("Allocating SQL Connection Handle");
SQLRETURN ret = SQLAllocHandle_ptr(SQL_HANDLE_DBC, _envHandle->get(), &dbc);
checkError(ret);
_dbcHandle = std::make_shared(static_cast(SQL_HANDLE_DBC), dbc);
@@ -68,20 +75,18 @@ void Connection::connect(const py::dict& attrs_before) {
}
}
SQLWCHAR* connStrPtr;
-#if defined(__APPLE__) || defined(__linux__) // macOS/Linux specific handling
+#if defined(__APPLE__) || defined(__linux__) // macOS/Linux handling
LOG("Creating connection string buffer for macOS/Linux");
std::vector connStrBuffer = WStringToSQLWCHAR(_connStr);
// Ensure the buffer is null-terminated
- LOG("Connection string buffer size - {}", connStrBuffer.size());
+ LOG("Connection string buffer size=%zu", connStrBuffer.size());
connStrPtr = connStrBuffer.data();
LOG("Connection string buffer created");
#else
connStrPtr = const_cast(_connStr.c_str());
#endif
- SQLRETURN ret = SQLDriverConnect_ptr(
- _dbcHandle->get(), nullptr,
- connStrPtr, SQL_NTS,
- nullptr, 0, nullptr, SQL_DRIVER_NOPROMPT);
+ SQLRETURN ret = SQLDriverConnect_ptr(_dbcHandle->get(), nullptr, connStrPtr, SQL_NTS, nullptr,
+ 0, nullptr, SQL_DRIVER_NOPROMPT);
checkError(ret);
updateLastUsed();
}
@@ -89,17 +94,59 @@ void Connection::connect(const py::dict& attrs_before) {
void Connection::disconnect() {
if (_dbcHandle) {
LOG("Disconnecting from database");
+
+ // CRITICAL FIX: Mark all child statement handles as implicitly freed
+ // When we free the DBC handle below, the ODBC driver will automatically free
+ // all child STMT handles. We need to tell the SqlHandle objects about this
+ // so they don't try to free the handles again during their destruction.
+
+ // THREAD-SAFETY: Lock mutex to safely access _childStatementHandles
+ // This protects against concurrent allocStatementHandle() calls or GC finalizers
+ {
+ std::lock_guard lock(_childHandlesMutex);
+
+ // First compact: remove expired weak_ptrs (they're already destroyed)
+ size_t originalSize = _childStatementHandles.size();
+ _childStatementHandles.erase(
+ std::remove_if(_childStatementHandles.begin(), _childStatementHandles.end(),
+ [](const std::weak_ptr& wp) { return wp.expired(); }),
+ _childStatementHandles.end());
+
+ LOG("Compacted child handles: %zu -> %zu (removed %zu expired)",
+ originalSize, _childStatementHandles.size(),
+ originalSize - _childStatementHandles.size());
+
+ LOG("Marking %zu child statement handles as implicitly freed",
+ _childStatementHandles.size());
+ for (auto& weakHandle : _childStatementHandles) {
+ if (auto handle = weakHandle.lock()) {
+ // SAFETY ASSERTION: Only STMT handles should be in this vector
+ // This is guaranteed by allocStatementHandle() which only creates STMT handles
+ // If this assertion fails, it indicates a serious bug in handle tracking
+ if (handle->type() != SQL_HANDLE_STMT) {
+ LOG_ERROR("CRITICAL: Non-STMT handle (type=%d) found in _childStatementHandles. "
+ "This will cause a handle leak!", handle->type());
+ continue; // Skip marking to prevent leak
+ }
+ handle->markImplicitlyFreed();
+ }
+ }
+ _childStatementHandles.clear();
+ _allocationsSinceCompaction = 0;
+ } // Release lock before potentially slow SQLDisconnect call
+
SQLRETURN ret = SQLDisconnect_ptr(_dbcHandle->get());
checkError(ret);
- _dbcHandle.reset(); // triggers SQLFreeHandle via destructor, if last owner
- }
- else {
+ // triggers SQLFreeHandle via destructor, if last owner
+ _dbcHandle.reset();
+ } else {
LOG("No connection handle to disconnect");
}
}
-// TODO: Add an exception class in C++ for error handling, DB spec compliant
-void Connection::checkError(SQLRETURN ret) const{
+// TODO(microsoft): Add an exception class in C++ for error handling,
+// DB spec compliant
+void Connection::checkError(SQLRETURN ret) const {
if (!SQL_SUCCEEDED(ret)) {
ErrorInfo err = SQLCheckError_Wrap(SQL_HANDLE_DBC, _dbcHandle, ret);
std::string errorMsg = WideToUTF8(err.ddbcErrorMsg);
@@ -132,9 +179,16 @@ void Connection::setAutocommit(bool enable) {
ThrowStdException("Connection handle not allocated");
}
SQLINTEGER value = enable ? SQL_AUTOCOMMIT_ON : SQL_AUTOCOMMIT_OFF;
- LOG("Set SQL Connection Attribute");
- SQLRETURN ret = SQLSetConnectAttr_ptr(_dbcHandle->get(), SQL_ATTR_AUTOCOMMIT, reinterpret_cast(static_cast(value)), 0);
+ LOG("Setting autocommit=%d", enable);
+ SQLRETURN ret =
+ SQLSetConnectAttr_ptr(_dbcHandle->get(), SQL_ATTR_AUTOCOMMIT,
+ reinterpret_cast(static_cast(value)), 0);
checkError(ret);
+ if (value == SQL_AUTOCOMMIT_ON) {
+ LOG("Autocommit enabled");
+ } else {
+ LOG("Autocommit disabled");
+ }
_autocommit = enable;
}
@@ -142,10 +196,11 @@ bool Connection::getAutocommit() const {
if (!_dbcHandle) {
ThrowStdException("Connection handle not allocated");
}
- LOG("Get SQL Connection Attribute");
+ LOG("Getting autocommit attribute");
SQLINTEGER value;
SQLINTEGER string_length;
- SQLRETURN ret = SQLGetConnectAttr_ptr(_dbcHandle->get(), SQL_ATTR_AUTOCOMMIT, &value, sizeof(value), &string_length);
+ SQLRETURN ret = SQLGetConnectAttr_ptr(_dbcHandle->get(), SQL_ATTR_AUTOCOMMIT, &value,
+ sizeof(value), &string_length);
checkError(ret);
return value == SQL_AUTOCOMMIT_ON;
}
@@ -159,37 +214,127 @@ SqlHandlePtr Connection::allocStatementHandle() {
SQLHANDLE stmt = nullptr;
SQLRETURN ret = SQLAllocHandle_ptr(SQL_HANDLE_STMT, _dbcHandle->get(), &stmt);
checkError(ret);
- return std::make_shared(static_cast(SQL_HANDLE_STMT), stmt);
-}
+ auto stmtHandle = std::make_shared(static_cast(SQL_HANDLE_STMT), stmt);
+
+ // THREAD-SAFETY: Lock mutex before modifying _childStatementHandles
+ // This protects against concurrent disconnect() or allocStatementHandle() calls,
+ // or GC finalizers running from different threads
+ {
+ std::lock_guard lock(_childHandlesMutex);
+
+ // Track this child handle so we can mark it as implicitly freed when connection closes
+ // Use weak_ptr to avoid circular references and allow normal cleanup
+ _childStatementHandles.push_back(stmtHandle);
+ _allocationsSinceCompaction++;
+
+ // Compact expired weak_ptrs only periodically to avoid O(n²) overhead
+ // This keeps allocation fast (O(1) amortized) while preventing unbounded growth
+ // disconnect() also compacts, so this is just for long-lived connections with many cursors
+ if (_allocationsSinceCompaction >= COMPACTION_INTERVAL) {
+ size_t originalSize = _childStatementHandles.size();
+ _childStatementHandles.erase(
+ std::remove_if(_childStatementHandles.begin(), _childStatementHandles.end(),
+ [](const std::weak_ptr& wp) { return wp.expired(); }),
+ _childStatementHandles.end());
+ _allocationsSinceCompaction = 0;
+ LOG("Periodic compaction: %zu -> %zu handles (removed %zu expired)",
+ originalSize, _childStatementHandles.size(),
+ originalSize - _childStatementHandles.size());
+ }
+ } // Release lock
+ return stmtHandle;
+}
SQLRETURN Connection::setAttribute(SQLINTEGER attribute, py::object value) {
- LOG("Setting SQL attribute");
- SQLPOINTER ptr = nullptr;
- SQLINTEGER length = 0;
+ LOG("Setting SQL attribute=%d", attribute);
+ // SQLPOINTER ptr = nullptr;
+ // SQLINTEGER length = 0;
if (py::isinstance(value)) {
- int intValue = value.cast();
- ptr = reinterpret_cast(static_cast(intValue));
- length = SQL_IS_INTEGER;
+ // Get the integer value
+ int64_t longValue = value.cast();
+
+ SQLRETURN ret = SQLSetConnectAttr_ptr(
+ _dbcHandle->get(), attribute,
+ reinterpret_cast(static_cast(longValue)), SQL_IS_INTEGER);
+
+ if (!SQL_SUCCEEDED(ret)) {
+ LOG("Failed to set integer attribute=%d, ret=%d", attribute, ret);
+ } else {
+ LOG("Set integer attribute=%d successfully", attribute);
+ }
+ return ret;
+ } else if (py::isinstance(value)) {
+ try {
+ std::string utf8_str = value.cast();
+
+ // Convert to wide string
+ std::wstring wstr = Utf8ToWString(utf8_str);
+ if (wstr.empty() && !utf8_str.empty()) {
+ LOG("Failed to convert string value to wide string for "
+ "attribute=%d",
+ attribute);
+ return SQL_ERROR;
+ }
+ this->wstrStringBuffer.clear();
+ this->wstrStringBuffer = std::move(wstr);
+
+ SQLPOINTER ptr;
+ SQLINTEGER length;
+
+#if defined(__APPLE__) || defined(__linux__)
+ // For macOS/Linux, convert wstring to SQLWCHAR buffer
+ std::vector sqlwcharBuffer = WStringToSQLWCHAR(this->wstrStringBuffer);
+ if (sqlwcharBuffer.empty() && !this->wstrStringBuffer.empty()) {
+ LOG("Failed to convert wide string to SQLWCHAR buffer for "
+ "attribute=%d",
+ attribute);
+ return SQL_ERROR;
+ }
+
+ ptr = sqlwcharBuffer.data();
+ length = static_cast(sqlwcharBuffer.size() * sizeof(SQLWCHAR));
+#else
+ // On Windows, wchar_t and SQLWCHAR are the same size
+ ptr = const_cast(this->wstrStringBuffer.c_str());
+ length = static_cast(this->wstrStringBuffer.length() * sizeof(SQLWCHAR));
+#endif
+
+ SQLRETURN ret = SQLSetConnectAttr_ptr(_dbcHandle->get(), attribute, ptr, length);
+ if (!SQL_SUCCEEDED(ret)) {
+ LOG("Failed to set string attribute=%d, ret=%d", attribute, ret);
+ } else {
+ LOG("Set string attribute=%d successfully", attribute);
+ }
+ return ret;
+ } catch (const std::exception& e) {
+ LOG("Exception during string attribute=%d setting: %s", attribute, e.what());
+ return SQL_ERROR;
+ }
} else if (py::isinstance(value) || py::isinstance(value)) {
- static std::vector buffers;
- buffers.emplace_back(value.cast());
- ptr = const_cast(buffers.back().c_str());
- length = static_cast(buffers.back().size());
+ try {
+ std::string binary_data = value.cast();
+ this->strBytesBuffer.clear();
+ this->strBytesBuffer = std::move(binary_data);
+ SQLPOINTER ptr = const_cast(this->strBytesBuffer.c_str());
+ SQLINTEGER length = static_cast(this->strBytesBuffer.size());
+
+ SQLRETURN ret = SQLSetConnectAttr_ptr(_dbcHandle->get(), attribute, ptr, length);
+ if (!SQL_SUCCEEDED(ret)) {
+ LOG("Failed to set binary attribute=%d, ret=%d", attribute, ret);
+ } else {
+ LOG("Set binary attribute=%d successfully (length=%d)", attribute, length);
+ }
+ return ret;
+ } catch (const std::exception& e) {
+ LOG("Exception during binary attribute=%d setting: %s", attribute, e.what());
+ return SQL_ERROR;
+ }
} else {
- LOG("Unsupported attribute value type");
+ LOG("Unsupported attribute value type for attribute=%d", attribute);
return SQL_ERROR;
}
-
- SQLRETURN ret = SQLSetConnectAttr_ptr(_dbcHandle->get(), attribute, ptr, length);
- if (!SQL_SUCCEEDED(ret)) {
- LOG("Failed to set attribute");
- }
- else {
- LOG("Set attribute successfully");
- }
- return ret;
}
void Connection::applyAttrsBefore(const py::dict& attrs) {
@@ -201,11 +346,12 @@ void Connection::applyAttrsBefore(const py::dict& attrs) {
continue;
}
- if (key == SQL_COPT_SS_ACCESS_TOKEN) {
- SQLRETURN ret = setAttribute(key, py::reinterpret_borrow(item.second));
- if (!SQL_SUCCEEDED(ret)) {
- ThrowStdException("Failed to set access token before connect");
- }
+ // Apply all supported attributes
+ SQLRETURN ret = setAttribute(key, py::reinterpret_borrow(item.second));
+ if (!SQL_SUCCEEDED(ret)) {
+ std::string attrName = std::to_string(key);
+ std::string errorMsg = "Failed to set attribute " + attrName + " before connect";
+ ThrowStdException(errorMsg);
}
}
}
@@ -215,8 +361,8 @@ bool Connection::isAlive() const {
ThrowStdException("Connection handle not allocated");
}
SQLUINTEGER status;
- SQLRETURN ret = SQLGetConnectAttr_ptr(_dbcHandle->get(), SQL_ATTR_CONNECTION_DEAD,
- &status, 0, nullptr);
+ SQLRETURN ret =
+ SQLGetConnectAttr_ptr(_dbcHandle->get(), SQL_ATTR_CONNECTION_DEAD, &status, 0, nullptr);
return SQL_SUCCEEDED(ret) && status == SQL_CD_FALSE;
}
@@ -225,16 +371,26 @@ bool Connection::reset() {
ThrowStdException("Connection handle not allocated");
}
LOG("Resetting connection via SQL_ATTR_RESET_CONNECTION");
- SQLRETURN ret = SQLSetConnectAttr_ptr(
- _dbcHandle->get(),
- SQL_ATTR_RESET_CONNECTION,
- (SQLPOINTER)SQL_RESET_CONNECTION_YES,
- SQL_IS_INTEGER);
+ SQLRETURN ret = SQLSetConnectAttr_ptr(_dbcHandle->get(), SQL_ATTR_RESET_CONNECTION,
+ (SQLPOINTER)SQL_RESET_CONNECTION_YES, SQL_IS_INTEGER);
+ if (!SQL_SUCCEEDED(ret)) {
+ LOG("Failed to reset connection (ret=%d). Marking as dead.", ret);
+ disconnect();
+ return false;
+ }
+
+ // SQL_ATTR_RESET_CONNECTION does NOT reset the transaction isolation level.
+ // Explicitly reset it to the default (SQL_TXN_READ_COMMITTED) to prevent
+ // isolation level settings from leaking between pooled connection usages.
+ LOG("Resetting transaction isolation level to READ COMMITTED");
+ ret = SQLSetConnectAttr_ptr(_dbcHandle->get(), SQL_ATTR_TXN_ISOLATION,
+ (SQLPOINTER)SQL_TXN_READ_COMMITTED, SQL_IS_INTEGER);
if (!SQL_SUCCEEDED(ret)) {
- LOG("Failed to reset connection. Marking as dead.");
+ LOG("Failed to reset transaction isolation level (ret=%d). Marking as dead.", ret);
disconnect();
return false;
}
+
updateLastUsed();
return true;
}
@@ -247,7 +403,8 @@ std::chrono::steady_clock::time_point Connection::lastUsed() const {
return _lastUsed;
}
-ConnectionHandle::ConnectionHandle(const std::string& connStr, bool usePool, const py::dict& attrsBefore)
+ConnectionHandle::ConnectionHandle(const std::string& connStr, bool usePool,
+ const py::dict& attrsBefore)
: _usePool(usePool) {
_connStr = Utf8ToWString(connStr);
if (_usePool) {
@@ -309,4 +466,100 @@ SqlHandlePtr ConnectionHandle::allocStatementHandle() {
ThrowStdException("Connection object is not initialized");
}
return _conn->allocStatementHandle();
-}
\ No newline at end of file
+}
+
+py::object Connection::getInfo(SQLUSMALLINT infoType) const {
+ if (!_dbcHandle) {
+ ThrowStdException("Connection handle not allocated");
+ }
+
+ // First call with NULL buffer to get required length
+ SQLSMALLINT requiredLen = 0;
+ SQLRETURN ret = SQLGetInfo_ptr(_dbcHandle->get(), infoType, NULL, 0, &requiredLen);
+
+ if (!SQL_SUCCEEDED(ret)) {
+ checkError(ret);
+ return py::none();
+ }
+
+ // For zero-length results
+ if (requiredLen == 0) {
+ py::dict result;
+ result["data"] = py::bytes("", 0);
+ result["length"] = 0;
+ result["info_type"] = infoType;
+ return result;
+ }
+
+ // Cap buffer allocation to SQL_MAX_SMALL_INT to prevent excessive
+ // memory usage
+ SQLSMALLINT allocSize = requiredLen + 10;
+ if (allocSize > SQL_MAX_SMALL_INT) {
+ allocSize = SQL_MAX_SMALL_INT;
+ }
+ std::vector buffer(allocSize, 0); // Extra padding for safety
+
+ // Get the actual data - avoid using std::min
+ SQLSMALLINT bufferSize = requiredLen + 10;
+ if (bufferSize > SQL_MAX_SMALL_INT) {
+ bufferSize = SQL_MAX_SMALL_INT;
+ }
+
+ SQLSMALLINT returnedLen = 0;
+ ret = SQLGetInfo_ptr(_dbcHandle->get(), infoType, buffer.data(), bufferSize, &returnedLen);
+
+ if (!SQL_SUCCEEDED(ret)) {
+ checkError(ret);
+ return py::none();
+ }
+
+ // Create a dictionary with the raw data
+ py::dict result;
+
+ // IMPORTANT: Pass exactly what SQLGetInfo returned
+ // No null-terminator manipulation, just pass the raw data
+ result["data"] = py::bytes(buffer.data(), returnedLen);
+ result["length"] = returnedLen;
+ result["info_type"] = infoType;
+
+ return result;
+}
+
+py::object ConnectionHandle::getInfo(SQLUSMALLINT infoType) const {
+ if (!_conn) {
+ ThrowStdException("Connection object is not initialized");
+ }
+ return _conn->getInfo(infoType);
+}
+
+void ConnectionHandle::setAttr(int attribute, py::object value) {
+ if (!_conn) {
+ ThrowStdException("Connection not established");
+ }
+
+ // Use existing setAttribute with better error handling
+ SQLRETURN ret = _conn->setAttribute(static_cast(attribute), value);
+ if (!SQL_SUCCEEDED(ret)) {
+ // Get detailed error information from ODBC
+ try {
+ ErrorInfo errorInfo = SQLCheckError_Wrap(SQL_HANDLE_DBC, _conn->getDbcHandle(), ret);
+
+ std::string errorMsg =
+ "Failed to set connection attribute " + std::to_string(attribute);
+ if (!errorInfo.ddbcErrorMsg.empty()) {
+ // Convert wstring to string for concatenation
+ std::string ddbcErrorStr = WideToUTF8(errorInfo.ddbcErrorMsg);
+ errorMsg += ": " + ddbcErrorStr;
+ }
+
+ LOG("Connection setAttribute failed: %s", errorMsg.c_str());
+ ThrowStdException(errorMsg);
+ } catch (...) {
+ // Fallback to generic error if detailed error retrieval fails
+ std::string errorMsg =
+ "Failed to set connection attribute " + std::to_string(attribute);
+ LOG("Connection setAttribute failed: %s", errorMsg.c_str());
+ ThrowStdException(errorMsg);
+ }
+ }
+}
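On the Python side, these additions surface through Connection.getinfo() and Connection.set_attr() from the stub earlier in this patch. A hedged sketch follows; the connection string is a placeholder and the returned values depend on the driver and server.

```python
import mssql_python

conn = mssql_python.connect("Server=localhost;Database=master;Trusted_Connection=yes;")

# getinfo() wraps SQLGetInfo; string vs. integer decoding depends on the info type.
print(conn.getinfo(mssql_python.SQL_DRIVER_NAME))
print(conn.getinfo(mssql_python.SQL_MAX_COLUMN_NAME_LEN))

# set_attr() wraps SQLSetConnectAttr; here the session is switched to read-only mode.
conn.set_attr(mssql_python.SQL_ATTR_ACCESS_MODE, mssql_python.SQL_MODE_READ_ONLY)
conn.close()
```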
diff --git a/mssql_python/pybind/connection/connection.h b/mssql_python/pybind/connection/connection.h
index 6129125e1..6c6f1e63c 100644
--- a/mssql_python/pybind/connection/connection.h
+++ b/mssql_python/pybind/connection/connection.h
@@ -1,18 +1,26 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
-// INFO|TODO - Note that is file is Windows specific right now. Making it arch agnostic will be
-// taken up in future.
-
#pragma once
-#include "ddbc_bindings.h"
+#include "../ddbc_bindings.h"
+#include
+#include
+#include
// Represents a single ODBC database connection.
// Manages connection handles.
// Note: This class does NOT implement pooling logic directly.
+//
+// THREADING MODEL (per DB-API 2.0 threadsafety=1):
+// - Connections should NOT be shared between threads in normal usage
+// - However, _childStatementHandles is mutex-protected because:
+// 1. Python GC/finalizers can run from any thread
+// 2. Native code may release GIL during blocking ODBC calls
+// 3. Provides safety if user accidentally shares connection
+// - All accesses to _childStatementHandles are guarded by _childHandlesMutex
class Connection {
-public:
+ public:
Connection(const std::wstring& connStr, bool fromPool);
~Connection();
@@ -42,10 +50,17 @@ class Connection {
// Allocate a new statement handle on this connection.
SqlHandlePtr allocStatementHandle();
-private:
+ // Get information about the driver and data source
+ py::object getInfo(SQLUSMALLINT infoType) const;
+
+ SQLRETURN setAttribute(SQLINTEGER attribute, py::object value);
+
+ // Add getter for DBC handle for error reporting
+ const SqlHandlePtr& getDbcHandle() const { return _dbcHandle; }
+
+ private:
void allocateDbcHandle();
void checkError(SQLRETURN ret) const;
- SQLRETURN setAttribute(SQLINTEGER attribute, py::object value);
void applyAttrsBefore(const py::dict& attrs_before);
std::wstring _connStr;
@@ -53,11 +68,30 @@ class Connection {
bool _autocommit = true;
SqlHandlePtr _dbcHandle;
std::chrono::steady_clock::time_point _lastUsed;
+ std::wstring wstrStringBuffer; // wide-string buffer used when setting string attributes
+ std::string strBytesBuffer; // byte buffer used when setting bytes/bytearray attributes
+
+ // Track child statement handles to mark them as implicitly freed when connection closes
+ // Uses weak_ptr to avoid circular references and allow normal cleanup
+ // THREAD-SAFETY: All accesses must be guarded by _childHandlesMutex
+ std::vector> _childStatementHandles;
+
+ // Counter for periodic compaction of expired weak_ptrs
+ // Compact every N allocations to avoid O(n²) overhead in hot path
+ // THREAD-SAFETY: Protected by _childHandlesMutex
+ size_t _allocationsSinceCompaction = 0;
+ static constexpr size_t COMPACTION_INTERVAL = 100;
+
+ // Mutex protecting _childStatementHandles and _allocationsSinceCompaction
+ // Prevents data races between allocStatementHandle() and disconnect(),
+ // or concurrent GC finalizers running from different threads
+ mutable std::mutex _childHandlesMutex;
};
class ConnectionHandle {
-public:
- ConnectionHandle(const std::string& connStr, bool usePool, const py::dict& attrsBefore = py::dict());
+ public:
+ ConnectionHandle(const std::string& connStr, bool usePool,
+ const py::dict& attrsBefore = py::dict());
~ConnectionHandle();
void close();
@@ -66,9 +100,13 @@ class ConnectionHandle {
void setAutocommit(bool enabled);
bool getAutocommit() const;
SqlHandlePtr allocStatementHandle();
+ void setAttr(int attribute, py::object value);
-private:
+ // Get information about the driver and data source
+ py::object getInfo(SQLUSMALLINT infoType) const;
+
+ private:
std::shared_ptr _conn;
bool _usePool;
std::wstring _connStr;
-};
\ No newline at end of file
+};
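The threading model documented above (DB-API threadsafety=1) means the module may be shared across threads, but individual connections should not be. A small illustrative pattern, with a placeholder connection string:

```python
import threading
import mssql_python

CONN_STR = "Server=localhost;Database=master;Trusted_Connection=yes;"  # placeholder

def worker(worker_id: int) -> None:
    # One connection per thread; never share a Connection between threads.
    conn = mssql_python.connect(CONN_STR)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT ?", worker_id)
        cursor.fetchone()
    finally:
        conn.close()

threads = [threading.Thread(target=worker, args=(i,)) for i in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()
```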
diff --git a/mssql_python/pybind/connection/connection_pool.cpp b/mssql_python/pybind/connection/connection_pool.cpp
index 60dd54151..3000a9702 100644
--- a/mssql_python/pybind/connection/connection_pool.cpp
+++ b/mssql_python/pybind/connection/connection_pool.cpp
@@ -1,16 +1,19 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
-// INFO|TODO - Note that is file is Windows specific right now. Making it arch agnostic will be
-// taken up in future.
-
-#include "connection_pool.h"
+#include "connection/connection_pool.h"
#include
+#include
+#include
+
+// Logging uses LOG() macro for all diagnostic output
+#include "logger_bridge.hpp"
ConnectionPool::ConnectionPool(size_t max_size, int idle_timeout_secs)
- : _max_size(max_size), _idle_timeout_secs(idle_timeout_secs), _current_size(0) {}
+ : _max_size(max_size), _idle_timeout_secs(idle_timeout_secs), _current_size(0) {}
-std::shared_ptr ConnectionPool::acquire(const std::wstring& connStr, const py::dict& attrs_before) {
+std::shared_ptr ConnectionPool::acquire(const std::wstring& connStr,
+ const py::dict& attrs_before) {
std::vector> to_disconnect;
std::shared_ptr valid_conn = nullptr;
{
@@ -20,14 +23,18 @@ std::shared_ptr ConnectionPool::acquire(const std::wstring& connStr,
// Phase 1: Remove stale connections, collect for later disconnect
_pool.erase(std::remove_if(_pool.begin(), _pool.end(),
- [&](const std::shared_ptr& conn) {
- auto idle_time = std::chrono::duration_cast(now - conn->lastUsed()).count();
- if (idle_time > _idle_timeout_secs) {
- to_disconnect.push_back(conn);
- return true;
- }
- return false;
- }), _pool.end());
+ [&](const std::shared_ptr& conn) {
+ auto idle_time =
+ std::chrono::duration_cast(
+ now - conn->lastUsed())
+ .count();
+ if (idle_time > _idle_timeout_secs) {
+ to_disconnect.push_back(conn);
+ return true;
+ }
+ return false;
+ }),
+ _pool.end());
size_t pruned = before - _pool.size();
_current_size = (_current_size >= pruned) ? (_current_size - pruned) : 0;
@@ -65,7 +72,7 @@ std::shared_ptr ConnectionPool::acquire(const std::wstring& connStr,
try {
conn->disconnect();
} catch (const std::exception& ex) {
- LOG("Disconnect bad/expired connections failed: {}", ex.what());
+ LOG("Disconnect bad/expired connections failed: %s", ex.what());
}
}
return valid_conn;
@@ -76,10 +83,10 @@ void ConnectionPool::release(std::shared_ptr conn) {
if (_pool.size() < _max_size) {
conn->updateLastUsed();
_pool.push_back(conn);
- }
- else {
+ } else {
conn->disconnect();
- if (_current_size > 0) --_current_size;
+ if (_current_size > 0)
+ --_current_size;
}
}
@@ -97,7 +104,7 @@ void ConnectionPool::close() {
try {
conn->disconnect();
} catch (const std::exception& ex) {
- LOG("ConnectionPool::close: disconnect failed: {}", ex.what());
+ LOG("ConnectionPool::close: disconnect failed: %s", ex.what());
}
}
}
@@ -107,7 +114,8 @@ ConnectionPoolManager& ConnectionPoolManager::getInstance() {
return manager;
}
-std::shared_ptr<Connection> ConnectionPoolManager::acquireConnection(const std::wstring& connStr, const py::dict& attrs_before) {
+std::shared_ptr<Connection> ConnectionPoolManager::acquireConnection(const std::wstring& connStr,
+ const py::dict& attrs_before) {
std::lock_guard<std::mutex> lock(_manager_mutex);
auto& pool = _pools[connStr];
@@ -118,7 +126,8 @@ std::shared_ptr ConnectionPoolManager::acquireConnection(const std::
return pool->acquire(connStr, attrs_before);
}
-void ConnectionPoolManager::returnConnection(const std::wstring& conn_str, const std::shared_ptr<Connection> conn) {
+void ConnectionPoolManager::returnConnection(const std::wstring& conn_str,
+ const std::shared_ptr<Connection> conn) {
std::lock_guard<std::mutex> lock(_manager_mutex);
if (_pools.find(conn_str) != _pools.end()) {
_pools[conn_str]->release((conn));
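
Note on the pruning logic in ConnectionPool::acquire above: stale connections are removed with the erase/remove_if idiom while the pool mutex is held, and only disconnected after the lock is released. A minimal, self-contained sketch of that pattern, with illustrative names rather than the driver's actual types:

// Sketch of the prune-then-disconnect pattern; "Conn" is a stand-in type.
#include <algorithm>
#include <chrono>
#include <deque>
#include <memory>
#include <mutex>
#include <vector>

struct Conn {
    std::chrono::steady_clock::time_point last_used;
    void disconnect() { /* release the underlying handle */ }
};

void prune_idle(std::deque<std::shared_ptr<Conn>>& pool, std::mutex& m, int idle_timeout_secs) {
    std::vector<std::shared_ptr<Conn>> to_disconnect;  // disconnected outside the lock
    {
        std::lock_guard<std::mutex> lock(m);
        auto now = std::chrono::steady_clock::now();
        pool.erase(std::remove_if(pool.begin(), pool.end(),
                                  [&](const std::shared_ptr<Conn>& c) {
                                      auto idle = std::chrono::duration_cast<std::chrono::seconds>(
                                                      now - c->last_used).count();
                                      if (idle > idle_timeout_secs) {
                                          to_disconnect.push_back(c);
                                          return true;  // drop from the pool
                                      }
                                      return false;
                                  }),
                   pool.end());
    }
    for (auto& c : to_disconnect) c->disconnect();  // no pool lock held here
}
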
diff --git a/mssql_python/pybind/connection/connection_pool.h b/mssql_python/pybind/connection/connection_pool.h
index dc2de5a8f..7a8a98c5c 100644
--- a/mssql_python/pybind/connection/connection_pool.h
+++ b/mssql_python/pybind/connection/connection_pool.h
@@ -1,25 +1,27 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
-// INFO|TODO - Note that is file is Windows specific right now. Making it arch agnostic will be
-// taken up in future.
+#ifndef MSSQL_PYTHON_CONNECTION_POOL_H_
+#define MSSQL_PYTHON_CONNECTION_POOL_H_
#pragma once
+#include "connection/connection.h"
+#include
#include
-#include
#include
#include
#include
-#include
-#include "connection.h"
+#include
-// Manages a fixed-size pool of reusable database connections for a single connection string
+// Manages a fixed-size pool of reusable database connections for a
+// single connection string
class ConnectionPool {
-public:
+ public:
ConnectionPool(size_t max_size, int idle_timeout_secs);
// Acquires a connection from the pool or creates a new one if under limit
- std::shared_ptr<Connection> acquire(const std::wstring& connStr, const py::dict& attrs_before = py::dict());
+ std::shared_ptr<Connection> acquire(const std::wstring& connStr,
+ const py::dict& attrs_before = py::dict());
// Returns a connection to the pool for reuse
void release(std::shared_ptr<Connection> conn);
@@ -27,24 +29,25 @@ class ConnectionPool {
// Closes all connections in the pool, releasing resources
void close();
-private:
- size_t _max_size; // Maximum number of connections allowed
- int _idle_timeout_secs; // Idle time before connections are considered stale
+ private:
+ size_t _max_size; // Maximum number of connections allowed
+ int _idle_timeout_secs; // Idle time before connections are stale
size_t _current_size = 0;
std::deque<std::shared_ptr<Connection>> _pool; // Available connections
- std::mutex _mutex; // Mutex for thread-safe access
+ std::mutex _mutex; // Mutex for thread-safe access
};
// Singleton manager that handles multiple pools keyed by connection string
class ConnectionPoolManager {
-public:
+ public:
// Returns the singleton instance of the manager
static ConnectionPoolManager& getInstance();
void configure(int max_size, int idle_timeout);
// Gets a connection from the appropriate pool (creates one if none exists)
- std::shared_ptr<Connection> acquireConnection(const std::wstring& conn_str, const py::dict& attrs_before = py::dict());
+ std::shared_ptr<Connection> acquireConnection(const std::wstring& conn_str,
+ const py::dict& attrs_before = py::dict());
// Returns a connection to its original pool
void returnConnection(const std::wstring& conn_str, std::shared_ptr<Connection> conn);
@@ -52,8 +55,8 @@ class ConnectionPoolManager {
// Closes all pools and their connections
void closePools();
-private:
- ConnectionPoolManager() = default;
+ private:
+ ConnectionPoolManager() = default;
~ConnectionPoolManager() = default;
// Map from connection string to connection pool
@@ -63,8 +66,10 @@ class ConnectionPoolManager {
std::mutex _manager_mutex;
size_t _default_max_size = 10;
int _default_idle_secs = 300;
-
+
// Prevent copying
ConnectionPoolManager(const ConnectionPoolManager&) = delete;
ConnectionPoolManager& operator=(const ConnectionPoolManager&) = delete;
};
+
+#endif // MSSQL_PYTHON_CONNECTION_POOL_H_
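
For readers of this header, a hypothetical caller-side sketch of the pooling API (assumed usage only; the real call sites live elsewhere in the driver and are not shown in this hunk):

// Hypothetical usage of ConnectionPoolManager; error handling omitted.
#include "connection/connection_pool.h"
#include <memory>
#include <string>

void use_pooled_connection(const std::wstring& connStr) {
    auto& mgr = ConnectionPoolManager::getInstance();
    mgr.configure(/*max_size=*/10, /*idle_timeout=*/300);  // mirrors the header defaults
    std::shared_ptr<Connection> conn = mgr.acquireConnection(connStr);
    // ... run statements on conn ...
    mgr.returnConnection(connStr, conn);  // back into the per-connection-string pool
}
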
diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp
index 49c7c7af4..2cf04fe0d 100644
--- a/mssql_python/pybind/ddbc_bindings.cpp
+++ b/mssql_python/pybind/ddbc_bindings.cpp
@@ -1,17 +1,20 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
-// INFO|TODO - Note that is file is Windows specific right now. Making it arch agnostic will be
+// INFO|TODO - Note that this file is Windows specific right now. Making it arch
+// agnostic will be
// taken up in beta release
#include "ddbc_bindings.h"
#include "connection/connection.h"
#include "connection/connection_pool.h"
+#include "logger_bridge.hpp"
#include
+#include <cstring> // For std::memcpy
+#include
#include <iomanip> // std::setw, std::setfill
#include
#include <utility> // std::forward
-#include
//-------------------------------------------------------------------------------------------------
// Macro definitions
@@ -19,17 +22,91 @@
// This constant is not exposed via sql.h, hence define it here
#define SQL_SS_TIME2 (-154)
-
+#define SQL_SS_TIMESTAMPOFFSET (-155)
+#define SQL_C_SS_TIMESTAMPOFFSET (0x4001)
#define MAX_DIGITS_IN_NUMERIC 64
+#define SQL_MAX_NUMERIC_LEN 16
+#define SQL_SS_XML (-152)
-#define STRINGIFY_FOR_CASE(x) \
- case x: \
+#define STRINGIFY_FOR_CASE(x) \
+ case x: \
return #x
// Architecture-specific defines
#ifndef ARCHITECTURE
#define ARCHITECTURE "win64" // Default to win64 if not defined during compilation
#endif
+#define DAE_CHUNK_SIZE 8192
+#define SQL_MAX_LOB_SIZE 8000
+
+//-------------------------------------------------------------------------------------------------
+//-------------------------------------------------------------------------------------------------
+// Logging Infrastructure:
+// - LOG() macro: All diagnostic/debug logging at DEBUG level (single level)
+// - LOG_INFO/WARNING/ERROR: Higher-level messages for production
+// Uses printf-style formatting: LOG("Value: %d", x) -- __FILE__/__LINE__
+// embedded in macro
+//-------------------------------------------------------------------------------------------------
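+// As a usage note (the real macro lives in logger_bridge.hpp and is not
+// reproduced here), printf-style formatting means specifiers must match
+// argument types; that is why the older fmt-style "{}" placeholders elsewhere
+// in this diff were switched to "%s"/"%d". A stand-in sketch of the call shape:
+//
+//   #include <cstdio>
+//   #define LOG(fmt, ...) std::fprintf(stderr, "[ddbc] " fmt "\n", ##__VA_ARGS__)
+//
+//   void log_example(int rows, const char* sqlState) {
+//       LOG("Fetched %d rows (SQLSTATE=%s)", rows, sqlState);
+//   }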
+namespace PythonObjectCache {
+static py::object datetime_class;
+static py::object date_class;
+static py::object time_class;
+static py::object decimal_class;
+static py::object uuid_class;
+static bool cache_initialized = false;
+
+void initialize() {
+ if (!cache_initialized) {
+ auto datetime_module = py::module_::import("datetime");
+ datetime_class = datetime_module.attr("datetime");
+ date_class = datetime_module.attr("date");
+ time_class = datetime_module.attr("time");
+
+ auto decimal_module = py::module_::import("decimal");
+ decimal_class = decimal_module.attr("Decimal");
+
+ auto uuid_module = py::module_::import("uuid");
+ uuid_class = uuid_module.attr("UUID");
+
+ cache_initialized = true;
+ }
+}
+
+py::object get_datetime_class() {
+ if (cache_initialized && datetime_class) {
+ return datetime_class;
+ }
+ return py::module_::import("datetime").attr("datetime");
+}
+
+py::object get_date_class() {
+ if (cache_initialized && date_class) {
+ return date_class;
+ }
+ return py::module_::import("datetime").attr("date");
+}
+
+py::object get_time_class() {
+ if (cache_initialized && time_class) {
+ return time_class;
+ }
+ return py::module_::import("datetime").attr("time");
+}
+
+py::object get_decimal_class() {
+ if (cache_initialized && decimal_class) {
+ return decimal_class;
+ }
+ return py::module_::import("decimal").attr("Decimal");
+}
+
+py::object get_uuid_class() {
+ if (cache_initialized && uuid_class) {
+ return uuid_class;
+ }
+ return py::module_::import("uuid").attr("UUID");
+}
+} // namespace PythonObjectCache
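
The cache above avoids calling py::module_::import on every row conversion; once initialize() has run, the datetime/date/time/Decimal/UUID classes are plain reads of cached py::object handles. A reduced standalone sketch of the same pattern, with illustrative names:

// Standalone sketch of caching an imported Python class in pybind11.
// "cached_decimal_class" is illustrative; the driver keeps its cache in the
// PythonObjectCache namespace shown above.
#include <pybind11/pybind11.h>
#include <string>
namespace py = pybind11;

static py::object cached_decimal_class;  // empty until first use

py::object make_decimal(const std::string& text) {
    if (!cached_decimal_class) {
        // One-time import; later calls reuse the cached class object.
        cached_decimal_class = py::module_::import("decimal").attr("Decimal");
    }
    return cached_decimal_class(text);  // e.g. Decimal("123.45")
}
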
//-------------------------------------------------------------------------------------------------
// Class definitions
@@ -37,16 +114,26 @@
// Struct to hold parameter information for binding. Used by SQLBindParameter.
// This struct is shared between C++ & Python code.
+// Suppress -Wattributes warning for ParamInfo struct
+// The warning is triggered because pybind11 handles visibility attributes automatically,
+// and having additional attributes on the struct can cause conflicts on Linux with GCC
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wattributes"
+#endif
struct ParamInfo {
SQLSMALLINT inputOutputType;
SQLSMALLINT paramCType;
SQLSMALLINT paramSQLType;
SQLULEN columnSize;
SQLSMALLINT decimalDigits;
- // TODO: Reuse python buffer for large data using Python buffer protocol
- // Stores pointer to the python object that holds parameter value
- // py::object* dataPtr;
+ SQLLEN strLenOrInd = 0; // Required for DAE
+ bool isDAE = false; // Indicates if we need to stream
+ py::object dataPtr;
};
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
// Mirrors the SQL_NUMERIC_STRUCT. But redefined to replace val char array
// with std::string, because pybind doesn't allow binding char array.
@@ -54,43 +141,20 @@ struct ParamInfo {
struct NumericData {
SQLCHAR precision;
SQLSCHAR scale;
- SQLCHAR sign; // 1=pos, 0=neg
- std::uint64_t val; // 123.45 -> 12345
+ SQLCHAR sign; // 1=pos, 0=neg
+ std::string val; // 123.45 -> 12345
- NumericData() : precision(0), scale(0), sign(0), val(0) {}
+ NumericData() : precision(0), scale(0), sign(0), val(SQL_MAX_NUMERIC_LEN, '\0') {}
- NumericData(SQLCHAR precision, SQLSCHAR scale, SQLCHAR sign, std::uint64_t value)
- : precision(precision), scale(scale), sign(sign), val(value) {}
-};
-
-// Struct to hold data buffers and indicators for each column
-struct ColumnBuffers {
- std::vector> charBuffers;
- std::vector> wcharBuffers;
- std::vector> intBuffers;
- std::vector> smallIntBuffers;
- std::vector> realBuffers;
- std::vector> doubleBuffers;
- std::vector> timestampBuffers;
- std::vector> bigIntBuffers;
- std::vector> dateBuffers;
- std::vector> timeBuffers;
- std::vector> guidBuffers;
- std::vector> indicators;
-
- ColumnBuffers(SQLSMALLINT numCols, int fetchSize)
- : charBuffers(numCols),
- wcharBuffers(numCols),
- intBuffers(numCols),
- smallIntBuffers(numCols),
- realBuffers(numCols),
- doubleBuffers(numCols),
- timestampBuffers(numCols),
- bigIntBuffers(numCols),
- dateBuffers(numCols),
- timeBuffers(numCols),
- guidBuffers(numCols),
- indicators(numCols, std::vector(fetchSize)) {}
+ NumericData(SQLCHAR precision, SQLSCHAR scale, SQLCHAR sign, const std::string& valueBytes)
+ : precision(precision), scale(scale), sign(sign), val(SQL_MAX_NUMERIC_LEN, '\0') {
+ if (valueBytes.size() > SQL_MAX_NUMERIC_LEN) {
+ throw std::runtime_error(
+ "NumericData valueBytes size exceeds SQL_MAX_NUMERIC_LEN (16)");
+ }
+ // Copy binary data to buffer, remaining bytes stay zero-padded
+ std::memcpy(&val[0], valueBytes.data(), valueBytes.size());
+ }
};
//-------------------------------------------------------------------------------------------------
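
For orientation, val follows the SQL_NUMERIC_STRUCT convention: the unscaled integer is stored as little-endian bytes in a 16-byte, zero-padded buffer, so 123.45 becomes precision 5, scale 2, sign 1 and the bytes of 12345 (0x39, 0x30). A hedged sketch of building those bytes; the driver's actual Decimal-to-bytes conversion is not part of this hunk:

// Illustrative packing of an unscaled integer into the layout NumericData expects.
#include <cstdint>
#include <string>

std::string pack_unscaled_little_endian(std::uint64_t unscaled) {
    std::string bytes;
    while (unscaled != 0) {                       // 12345 -> 0x39, 0x30
        bytes.push_back(static_cast<char>(unscaled & 0xFF));
        unscaled >>= 8;
    }
    return bytes;  // NumericData's constructor zero-pads to SQL_MAX_NUMERIC_LEN (16)
}

// Usage (illustrative): NumericData num(5, 2, 1, pack_unscaled_little_endian(12345));
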
@@ -123,6 +187,14 @@ SQLBindColFunc SQLBindCol_ptr = nullptr;
SQLDescribeColFunc SQLDescribeCol_ptr = nullptr;
SQLMoreResultsFunc SQLMoreResults_ptr = nullptr;
SQLColAttributeFunc SQLColAttribute_ptr = nullptr;
+SQLGetTypeInfoFunc SQLGetTypeInfo_ptr = nullptr;
+SQLProceduresFunc SQLProcedures_ptr = nullptr;
+SQLForeignKeysFunc SQLForeignKeys_ptr = nullptr;
+SQLPrimaryKeysFunc SQLPrimaryKeys_ptr = nullptr;
+SQLSpecialColumnsFunc SQLSpecialColumns_ptr = nullptr;
+SQLStatisticsFunc SQLStatistics_ptr = nullptr;
+SQLColumnsFunc SQLColumns_ptr = nullptr;
+SQLGetInfoFunc SQLGetInfo_ptr = nullptr;
// Transaction APIs
SQLEndTranFunc SQLEndTran_ptr = nullptr;
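
These *_ptr globals are presumably filled in by the driver's dynamic loader, which is not part of this hunk. A rough, assumption-labeled sketch of how one such pointer could be resolved at runtime; the helper name, library handle, exported symbol name, and the local typedef are all illustrative:

// Sketch only: resolving one ODBC entry point into a function-pointer global.
#if defined(_WIN32)
#include <windows.h>
#else
#include <dlfcn.h>
#endif
#include <sql.h>
#include <sqlext.h>

// Mirrors the shape of the SQLGetInfoFunc typedef assumed to live in ddbc_bindings.h.
typedef SQLRETURN (SQL_API* SQLGetInfoFn)(SQLHDBC, SQLUSMALLINT, SQLPOINTER,
                                          SQLSMALLINT, SQLSMALLINT*);

static void* load_driver_symbol(void* libHandle, const char* name) {
#if defined(_WIN32)
    return reinterpret_cast<void*>(GetProcAddress(static_cast<HMODULE>(libHandle), name));
#else
    return dlsym(libHandle, name);
#endif
}

// e.g. (illustrative symbol name):
//   SQLGetInfoFn getInfo = reinterpret_cast<SQLGetInfoFn>(load_driver_symbol(lib, "SQLGetInfoW"));
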
@@ -135,6 +207,13 @@ SQLFreeStmtFunc SQLFreeStmt_ptr = nullptr;
// Diagnostic APIs
SQLGetDiagRecFunc SQLGetDiagRec_ptr = nullptr;
+// DAE APIs
+SQLParamDataFunc SQLParamData_ptr = nullptr;
+SQLPutDataFunc SQLPutData_ptr = nullptr;
+SQLTablesFunc SQLTables_ptr = nullptr;
+
+SQLDescribeParamFunc SQLDescribeParam_ptr = nullptr;
+
namespace {
const char* GetSqlCTypeAsString(const SQLSMALLINT cType) {
@@ -168,15 +247,17 @@ const char* GetSqlCTypeAsString(const SQLSMALLINT cType) {
}
std::string MakeParamMismatchErrorStr(const SQLSMALLINT cType, const int paramIndex) {
- std::string errorString =
- "Parameter's object type does not match parameter's C type. paramIndex - " +
- std::to_string(paramIndex) + ", C type - " + GetSqlCTypeAsString(cType);
+ std::string errorString = "Parameter's object type does not match "
+ "parameter's C type. paramIndex - " +
+ std::to_string(paramIndex) + ", C type - " +
+ GetSqlCTypeAsString(cType);
return errorString;
}
-// This function allocates a buffer of ParamType, stores it as a void* in paramBuffers for
-// book-keeping and then returns a ParamType* to the allocated memory.
-// ctorArgs are the arguments to ParamType's constructor used while creating/allocating ParamType
+// This function allocates a buffer of ParamType, stores it as a void* in
+// paramBuffers for book-keeping and then returns a ParamType* to the allocated
+// memory. ctorArgs are the arguments to ParamType's constructor used while
+// creating/allocating ParamType
template <typename ParamType, typename... CtorArgs>
ParamType* AllocateParamBuffer(std::vector>& paramBuffers,
CtorArgs&&... ctorArgs) {
@@ -204,39 +285,120 @@ std::string DescribeChar(unsigned char ch) {
}
}
-// Given a list of parameters and their ParamInfo, calls SQLBindParameter on each of them with
-// appropriate arguments
+// Given a list of parameters and their ParamInfo, calls SQLBindParameter on
+// each of them with appropriate arguments
SQLRETURN BindParameters(SQLHANDLE hStmt, const py::list& params,
- const std::vector& paramInfos,
- std::vector>& paramBuffers) {
- LOG("Starting parameter binding. Number of parameters: {}", params.size());
+ std::vector& paramInfos,
+ std::vector>& paramBuffers,
+ const std::string& charEncoding = "utf-8") {
+ LOG("BindParameters: Starting parameter binding for statement handle %p "
+ "with %zu parameters",
+ (void*)hStmt, params.size());
for (int paramIndex = 0; paramIndex < params.size(); paramIndex++) {
const auto& param = params[paramIndex];
- const ParamInfo& paramInfo = paramInfos[paramIndex];
- LOG("Binding parameter {} - C Type: {}, SQL Type: {}", paramIndex, paramInfo.paramCType, paramInfo.paramSQLType);
+ ParamInfo& paramInfo = paramInfos[paramIndex];
+ LOG("BindParameters: Processing param[%d] - C_Type=%d, SQL_Type=%d, "
+ "ColumnSize=%lu, DecimalDigits=%d, InputOutputType=%d",
+ paramIndex, paramInfo.paramCType, paramInfo.paramSQLType,
+ (unsigned long)paramInfo.columnSize, paramInfo.decimalDigits,
+ paramInfo.inputOutputType);
void* dataPtr = nullptr;
SQLLEN bufferLength = 0;
SQLLEN* strLenOrIndPtr = nullptr;
// TODO: Add more data types like money, guid, interval, TVPs etc.
switch (paramInfo.paramCType) {
- case SQL_C_CHAR:
+ case SQL_C_CHAR: {
+ if (!py::isinstance<py::str>(param) && !py::isinstance<py::bytes>(param) &&
+ !py::isinstance<py::bytearray>(param)) {
+ ThrowStdException(MakeParamMismatchErrorStr(paramInfo.paramCType, paramIndex));
+ }
+ if (paramInfo.isDAE) {
+ LOG("BindParameters: param[%d] SQL_C_CHAR - Using DAE "
+ "(Data-At-Execution) for large string streaming",
+ paramIndex);
+ dataPtr =
+ const_cast(reinterpret_cast(¶mInfos[paramIndex]));
+ strLenOrIndPtr = AllocateParamBuffer(paramBuffers);
+ *strLenOrIndPtr = SQL_LEN_DATA_AT_EXEC(0);
+ bufferLength = 0;
+ } else {
+ // Use Python's codec system to encode the string with specified encoding
+ std::string encodedStr;
+
+ if (py::isinstance<py::str>(param)) {
+ // Encode Unicode string using the specified encoding
+ try {
+ py::object encoded = param.attr("encode")(charEncoding, "strict");
+ encodedStr = encoded.cast<std::string>();
+ LOG("BindParameters: param[%d] SQL_C_CHAR - Encoded with '%s', "
+ "size=%zu bytes",
+ paramIndex, charEncoding.c_str(), encodedStr.size());
+ } catch (const py::error_already_set& e) {
+ LOG_ERROR("BindParameters: param[%d] SQL_C_CHAR - Failed to encode "
+ "with '%s': %s",
+ paramIndex, charEncoding.c_str(), e.what());
+ throw std::runtime_error(std::string("Failed to encode parameter ") +
+ std::to_string(paramIndex) +
+ " with encoding '" + charEncoding +
+ "': " + e.what());
+ }
+ } else {
+ // bytes/bytearray - use as-is (already encoded)
+ if (py::isinstance<py::bytes>(param)) {
+ encodedStr = param.cast<std::string>();
+ } else {
+ // bytearray
+ encodedStr = std::string(
+ reinterpret_cast<const char*>(PyByteArray_AsString(param.ptr())),
+ PyByteArray_Size(param.ptr()));
+ }
+ LOG("BindParameters: param[%d] SQL_C_CHAR - Using raw bytes, size=%zu",
+ paramIndex, encodedStr.size());
+ }
+
+ std::string* strParam =
+ AllocateParamBuffer<std::string>(paramBuffers, encodedStr);
+ dataPtr = const_cast<void*>(static_cast<const void*>(strParam->c_str()));
+ bufferLength = strParam->size() + 1;
+ strLenOrIndPtr = AllocateParamBuffer<SQLLEN>(paramBuffers);
+ *strLenOrIndPtr = SQL_NTS;
+ }
+ break;
+ }
case SQL_C_BINARY: {
if (!py::isinstance(param) && !py::isinstance(param) &&
!py::isinstance(param)) {
ThrowStdException(MakeParamMismatchErrorStr(paramInfo.paramCType, paramIndex));
}
- std::string* strParam =
- AllocateParamBuffer<std::string>(paramBuffers, param.cast<std::string>());
- if (strParam->size() > 8192 /* TODO: Fix max length */) {
- ThrowStdException(
- "Streaming parameters is not yet supported. Parameter size"
- " must be less than 8192 bytes");
+ if (paramInfo.isDAE) {
+ // Deferred execution for VARBINARY(MAX)
+ LOG("BindParameters: param[%d] SQL_C_BINARY - Using DAE "
+ "for VARBINARY(MAX) streaming",
+ paramIndex);
+ dataPtr =
+ const_cast<void*>(reinterpret_cast<const void*>(&paramInfos[paramIndex]));
+ strLenOrIndPtr = AllocateParamBuffer<SQLLEN>(paramBuffers);
+ *strLenOrIndPtr = SQL_LEN_DATA_AT_EXEC(0);
+ bufferLength = 0;
+ } else {
+ // small binary
+ std::string binData;
+ if (py::isinstance<py::bytes>(param)) {
+ binData = param.cast<std::string>();
+ } else {
+ // bytearray
+ binData = std::string(
+ reinterpret_cast<const char*>(PyByteArray_AsString(param.ptr())),
+ PyByteArray_Size(param.ptr()));
+ }
+ std::string* binBuffer =
+ AllocateParamBuffer<std::string>(paramBuffers, binData);
+ dataPtr = const_cast<void*>(static_cast<const void*>(binBuffer->data()));
+ bufferLength = static_cast<SQLLEN>(binBuffer->size());
+ strLenOrIndPtr = AllocateParamBuffer<SQLLEN>(paramBuffers);
+ *strLenOrIndPtr = bufferLength;
}
- dataPtr = const_cast<void*>(static_cast<const void*>(strParam->c_str()));
- bufferLength = strParam->size() + 1 /* null terminator */;
- strLenOrIndPtr = AllocateParamBuffer<SQLLEN>(paramBuffers);
- *strLenOrIndPtr = SQL_NTS;
break;
}
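
The SQL_LEN_DATA_AT_EXEC bindings in the SQL_C_CHAR and SQL_C_BINARY branches above only defer the payload; the bytes are pushed later, when execution reports SQL_NEED_DATA. That execute-time loop is not part of this hunk; a generic ODBC sketch of it, assuming a single deferred parameter and an illustrative chunk source, looks like:

// Generic ODBC data-at-execution loop (illustrative, not this driver's code).
#if defined(_WIN32)
#include <windows.h>
#endif
#include <sql.h>
#include <sqlext.h>
#include <algorithm>
#include <string>

SQLRETURN stream_dae_parameter(SQLHSTMT hStmt, const std::string& payload) {
    SQLPOINTER token = nullptr;
    SQLRETURN rc = SQLParamData(hStmt, &token);   // token is the value bound with SQL_LEN_DATA_AT_EXEC
    while (rc == SQL_NEED_DATA) {
        const size_t kChunk = 8192;               // mirrors DAE_CHUNK_SIZE above
        for (size_t off = 0; off < payload.size(); off += kChunk) {
            size_t len = std::min(kChunk, payload.size() - off);
            rc = SQLPutData(hStmt, const_cast<char*>(payload.data() + off),
                            static_cast<SQLLEN>(len));
            if (!SQL_SUCCEEDED(rc)) return rc;
        }
        rc = SQLParamData(hStmt, &token);         // next deferred parameter, or finish execution
    }
    return rc;
}
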
case SQL_C_WCHAR: {
@@ -244,53 +406,31 @@ SQLRETURN BindParameters(SQLHANDLE hStmt, const py::list& params,
!py::isinstance(param)) {
ThrowStdException(MakeParamMismatchErrorStr(paramInfo.paramCType, paramIndex));
}
- std::wstring* strParam =
- AllocateParamBuffer<std::wstring>(paramBuffers, param.cast<std::wstring>());
- if (strParam->size() > 4096 /* TODO: Fix max length */) {
- ThrowStdException(
- "Streaming parameters is not yet supported. Parameter size"
- " must be less than 8192 bytes");
- }
-
- // Log detailed parameter information
- LOG("SQL_C_WCHAR Parameter[{}]: Length={}, Content='{}'",
- paramIndex,
- strParam->size(),
- (strParam->size() <= 100
- ? WideToUTF8(std::wstring(strParam->begin(), strParam->end()))
- : WideToUTF8(std::wstring(strParam->begin(), strParam->begin() + 100)) + "..."));
-
- // Log each character's code point for debugging
- if (strParam->size() <= 20) {
- for (size_t i = 0; i < strParam->size(); i++) {
- unsigned char ch = static_cast<unsigned char>((*strParam)[i]);
- LOG(" char[{}] = {} ({})", i, static_cast<int>(ch), DescribeChar(ch));
- }
+ if (paramInfo.isDAE) {
+ // deferred execution
+ LOG("BindParameters: param[%d] SQL_C_WCHAR - Using DAE for "
+ "NVARCHAR(MAX) streaming",
+ paramIndex);
+ dataPtr =
+ const_cast<void*>(reinterpret_cast<const void*>(&paramInfos[paramIndex]));
+ strLenOrIndPtr = AllocateParamBuffer<SQLLEN>(paramBuffers);
+ *strLenOrIndPtr = SQL_LEN_DATA_AT_EXEC(0);
+ bufferLength = 0;
+ } else {
+ // Normal small-string case
+ std::wstring* strParam =
+ AllocateParamBuffer<std::wstring>(paramBuffers, param.cast<std::wstring>());
+ LOG("BindParameters: param[%d] SQL_C_WCHAR - String "
+ "length=%zu characters, buffer=%zu bytes",
+ paramIndex, strParam->size(), strParam->size() * sizeof(SQLWCHAR));
+ std::vector<SQLWCHAR>* sqlwcharBuffer =
+ AllocateParamBuffer<std::vector<SQLWCHAR>>(paramBuffers,
+ WStringToSQLWCHAR(*strParam));
+ dataPtr = sqlwcharBuffer->data();
+ bufferLength = sqlwcharBuffer->size() * sizeof(SQLWCHAR);
+ strLenOrIndPtr = AllocateParamBuffer<SQLLEN>(paramBuffers);
+ *strLenOrIndPtr = SQL_NTS;
}
-#if defined(__APPLE__) || defined(__linux__)
- // On macOS/Linux, we need special handling for wide characters
- // Create a properly encoded SQLWCHAR buffer for the parameter
- std::vector<SQLWCHAR>* sqlwcharBuffer =
- AllocateParamBuffer<std::vector<SQLWCHAR>>(paramBuffers);
-
- // Reserve space and convert from wstring to SQLWCHAR array
- sqlwcharBuffer->resize(strParam->size() + 1, 0); // +1 for null terminator
-
- // Convert each wchar_t (4 bytes on macOS) to SQLWCHAR (2 bytes)
- for (size_t i = 0; i < strParam->size(); i++) {
- (*sqlwcharBuffer)[i] = static_cast<SQLWCHAR>((*strParam)[i]);
- }
- // Use the SQLWCHAR buffer instead of the wstring directly
- dataPtr = sqlwcharBuffer->data();
- bufferLength = (strParam->size() + 1) * sizeof(SQLWCHAR);
- LOG("macOS: Created SQLWCHAR buffer for parameter with size: {} bytes", bufferLength);
-#else
- // On Windows, wchar_t and SQLWCHAR are the same size, so direct cast works
- dataPtr = const_cast<void*>(static_cast<const void*>(strParam->c_str()));
- bufferLength = (strParam->size() + 1 /* null terminator */) * sizeof(wchar_t);
-#endif
- strLenOrIndPtr = AllocateParamBuffer<SQLLEN>(paramBuffers);
- *strLenOrIndPtr = SQL_NTS;
break;
}
case SQL_C_BIT: {
@@ -305,11 +445,34 @@ SQLRETURN BindParameters(SQLHANDLE hStmt, const py::list& params,
if (!py::isinstance(param)) {
ThrowStdException(MakeParamMismatchErrorStr(paramInfo.paramCType, paramIndex));
}
- // TODO: This wont work for None values added to BINARY/VARBINARY columns. None values
- // of binary columns need to have C type = SQL_C_BINARY & SQL type = SQL_BINARY
+ SQLSMALLINT sqlType = paramInfo.paramSQLType;
+ SQLULEN columnSize = paramInfo.columnSize;
+ SQLSMALLINT decimalDigits = paramInfo.decimalDigits;
+ if (sqlType == SQL_UNKNOWN_TYPE) {
+ SQLSMALLINT describedType;
+ SQLULEN describedSize;
+ SQLSMALLINT describedDigits;
+ SQLSMALLINT nullable;
+ RETCODE rc = SQLDescribeParam_ptr(
+ hStmt, static_cast<SQLUSMALLINT>(paramIndex + 1), &describedType,
+ &describedSize, &describedDigits, &nullable);
+ if (!SQL_SUCCEEDED(rc)) {
+ LOG("BindParameters: SQLDescribeParam failed for "
+ "param[%d] (NULL parameter) - SQLRETURN=%d",
+ paramIndex, rc);
+ return rc;
+ }
+ sqlType = describedType;
+ columnSize = describedSize;
+ decimalDigits = describedDigits;
+ }
dataPtr = nullptr;
strLenOrIndPtr = AllocateParamBuffer<SQLLEN>(paramBuffers);
*strLenOrIndPtr = SQL_NULL_DATA;
+ bufferLength = 0;
+ paramInfo.paramSQLType = sqlType;
+ paramInfo.columnSize = columnSize;
+ paramInfo.decimalDigits = decimalDigits;
break;
}
case SQL_C_STINYINT:
@@ -321,8 +484,11 @@ SQLRETURN BindParameters(SQLHANDLE hStmt, const py::list& params,
}
int value = param.cast<int>();
// Range validation for signed 16-bit integer
- if (value < std::numeric_limits |