From a6936c725633178eed26c9a10e9b97649fc3b451 Mon Sep 17 00:00:00 2001 From: Vincent Vatelot Date: Fri, 30 Jan 2026 14:01:56 +0000 Subject: [PATCH] chore(devcontainer): Update devcontainer configuration and enhance Python environment - Renamed the devcontainer to "EcoIndex Python" and updated the base image version. - Removed unnecessary extensions and added essential Python extensions for linting and formatting. - Configured VSCode settings for Python formatting and linting. - Added a new function in tasks.py to convert URLs with emoji domains to Punycode for compatibility with requests. - Updated the add_ecoindex_analysis_task function to use the new URL conversion function. --- .devcontainer/devcontainer.json | 53 ++++++++++-------------- bases/ecoindex/backend/routers/tasks.py | 55 +++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 34 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 3eb76fb..82341cd 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,42 +1,33 @@ { - "name": "Ecoindex python full stack dev container", - "image": "mcr.microsoft.com/devcontainers/python:3.12", - "postCreateCommand": "pipx install poetry==1.8.5 && poetry self add poetry-multiproject-plugin && poetry self add poetry-polylith-plugin", + "name": "EcoIndex Python", + "image": "mcr.microsoft.com/devcontainers/python:1-3.12", "features": { "ghcr.io/audacioustux/devcontainers/taskfile": {}, - "ghcr.io/devcontainers/features/docker-in-docker:2": { - "installDockerBuildx": true, - "version": "latest", - "dockerDashComposeVersion": "v2" - } + "ghcr.io/devcontainers/features/docker-in-docker:2": {} }, - "forwardPorts": [ - 8000 - ], "customizations": { "vscode": { "extensions": [ - "-ms-python.autopep8", - "adrianwilczynski.alpine-js-intellisense", - "adrianwilczynski.alpine-js-intellisense", - "bierner.markdown-emoji", - "charliermarsh.ruff", - "Codeium.codeium", - "github.vscode-github-actions", 
def _hostname_to_ascii(hostname: str) -> str:
    """
    Return the ASCII-compatible (Punycode) form of a Unicode hostname.

    Args:
        hostname: The hostname to encode, without port or credentials

    Returns:
        The hostname with every non-ASCII label encoded as ``xn--...``

    Raises:
        UnicodeError: If neither encoder accepts the hostname
    """
    try:
        # Preferred path: strict IDNA 2008 encoding.
        return idna.encode(hostname).decode("ascii")
    except UnicodeError:
        # idna.IDNAError derives from UnicodeError. IDNA 2008 disallows
        # symbols such as emoji, so emoji domains always end up here. The
        # standard library codec implements IDNA 2003, which accepts them.
        return hostname.encode("idna").decode("ascii")


def convert_url_to_punycode(url: str) -> str:
    """
    Convert a URL with an emoji domain (or any Unicode domain) to Punycode.
    This makes the URL compatible with requests library.

    Only the hostname is rewritten: credentials, port, path, params, query
    and fragment are preserved. URLs without a hostname, or whose hostname is
    already ASCII, are returned unchanged.

    Args:
        url: The URL string that may contain Unicode characters in the domain

    Returns:
        The URL with the domain converted to Punycode, or the original URL
        when it cannot be parsed or its hostname cannot be encoded
    """
    try:
        # urlparse raises ValueError on e.g. unbalanced IPv6 brackets.
        parsed = urlparse(url)
        hostname = parsed.hostname
    except ValueError:
        return url

    # Nothing to convert: no host at all, or a host that is already ASCII
    # (this also covers IPv4/IPv6 literals and existing "xn--" labels).
    if not hostname or hostname.isascii():
        return url

    try:
        # Accessing .port raises ValueError for a non-numeric or
        # out-of-range port; read it before encoding so a bad port
        # short-circuits.
        port = parsed.port
        hostname_punycode = _hostname_to_ascii(hostname)
    except ValueError:
        # UnicodeError (and therefore idna.IDNAError) is a ValueError too.
        # If conversion fails, return the original URL
        return url

    # Keep "user:password@" if present; only the host part is replaced.
    userinfo, at_sign, _ = parsed.netloc.rpartition("@")
    # Compare against None so that an explicit port 0 is not dropped.
    port_suffix = f":{port}" if port is not None else ""
    netloc = f"{userinfo}{at_sign}{hostname_punycode}{port_suffix}"

    # Reconstruct the URL with the converted hostname
    return urlunparse(parsed._replace(netloc=netloc))