Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 22 additions & 31 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,42 +1,33 @@
{
"name": "Ecoindex python full stack dev container",
"image": "mcr.microsoft.com/devcontainers/python:3.12",
"postCreateCommand": "pipx install poetry==1.8.5 && poetry self add poetry-multiproject-plugin && poetry self add poetry-polylith-plugin",
"name": "EcoIndex Python",
"image": "mcr.microsoft.com/devcontainers/python:1-3.12",
"features": {
"ghcr.io/audacioustux/devcontainers/taskfile": {},
"ghcr.io/devcontainers/features/docker-in-docker:2": {
"installDockerBuildx": true,
"version": "latest",
"dockerDashComposeVersion": "v2"
}
"ghcr.io/devcontainers/features/docker-in-docker:2": {}
},
"forwardPorts": [
8000
],
"customizations": {
"vscode": {
"extensions": [
"-ms-python.autopep8",
"adrianwilczynski.alpine-js-intellisense",
"adrianwilczynski.alpine-js-intellisense",
"bierner.markdown-emoji",
"charliermarsh.ruff",
"Codeium.codeium",
"github.vscode-github-actions",
"Gruntfuggly.todo-tree",
"mhutchie.git-graph",
"ms-azuretools.vscode-docker",
"ms-python.mypy-type-checker",
"ms-python.python",
"Perkovec.emoji",
"samuelcolvin.jinjahtml",
"ms-python.vscode-pylance",
"ms-python.black-formatter",
"charliermarsh.ruff",
"tamasfe.even-better-toml",
"ue.alphabetical-sorter",
"yzhang.markdown-all-in-one",
"esbenp.prettier-vscode",
"ms-pyright.pyright",
"-ms-python.vscode-pylance"
]
"ms-python.mypy-type-checker",
"mhutchie.git-graph"
],
"settings": {
"python.defaultInterpreterPath": "/usr/local/bin/python",
"python.formatting.provider": "black",
"python.linting.enabled": true,
"python.linting.ruffEnabled": true,
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit"
}
}
}
}
},
"postCreateCommand": "pipx install poetry==1.8.5 && poetry self add poetry-multiproject-plugin && poetry self add poetry-polylith-plugin",
"remoteUser": "vscode"
}
55 changes: 52 additions & 3 deletions bases/ecoindex/backend/routers/tasks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from json import loads
from typing import Annotated
from urllib.parse import urlparse, urlunparse

import idna
import requests
from celery.result import AsyncResult
from ecoindex.backend.dependencies.validation import validate_api_key_batch
Expand All @@ -26,6 +28,48 @@
router = APIRouter(prefix="/v1/tasks/ecoindexes", tags=["Tasks"])


def convert_url_to_punycode(url: str) -> str:
    """
    Convert a URL with an emoji domain (or any Unicode domain) to Punycode.
    This makes the URL compatible with the requests library.

    Only the hostname is rewritten; any ``user:password@`` prefix, the port
    and every other URL component are kept as they are. URLs without a
    hostname, or whose hostname is already pure ASCII, are returned unchanged.

    Args:
        url: The URL string that may contain Unicode characters in the domain

    Returns:
        The URL with the domain converted to Punycode, or the original URL
        when no conversion is needed or the conversion fails
    """
    parsed = urlparse(url)

    # `hostname` is the netloc without credentials, port or IPv6 brackets.
    hostname = parsed.hostname
    if not hostname or hostname.isascii():
        # Nothing to convert: avoid a lossy parse/unparse round trip.
        return url

    try:
        # Preferred path: strict IDNA 2008 encoding.
        hostname_punycode = idna.encode(hostname).decode("ascii")
    except (idna.IDNAError, UnicodeError):
        # IDNA 2008 rejects symbols such as emoji, so fall back to the
        # standard library's IDNA 2003 codec, as the `idna` package
        # documentation suggests for such labels.
        try:
            hostname_punycode = hostname.encode("idna").decode("ascii")
        except UnicodeError:
            # If conversion fails, return the original URL
            return url

    try:
        # Accessing `.port` raises ValueError on a malformed/out-of-range port.
        port = parsed.port
    except ValueError:
        return url

    # Keep the credentials (everything up to the last "@") in front of the
    # converted hostname instead of silently dropping them.
    userinfo, at_sign, _ = parsed.netloc.rpartition("@")
    netloc = f"{userinfo}{at_sign}{hostname_punycode}"
    if port is not None:
        netloc = f"{netloc}:{port}"

    # Reconstruct the URL with the converted hostname
    return urlunparse(parsed._replace(netloc=netloc))


@router.post(
name="Add new ecoindex analysis task to the waiting queue",
path="/",
Expand All @@ -46,7 +90,8 @@ async def add_ecoindex_analysis_task(
Body(
default=...,
title="Web page to analyze defined by its url and its screen resolution",
example=WebPage(url="https://www.ecoindex.fr", width=1920, height=1080),
example=WebPage(url="https://www.ecoindex.fr",
width=1920, height=1080),
),
],
custom_headers: Annotated[
Expand All @@ -64,7 +109,8 @@ async def add_ecoindex_analysis_task(
)

if remaining_quota:
response.headers["X-Remaining-Daily-Requests"] = str(remaining_quota - 1)
response.headers["X-Remaining-Daily-Requests"] = str(
remaining_quota - 1)

if (
Settings().EXCLUDED_HOSTS
Expand All @@ -78,9 +124,12 @@ async def add_ecoindex_analysis_task(
ua = EcoindexScraper.get_user_agent()
headers = {**custom_headers, **ua.headers.get()}

# Convert URL to Punycode to handle emoji domains and other Unicode domains
url_for_request = convert_url_to_punycode(str(web_page.url))

try:
r = requests.head(
url=web_page.url,
url=url_for_request,
timeout=5,
headers=headers,
)
Expand Down
Loading