Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions src/packageurl/contrib/purl2url.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
# Visit https://github.com/package-url/packageurl-python for support and
# download.

import re

from packageurl import PackageURL
from packageurl.contrib.route import NoRouteAvailable
from packageurl.contrib.route import Router
Expand Down Expand Up @@ -172,6 +174,83 @@ def build_gitlab_repo_url(purl):
return f"https://gitlab.com/{namespace}/{name}"


# URL templates for generic git hosting frontends.
#
# Each key is a tuple of regexes that are tried with ``re.match`` against the
# purl namespace, so a pattern only has to describe the leading (host) part of
# the namespace. Each value holds two ``str.format`` templates:
#   - "commit_url": formatted with ``namespace``, ``name`` and ``version``
#   - "repo_url": formatted with ``namespace`` and ``name``
GIT_REPO_GENERIC = {
    # cgit
    (
        r"git\.kernel\.org",
        r"gitweb\.gentoo\.org",
        # Also accepted by the cgit route in url2purl; listed here so that
        # purls created from those URLs can be turned back into URLs.
        r"cgit\.git\.savannah\.gnu\.org",
    ): {
        "commit_url": "https://{namespace}/{name}.git/commit/?id={version}",
        "repo_url": "https://{namespace}/{name}.git",
    },
    # gitiles
    (
        r"android\.googlesource\.com",
        r"aomedia\.googlesource\.com",
        r"chromium\.googlesource\.com",
    ): {
        "commit_url": "https://{namespace}/{name}/+/{version}",
        "repo_url": "https://{namespace}/{name}",
    },
    # allura
    (r"sourceforge\.net", r"forge-allura\.apache\.org"): {
        "commit_url": "https://{namespace}/{name}/ci/{version}",
        "repo_url": "https://{namespace}/{name}",
    },
    # gitweb
    (
        r"gcc\.gnu\.org",
        r"git\.postgresql\.org",
        r"sourceware\.org/git",
    ): {
        "commit_url": "https://{namespace}/?p={name}.git;a=commit;h={version}",
        "repo_url": "https://{namespace}/?p={name}.git",
    },
    # gitea / forgejo
    (
        r"codeberg\.org",
        r"gitea\.com",
    ): {
        "commit_url": "https://{namespace}/{name}/commit/{version}",
        "repo_url": "https://{namespace}/{name}",
    },
    # sub gitlab ( excludes gitlab.com )
    (
        r"git\.codelinaro\.org.*",
        r"gitlab\.(?!com\b)[^/]+",
    ): {
        "commit_url": "https://{namespace}/{name}/-/commit/{version}",
        "repo_url": "https://{namespace}/{name}",
    },
}


@repo_router.route("pkg:generic/.*")
def build_generic_repo_url(purl):
    """
    Return a repository or commit URL for a generic `purl` string, or None.

    The purl namespace is matched against the host patterns of
    GIT_REPO_GENERIC. The first entry with a matching pattern provides the
    URL template: its commit URL when the purl has a version, its repository
    URL otherwise. None is returned when the purl lacks a namespace or a
    name, or when no pattern matches.
    """
    parsed = PackageURL.from_string(purl)
    namespace = parsed.namespace
    name = parsed.name
    version = parsed.version

    if not namespace or not name:
        return None

    for host_patterns, templates in GIT_REPO_GENERIC.items():
        # re.match anchors at the start only: a pattern has to match the
        # beginning of the namespace, not all of it.
        if not any(re.match(host_pattern, namespace) for host_pattern in host_patterns):
            continue

        if version:
            return templates["commit_url"].format(
                namespace=namespace, name=name, version=version
            )
        return templates["repo_url"].format(namespace=namespace, name=name)

    return None


@repo_router.route("pkg:(gem|rubygems)/.*")
def build_rubygems_repo_url(purl):
"""
Expand Down
230 changes: 230 additions & 0 deletions src/packageurl/contrib/url2purl.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,236 @@ def build_bitbucket_purl(url):
)


def build_route_regex(domain_patterns, path_suffix="/.*"):
    """
    Return a route regex matching http(s) URLs on any of the given domains.

    `domain_patterns` is an iterable of regex fragments; they are joined as
    alternatives inside one group. `path_suffix` is the regex appended right
    after that group.
    """
    alternatives = "|".join(domain_patterns)
    return r"https?://({}){}".format(alternatives, path_suffix)


# Hosts handled by the GitLab sub-domain route; the negative lookahead keeps
# gitlab.com itself out of this route.
SUB_GITLAB_DOMAINS = [r"git\.codelinaro\.org", r"gitlab\.(?!com\b)[^/]+"]
SUB_GITLAB_ROUTE_REGEX = build_route_regex(SUB_GITLAB_DOMAINS)


@purl_router.route(SUB_GITLAB_ROUTE_REGEX)
def build_gitlab_sub_purl(url):
    """
    Return a generic PackageURL object from a commit `url` hosted on a
    GitLab sub domain, or None when `url` is not a commit URL.
    For example:
    https://git.codelinaro.org/linaro/qcom/project/-/commit/a40a9732c840e5a324fba78b0ff7980b497c3831

    The purl namespace is the host followed by the project path, the name is
    the last path segment before the commit marker and the version is the
    commit id.
    """
    commit_url_regex = (
        r"^https?://"
        r"(?P<domain>[^/]+)/"
        r"(?P<namespace>.+?)/(?P<name>(?!-/)[^/]+)/(?:-/)?commit/(?P<version>[0-9a-fA-F]{7,64})/?$"
    )

    found = re.search(commit_url_regex, url)
    if not found:
        return None

    parts = found.groupdict()
    project_path = parts["namespace"].strip("/")
    return PackageURL(
        type="generic",
        namespace=f"{parts['domain']}/{project_path}",
        name=parts["name"],
        version=parts["version"],
    )


# Hosts handled by the gitea/forgejo route. Raw strings are required here:
# "\." in a regular string literal is an invalid escape sequence.
GITEA_DOMAINS = [r"codeberg\.org", r"gitea\.com"]
GITEA_ROUTE_REGEX = build_route_regex(GITEA_DOMAINS)


@purl_router.route(GITEA_ROUTE_REGEX)
def build_gitea_purl(url):
    """
    Return a generic PackageURL object from a gitea/forgejo commit `url`, or
    None when `url` is not a commit URL.
    For example:
    https://codeberg.org/alpinelinux/aports/commit/a40a9732c840e5a324fba78b0ff7980b497c3831
    https://gitea.com/htc47/entur/commit/271b852cfb761a1fe257aa0f0a12ff38bd8bfd1c

    The purl namespace is "<host>/<owner>", the name is the repository name
    and the version is the commit id.
    """
    gitea_commit_pattern = (
        r"^https?://"
        r"(?P<domain>[^/]+)/"
        r"(?P<namespace>[^/]+)/(?P<name>[^/]+)/commit/(?P<version>[0-9a-fA-F]{7,64})/?$"
    )

    commit_match = re.search(gitea_commit_pattern, url)
    if not commit_match:
        return None

    domain = commit_match.group("domain")
    owner = commit_match.group("namespace")
    return PackageURL(
        type="generic",
        namespace=f"{domain}/{owner}",
        name=commit_match.group("name"),
        version=commit_match.group("version"),
    )


# Hosts handled by the cgit route. Raw strings are required here: "\." in a
# regular string literal is an invalid escape sequence.
CGIT_DOMAINS = [
    r"git\.kernel\.org",
    r"gitweb\.gentoo\.org",
    r"cgit\.git\.savannah\.gnu\.org",
]
CGIT_ROUTE_REGEX = build_route_regex(CGIT_DOMAINS)


@purl_router.route(CGIT_ROUTE_REGEX)
def build_cgit_purl(url):
    """
    Return a generic PackageURL object from a cgit commit `url`, or None when
    `url` is not a commit URL.
    For example:
    https://git.kernel.org/pub/scm/bluetooth/bluez.git/commit/?id=74770b1fd2be612f9c2cf807db81fcdcc35e6560
    https://cgit.git.savannah.gnu.org/cgit/uddf.git/commit/?id=98c41e131dc952aee43d4ec392b80ca4c426be8d
    https://gitweb.gentoo.org/dev/darkside.git/commit/?id=8d4b0836f3b6ab7075212926d9aad0b50246d825

    The purl namespace is the host followed by the path leading to the
    repository, the name is the repository name without its ".git" suffix and
    the version is the commit id taken from the "id" query parameter.
    """
    cgit_project_pattern = (
        r"^https?://"
        r"(?P<domain>[^/]+)/"
        r"(?P<namespace>.+)/"
        r"(?P<name>[^/]+?)"
        r"(?:\.git)?"
        r"/commit/"
        # Optional file path between "/commit/" and the query string.
        r"(?:[^?]+)?"
        r"\?id="
        r"(?P<version>[0-9a-fA-F]{7,64})/?$"
    )

    commit_match = re.search(cgit_project_pattern, url)
    if not commit_match:
        return None

    domain = commit_match.group("domain")
    namespace = f"{domain}/{commit_match.group('namespace')}"
    return PackageURL(
        type="generic",
        namespace=namespace,
        name=commit_match.group("name"),
        version=commit_match.group("version"),
        qualifiers={},
        subpath="",
    )


# Hosts handled by the Gitiles route.
GITILES_DOMAINS = [
    r"android\.googlesource\.com",
    r"aomedia\.googlesource\.com",
    r"chromium\.googlesource\.com",
]
GITILES_ROUTE_REGEX = build_route_regex(GITILES_DOMAINS)


@purl_router.route(GITILES_ROUTE_REGEX)
def build_gitiles_purl(url):
    """
    Return a generic PackageURL object from a Gitiles commit `url`, or None
    when `url` is not a commit URL.
    For example:
    https://android.googlesource.com/platform/packages/apps/Settings/+/2968ccc911956fa5813a9a6a5e5c8970e383a60f
    https://aomedia.googlesource.com/libavifinfo/+/43716e9c34d3389b4882fbd1a81c04543ed04fe3

    The purl namespace is the host, followed by the path leading to the
    project when there is one. The name is the last path segment before
    "/+/" and the version is the commit id.
    """
    commit_url_regex = (
        r"^https?://"
        r"(?P<domain>[^/]+)/"
        r"(?:(?P<namespace>.+)/)?"
        r"(?P<name>[^/]+?)"
        r"/\+/"
        r"(?P<version>[0-9a-fA-F]{7,64})/?$"
    )

    found = re.search(commit_url_regex, url)
    if not found:
        return None

    host = found.group("domain")
    project_path = found.group("namespace")
    # A project located directly under the host has no path part.
    if project_path:
        namespace = f"{host}/{project_path}"
    else:
        namespace = host

    return PackageURL(
        type="generic",
        namespace=namespace,
        name=found.group("name"),
        version=found.group("version"),
        qualifiers={},
        subpath="",
    )


# Hosts handled by the Allura route; only URLs under "/p/" are routed.
ALLURA_DOMAINS = [r"sourceforge\.net", r"forge-allura\.apache\.org"]
ALLURA_ROUTE_REGEX = build_route_regex(ALLURA_DOMAINS, "/p/.*")


@purl_router.route(ALLURA_ROUTE_REGEX)
def build_allura_purl(url):
    """
    Return a generic PackageURL object from an Apache Allura commit `url`
    (e.g., SourceForge), or None when `url` is not a commit URL.
    For example:
    https://sourceforge.net/p/djvu/djvulibre-git/ci/e15d51510048927f172f1bf1f27ede65907d940d
    https://sourceforge.net/p/infrarecorder/code/ci/9361b6f267e7b1c1576c48f6dac6dec18d8a93e0/
    https://forge-allura.apache.org/p/allura/git/ci/674e070e5ca7db7c75cf61d8efd2a3e3e49bd946/

    The purl namespace is the host followed by the path leading to the
    repository, the name is the last path segment before "/ci/" and the
    version is the commit id.
    """
    allura_pattern = (
        r"^https?://"
        # The explicit "/" after the host keeps the namespace group from
        # starting with a slash and prevents the regex from backtracking
        # into the host name to find a match.
        r"(?P<domain>[^/]+)/"
        r"(?P<namespace>.+)/"
        r"(?P<name>[^/]+?)"
        r"/ci/"
        r"(?P<version>[0-9a-fA-F]{7,64})/?$"
    )

    commit_match = re.search(allura_pattern, url)
    if not commit_match:
        return None

    domain = commit_match.group("domain")
    namespace = f"{domain}/{commit_match.group('namespace')}"
    return PackageURL(
        type="generic",
        namespace=namespace,
        name=commit_match.group("name"),
        version=commit_match.group("version"),
        qualifiers={},
        subpath="",
    )


# Host and base path pairs handled by the Gitweb route. Raw strings are
# required here: "\." in a regular string literal is an invalid escape
# sequence.
GITWEB_DOMAINS = [
    r"gcc\.gnu\.org/git",
    r"git\.postgresql\.org/gitweb",
    r"sourceware\.org/git",
]
GITWEB_ROUTE_REGEX = build_route_regex(GITWEB_DOMAINS)


@purl_router.route(GITWEB_ROUTE_REGEX)
def build_gitweb_purl(url):
    """
    Return a generic PackageURL object from a Gitweb `url`, or None when the
    project and commit id cannot be found in `url`.
    For example:
    https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=82cc94e5fb69d1c45a386f83798251de5bff9339
    https://git.postgresql.org/gitweb/?p=hamn.git;a=commit;h=a796b71a5b3fe7f751f1086a08cb114b9877dea2
    https://sourceware.org/git/?p=glibc.git;a=commit;h=dedebed24f77762eea7d3c5ed2739a90a4d60461

    The purl namespace is the host and path found before the query string,
    the name is the "p" query parameter without its ".git" suffix and the
    version is the commit id taken from the "h" query parameter.
    """
    gitweb_pattern = (
        r"^https?://"
        # Everything before the query string; an optional slash right before
        # the "?" is left out of the namespace.
        r"(?P<namespace>[^?]+?)"
        r"/?(?=\?)"
        # The query parameters are captured with lookaheads so that "p" and
        # "h" are found whatever their order in the query string.
        r"(?=.*[?;&]p=(?P<name>[^;&]+?)(?:\.git)?(?:[;&]|$))"
        r"(?=.*[?;&]h=(?P<version>[0-9a-fA-F]{7,64}))"
    )

    commit_match = re.search(gitweb_pattern, url)
    if not commit_match:
        return None

    return PackageURL(
        type="generic",
        namespace=commit_match.group("namespace"),
        name=commit_match.group("name"),
        version=commit_match.group("version"),
        qualifiers={},
        subpath="",
    )


@purl_router.route("https?://gitlab\\.com/(?!.*/archive/).*")
def build_gitlab_purl(url):
"""
Expand Down
26 changes: 25 additions & 1 deletion tests/contrib/data/url2purl.json
Original file line number Diff line number Diff line change
Expand Up @@ -277,5 +277,29 @@
"https://packagemanager.rstudio.com/cran/2022-06-23/src/contrib/curl_4.3.2.tar.gz": "pkg:cran/curl@4.3.2?download_url=https://packagemanager.rstudio.com/cran/2022-06-23/src/contrib/curl_4.3.2.tar.gz",
"https://github.com/TG1999/first_repo/commit/98e516011d6e096e25247b82fc5f196bbeecff10": "pkg:github/tg1999/first_repo@98e516011d6e096e25247b82fc5f196bbeecff10",
"https://gitlab.com/TG1999/first_repo/-/commit/bf04e5f289885cf2f20a92b387bcc6df33e30809": "pkg:gitlab/tg1999/first_repo@bf04e5f289885cf2f20a92b387bcc6df33e30809",
"https://bitbucket.org/TG1999/first_repo/commits/16a60c4a74ef477cd8c16ca82442eaab2fbe8c86": "pkg:bitbucket/tg1999/first_repo@16a60c4a74ef477cd8c16ca82442eaab2fbe8c86"

"https://git.codelinaro.org/clo/qsdk/oss/kernel/linux-msm/-/commit/a5f07894058c4198f61e533d727b343c5be879b0": "pkg:generic/git.codelinaro.org/clo/qsdk/oss/kernel/linux-msm@a5f07894058c4198f61e533d727b343c5be879b0",
"https://gitlab.gnome.org/GNOME/gimp/-/commit/112a5e038f0646eae5ae314988ec074433d2b365": "pkg:generic/gitlab.gnome.org/GNOME/gimp@112a5e038f0646eae5ae314988ec074433d2b365",
"https://gitlab.freedesktop.org/poppler/poppler/-/commit/8677500399fc2548fa816b619580c2c07915a98c": "pkg:generic/gitlab.freedesktop.org/poppler/poppler@8677500399fc2548fa816b619580c2c07915a98c",
"https://gitea.com/htc47/entur/commit/271b852cfb761a1fe257aa0f0a12ff38bd8bfd1c": "pkg:generic/gitea.com/htc47/entur@271b852cfb761a1fe257aa0f0a12ff38bd8bfd1c",
"https://codeberg.org/alpinelinux/aports/commit/a40a9732c840e5a324fba78b0ff7980b497c3831": "pkg:generic/codeberg.org/alpinelinux/aports@a40a9732c840e5a324fba78b0ff7980b497c3831",

"https://git.kernel.org/pub/scm/utils/b4/b4.git/commit/?id=477734000555ffc24bf873952e40367deee26f17": "pkg:generic/git.kernel.org/pub/scm/utils/b4/b4@477734000555ffc24bf873952e40367deee26f17",
"https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/net/core/sock.c?id=9d538fa60bad4f7b23193c89e843797a1cf71ef3": "pkg:generic/git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux@9d538fa60bad4f7b23193c89e843797a1cf71ef3",
"https://cgit.git.savannah.gnu.org/cgit/uddf.git/commit/?id=98c41e131dc952aee43d4ec392b80ca4c426be8d": "pkg:generic/cgit.git.savannah.gnu.org/cgit/uddf@98c41e131dc952aee43d4ec392b80ca4c426be8d",
"https://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git/commit/?id=7457fe9541b5162f285454947448d553a5d5a531": "pkg:generic/git.kernel.org/pub/scm/virt/kvm/mst/qemu@7457fe9541b5162f285454947448d553a5d5a531",

"https://gitweb.gentoo.org/dev/darkside.git/commit/?id=8d4b0836f3b6ab7075212926d9aad0b50246d825": "pkg:generic/gitweb.gentoo.org/dev/darkside@8d4b0836f3b6ab7075212926d9aad0b50246d825",
"https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=f73ae47c5e48010f504f3f55567152258f3013ae": "pkg:generic/gitweb.gentoo.org/repo/gentoo@f73ae47c5e48010f504f3f55567152258f3013ae",
"https://android.googlesource.com/platform/frameworks/base/+/b4da73a935a8c906ff5df562155824d63ac849ab": "pkg:generic/android.googlesource.com/platform/frameworks/base@b4da73a935a8c906ff5df562155824d63ac849ab",
"https://android.googlesource.com/device/generic/vulkan-cereal/+/240dedcb0fa917b3d2dcc4a9d4c332697c5e48a0": "pkg:generic/android.googlesource.com/device/generic/vulkan-cereal@240dedcb0fa917b3d2dcc4a9d4c332697c5e48a0",
"https://chromium.googlesource.com/aosp/platform/external/dbus-binding-generator/+/7574c671c7c64aab957dc507fffff3c8c38dc7cb": "pkg:generic/chromium.googlesource.com/aosp/platform/external/dbus-binding-generator@7574c671c7c64aab957dc507fffff3c8c38dc7cb",
"https://aomedia.googlesource.com/libavifinfo/+/43716e9c34d3389b4882fbd1a81c04543ed04fe3": "pkg:generic/aomedia.googlesource.com/libavifinfo@43716e9c34d3389b4882fbd1a81c04543ed04fe3",

"https://sourceforge.net/p/djvu/djvulibre-git/ci/e15d51510048927f172f1bf1f27ede65907d940d": "pkg:generic/sourceforge.net/p/djvu/djvulibre-git@e15d51510048927f172f1bf1f27ede65907d940d",
"https://sourceforge.net/p/expat/code_git/ci/f0bec73b018caa07d3e75ec8dd967f3785d71bde": "pkg:generic/sourceforge.net/p/expat/code_git@f0bec73b018caa07d3e75ec8dd967f3785d71bde",
"https://forge-allura.apache.org/p/allura/git/ci/674e070e5ca7db7c75cf61d8efd2a3e3e49bd946": "pkg:generic/forge-allura.apache.org/p/allura/git@674e070e5ca7db7c75cf61d8efd2a3e3e49bd946",
"https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=82cc94e5fb69d1c45a386f83798251de5bff9339": "pkg:generic/gcc.gnu.org/git/gcc@82cc94e5fb69d1c45a386f83798251de5bff9339",
"https://git.postgresql.org/gitweb/?p=hamn.git;a=commit;h=a796b71a5b3fe7f751f1086a08cb114b9877dea2": "pkg:generic/git.postgresql.org/gitweb/hamn@a796b71a5b3fe7f751f1086a08cb114b9877dea2",
"https://sourceware.org/git/?p=bunsen.git;a=commit;h=6c55933f37099517e050c923527b0b2267e1deed": "pkg:generic/sourceware.org/git/bunsen@6c55933f37099517e050c923527b0b2267e1deed"
}
Loading
Loading