From 6085edc434c28c4490b425980e1766f231947e2e Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Tue, 21 Oct 2025 20:52:01 -0400 Subject: [PATCH 1/2] fix support for custom formatters (#1997) --- docs/source/configuration.rst | 4 ++ docs/source/plugins.rst | 5 +- pygeoapi/__init__.py | 2 +- pygeoapi/api/__init__.py | 26 +++++++-- pygeoapi/api/itemtypes.py | 57 ++++++++++++++----- pygeoapi/formatter/base.py | 19 ++++--- pygeoapi/formatter/csv_.py | 6 +- .../schemas/config/pygeoapi-config-0.x.yml | 19 +++++++ pygeoapi/util.py | 22 +++++++ tests/api/test_api.py | 4 +- tests/api/test_itemtypes.py | 53 ++++++++++++----- 11 files changed, 168 insertions(+), 49 deletions(-) diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index 6f8e3f3ac..11c39255b 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -257,6 +257,10 @@ default. storage_crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 # optional CRS in which data is stored, default: as 'crs' field storage_crs_coordinate_epoch: 2017.23 # optional, if storage_crs is a dynamic coordinate reference system always_xy: false # optional should CRS respect axis ordering + formatters: # list of 1..n formatter definitions + - name: path.to.formatter # Python path of formatter definition + attachment: true # whether or not to provide as an attachment or normal response + geom: false # whether or not to include geometry hello-world: # name of process type: process # REQUIRED (collection, process, or stac-collection) diff --git a/docs/source/plugins.rst b/docs/source/plugins.rst index dcaa48435..4d02699ca 100644 --- a/docs/source/plugins.rst +++ b/docs/source/plugins.rst @@ -435,7 +435,10 @@ The below template provides a minimal example (let's call the file ``mycooljsonf """Inherit from parent class""" super().__init__({'name': 'cooljson', 'geom': None}) - self.mimetype = 'application/json; subtype:mycooljson' + self.f = 'cooljson' # f= value + self.mimetype = 'application/json; 
subtype:mycooljson' # response media type + self.attachment = False # whether to provide as an attachment (default False) + self.extension = 'cooljson' # filename extension if providing as an attachment def write(self, options={}, data=None): """custom writer""" diff --git a/pygeoapi/__init__.py b/pygeoapi/__init__.py index 95b975d8e..f62d8c5a2 100644 --- a/pygeoapi/__init__.py +++ b/pygeoapi/__init__.py @@ -68,7 +68,7 @@ def decorator(click_group): try: click_group.add_command(entry_point.load()) except Exception as err: - print(err) + click.echo(err) return click_group return decorator diff --git a/pygeoapi/api/__init__.py b/pygeoapi/api/__init__.py index 34ddcd3ec..363e4fbad 100644 --- a/pygeoapi/api/__init__.py +++ b/pygeoapi/api/__init__.py @@ -68,7 +68,8 @@ TEMPLATESDIR, UrlPrefetcher, dategetter, filter_dict_by_key_value, filter_providers_by_type, get_api_rules, get_base_url, get_provider_by_type, get_provider_default, get_typed_value, - render_j2_template, to_json, get_choice_from_headers, get_from_headers + render_j2_template, to_json, get_choice_from_headers, get_from_headers, + get_dataset_formatters ) LOGGER = logging.getLogger(__name__) @@ -319,11 +320,14 @@ def _get_locale(self, headers: dict, return raw, default_locale - def _get_format(self, headers: dict) -> Union[str, None]: + def _get_format(self, headers: dict, + extra_formats: dict = {}) -> Union[str, None]: """ Get `Request` format type from query parameters or headers. 
:param headers: Dict of Request headers + :param extra_formats: Dict of extra dataset specific formats + :returns: format value or None if not found/specified """ @@ -339,10 +343,14 @@ def _get_format(self, headers: dict) -> Union[str, None]: if types_ is None: return - (fmts, mimes) = zip(*FORMAT_TYPES.items()) + merged_format_types = FORMAT_TYPES | extra_formats + + (fmts, mimes) = zip(*merged_format_types.items()) + mimes2 = [m.split(';')[0] for m in mimes] + for type_ in types_: - if type_ in mimes: - idx_ = mimes.index(type_) + if type_ in mimes2: + idx_ = mimes2.index(type_) return fmts[idx_] @property @@ -1042,6 +1050,14 @@ def describe_collections(api: API, request: APIRequest, 'href': f'{api.get_collections_url()}/{k}/items?f={F_HTML}' # noqa }) + for key, value in get_dataset_formatters(v).items(): + collection['links'].append({ + 'type': value.mimetype, + 'rel': 'items', + 'title': l10n.translate(f'Items as {key}', request.locale), # noqa + 'href': f'{api.get_collections_url()}/{k}/items?f={value.f}' # noqa + }) + # OAPIF Part 2 - list supported CRSs and StorageCRS if collection_data_type in ['edr', 'feature']: collection['crs'] = get_supported_crs_list(collection_data) diff --git a/pygeoapi/api/itemtypes.py b/pygeoapi/api/itemtypes.py index 2810e7f09..0caa8808c 100644 --- a/pygeoapi/api/itemtypes.py +++ b/pygeoapi/api/itemtypes.py @@ -7,7 +7,7 @@ # Colin Blackburn # Ricardo Garcia Silva # -# Copyright (c) 2025 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # Copyright (c) 2025 Francesco Bartoli # Copyright (c) 2022 John A Stevenson and Colin Blackburn # Copyright (c) 2023 Ricardo Garcia Silva @@ -55,13 +55,15 @@ set_content_crs_header) from pygeoapi.formatter.base import FormatterSerializationError from pygeoapi.linked_data import geojson2jsonld +from pygeoapi.openapi import get_oas_30_parameters from pygeoapi.plugin import load_plugin, PLUGINS from pygeoapi.provider.base import ( ProviderGenericError, ProviderTypeError, SchemaType) from pygeoapi.util 
import (filter_providers_by_type, to_json, filter_dict_by_key_value, str2bool, - get_provider_by_type, render_j2_template) + get_provider_by_type, render_j2_template, + get_dataset_formatters) from . import ( APIRequest, API, SYSTEM_LOCALE, F_JSON, FORMAT_TYPES, F_HTML, F_JSONLD, @@ -241,9 +243,6 @@ def get_collection_items( :returns: tuple of headers, status code, content """ - if not request.is_valid(PLUGINS['formatter'].keys()): - return api.get_format_exception(request) - # Set Content-Language to system locale until provider locale # has been determined headers = request.get_response_headers(SYSTEM_LOCALE, @@ -352,6 +351,20 @@ def get_collection_items( err.http_status_code, headers, request.format, err.ogc_exception_code, err.message) + LOGGER.debug('Validating requested format') + dataset_formatters = get_dataset_formatters(collections[dataset]) + + if dataset_formatters: + LOGGER.debug(f'Dataset formatters: {dataset_formatters}') + request._format = request._get_format( + request.get_request_headers(request.headers), + {v.f: v.mimetype for v in dataset_formatters.values()}) + + LOGGER.debug(f'Request format: {request.format}') + + if not request.is_valid(dataset_formatters.keys()): + return api.get_format_exception(request) + crs_transform_spec = None if provider_type == 'feature': # crs query parameter is only available for OGC API - Features @@ -581,6 +594,14 @@ def get_collection_items( 'href': f'{uri}?f={F_HTML}{serialized_query_params}' }]) + for key, value in dataset_formatters.items(): + content['links'].append({ + 'type': value.mimetype, + 'rel': 'alternate', + 'title': f'This document as {key}', + 'href': f'{uri}?f={value.name}{serialized_query_params}' + }) + next_link = False prev_link = False @@ -656,9 +677,9 @@ def get_collection_items( 'collections/items/index.html', content, request.locale) return headers, HTTPStatus.OK, content - elif request.format == 'csv': # render - formatter = load_plugin('formatter', - {'name': 'CSV', 'geom': True}) + 
elif request.format in [df.f for df in dataset_formatters.values()]: + formatter = [v for v in dataset_formatters.values() if + v.f == request.format][0] try: content = formatter.write( @@ -677,13 +698,14 @@ def get_collection_items( headers['Content-Type'] = formatter.mimetype - if p.filename is None: - filename = f'{dataset}.csv' - else: - filename = f'{p.filename}' + if formatter.attachment: + if p.filename is None: + filename = f'{dataset}.{formatter.extension}' + else: + filename = f'{p.filename}' - cd = f'attachment; filename="{filename}"' - headers['Content-Disposition'] = cd + cd = f'attachment; filename="{filename}"' + headers['Content-Disposition'] = cd return headers, HTTPStatus.OK, content @@ -1073,6 +1095,11 @@ def get_oas_30(cfg: dict, locale: str) -> tuple[list[dict[str, str]], dict[str, v.get('limits', {}) ) + dataset_formatters = get_dataset_formatters(v) + coll_f_parameter = deepcopy(get_oas_30_parameters(cfg, locale))['f'] # noqa + for key, value in dataset_formatters.items(): + coll_f_parameter['schema']['enum'].append(value.f) + paths[items_path] = { 'get': { 'summary': f'Get {title} items', @@ -1080,7 +1107,7 @@ def get_oas_30(cfg: dict, locale: str) -> tuple[list[dict[str, str]], dict[str, 'tags': [k], 'operationId': f'get{k.capitalize()}Features', 'parameters': [ - {'$ref': '#/components/parameters/f'}, + coll_f_parameter, {'$ref': '#/components/parameters/lang'}, {'$ref': '#/components/parameters/bbox'}, coll_limit, diff --git a/pygeoapi/formatter/base.py b/pygeoapi/formatter/base.py index 95f20491e..f566df114 100644 --- a/pygeoapi/formatter/base.py +++ b/pygeoapi/formatter/base.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2022 Tom Kralidis +# Copyright (c) 2026 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -39,23 +39,28 @@ def __init__(self, formatter_def: dict): """ Initialize object - :param formatter_def: formatter 
definition + :param formatter_def: formatter definition :returns: pygeoapi.formatter.base.BaseFormatter """ + self.extension = None + self.f = None self.mimetype = None - self.geom = False - self.name = formatter_def['name'] - if 'geom' in formatter_def: - self.geom = formatter_def['geom'] + try: + self.name = formatter_def['name'] + except KeyError: + raise RuntimeError('name is required') + + self.geom = formatter_def.get('geom', False) + self.attachment = formatter_def.get('attachment', False) def write(self, options: dict = {}, data: dict | None = None) -> str: """ Generate data in specified format - :param options: CSV formatting options + :param options: formatting options :param data: dict representation of GeoJSON object :returns: string representation of format diff --git a/pygeoapi/formatter/csv_.py b/pygeoapi/formatter/csv_.py index d5e856665..2dd8c9dfb 100644 --- a/pygeoapi/formatter/csv_.py +++ b/pygeoapi/formatter/csv_.py @@ -48,12 +48,12 @@ def __init__(self, formatter_def: dict): :returns: `pygeoapi.formatter.csv_.CSVFormatter` """ - geom = False - if 'geom' in formatter_def: - geom = formatter_def['geom'] + geom = formatter_def.get('geom', False) super().__init__({'name': 'csv', 'geom': geom}) self.mimetype = 'text/csv; charset=utf-8' + self.f = 'csv' + self.extension = 'csv' def write(self, options: dict = {}, data: dict = None) -> str: """ diff --git a/pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml b/pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml index 477526f0b..f48f615ff 100644 --- a/pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml +++ b/pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml @@ -583,6 +583,25 @@ properties: - type - name - data + formatters: + type: array + description: custom formatters to apply to output + items: + type: object + properties: + name: + type: string + description: name of formatter + geom: + type: boolean + default: true + description: whether to include geometry + attachment: + 
type: boolean + default: false + description: whether to provide as an attachment + required: + - name required: - type - title diff --git a/pygeoapi/util.py b/pygeoapi/util.py index 108853ee7..66c26a612 100644 --- a/pygeoapi/util.py +++ b/pygeoapi/util.py @@ -64,6 +64,7 @@ from pygeoapi import __version__ from pygeoapi import l10n from pygeoapi.models import config as config_models +from pygeoapi.plugin import load_plugin, PLUGINS from pygeoapi.provider.base import ProviderTypeError @@ -764,3 +765,24 @@ def get_choice_from_headers(headers: dict, # Return one or all choices return sorted_choices if all else sorted_choices[0] + + +def get_dataset_formatters(dataset: dict) -> dict: + """ + Helper function to derive all formatters for an itemtype + + :param dataset: `dict` of dataset resource definition + + :returns: `dict` of formatters + """ + + dataset_formatters = {} + + for key, value in PLUGINS['formatter'].items(): + df2 = load_plugin('formatter', {'name': key}) + dataset_formatters[key] = df2 + for df in dataset.get('formatters', []): + df2 = load_plugin('formatter', df) + dataset_formatters[df2.name] = df2 + + return dataset_formatters diff --git a/tests/api/test_api.py b/tests/api/test_api.py index fc1f03d00..3625eac94 100644 --- a/tests/api/test_api.py +++ b/tests/api/test_api.py @@ -591,7 +591,7 @@ def test_describe_collections(config, api_): assert collection['id'] == 'obs' assert collection['title'] == 'Observations' assert collection['description'] == 'My cool observations' - assert len(collection['links']) == 14 + assert len(collection['links']) == 15 assert collection['extent'] == { 'spatial': { 'bbox': [[-180, -90, 180, 90]], @@ -682,7 +682,7 @@ def test_describe_collections_json_ld(config, api_): assert len(expanded['http://schema.org/dataset']) == 1 dataset = expanded['http://schema.org/dataset'][0] assert dataset['@type'][0] == 'http://schema.org/Dataset' - assert len(dataset['http://schema.org/distribution']) == 14 + assert 
len(dataset['http://schema.org/distribution']) == 15 assert all(dist['@type'][0] == 'http://schema.org/DataDownload' for dist in dataset['http://schema.org/distribution']) diff --git a/tests/api/test_itemtypes.py b/tests/api/test_itemtypes.py index 5a1762151..e47414190 100644 --- a/tests/api/test_itemtypes.py +++ b/tests/api/test_itemtypes.py @@ -220,7 +220,7 @@ def test_get_collection_items(config, api_): assert features['features'][1]['properties']['stn_id'] == 35 links = features['links'] - assert len(links) == 5 + assert len(links) == 6 assert '/collections/obs/items?f=json' in links[0]['href'] assert links[0]['rel'] == 'self' assert '/collections/obs/items?f=jsonld' in links[1]['href'] @@ -228,8 +228,9 @@ def test_get_collection_items(config, api_): assert '/collections/obs/items?f=html' in links[2]['href'] assert links[2]['rel'] == 'alternate' assert '/collections/obs' in links[3]['href'] - assert links[3]['rel'] == 'next' - assert links[4]['rel'] == 'collection' + assert links[3]['rel'] == 'alternate' + assert links[4]['rel'] == 'next' + assert links[5]['rel'] == 'collection' # Invalid offset req = mock_api_request({'offset': -1}) @@ -246,17 +247,19 @@ def test_get_collection_items(config, api_): assert features['features'][1]['properties']['stn_id'] == 2147 links = features['links'] - assert len(links) == 5 + assert len(links) == 6 assert '/collections/obs/items?f=json' in links[0]['href'] assert links[0]['rel'] == 'self' assert '/collections/obs/items?f=jsonld' in links[1]['href'] assert links[1]['rel'] == 'alternate' assert '/collections/obs/items?f=html' in links[2]['href'] assert links[2]['rel'] == 'alternate' - assert '/collections/obs/items?offset=0' in links[3]['href'] - assert links[3]['rel'] == 'prev' - assert '/collections/obs' in links[4]['href'] - assert links[4]['rel'] == 'collection' + assert '/collections/obs/items?f=csv' in links[3]['href'] + assert links[3]['rel'] == 'alternate' + assert '/collections/obs/items?offset=0' in links[4]['href'] 
+ assert links[4]['rel'] == 'prev' + assert '/collections/obs' in links[5]['href'] + assert links[5]['rel'] == 'collection' req = mock_api_request({ 'offset': '1', @@ -269,7 +272,7 @@ def test_get_collection_items(config, api_): assert len(features['features']) == 1 links = features['links'] - assert len(links) == 6 + assert len(links) == 7 assert '/collections/obs/items?f=json&limit=1&bbox=-180,-90,180,90' in \ links[0]['href'] assert links[0]['rel'] == 'self' @@ -279,13 +282,16 @@ def test_get_collection_items(config, api_): assert '/collections/obs/items?f=html&limit=1&bbox=-180,-90,180,90' in \ links[2]['href'] assert links[2]['rel'] == 'alternate' - assert '/collections/obs/items?offset=0&limit=1&bbox=-180,-90,180,90' \ + assert '/collections/obs/items?f=csv&limit=1&bbox=-180,-90,180,90' \ in links[3]['href'] - assert links[3]['rel'] == 'prev' - assert '/collections/obs' in links[4]['href'] - assert links[3]['rel'] == 'prev' - assert links[4]['rel'] == 'next' - assert links[5]['rel'] == 'collection' + assert links[3]['rel'] == 'alternate' + assert '/collections/obs/items?offset=0&limit=1&bbox=-180,-90,180,90' \ + in links[4]['href'] + assert links[4]['rel'] == 'prev' + assert '/collections/obs' in links[5]['href'] + assert links[4]['rel'] == 'prev' + assert links[5]['rel'] == 'next' + assert links[6]['rel'] == 'collection' req = mock_api_request({ 'sortby': '' @@ -394,6 +400,23 @@ def test_get_collection_items(config, api_): assert code == HTTPStatus.BAD_REQUEST + # test Accept header for dataset formatters + req = mock_api_request(HTTP_ACCEPT='application/json') + rsp_headers, code, response = get_collection_items(api_, req, 'obs') + assert rsp_headers['Content-Type'] == 'application/json' + + req = mock_api_request(HTTP_ACCEPT='text/csv') + rsp_headers, code, response = get_collection_items(api_, req, 'obs') + assert rsp_headers['Content-Type'] == 'text/csv; charset=utf-8' + + req = mock_api_request({'f': 'json'}, HTTP_ACCEPT='text/csv') + rsp_headers, code, 
response = get_collection_items(api_, req, 'obs') + assert rsp_headers['Content-Type'] == 'application/json' + + req = mock_api_request({'f': 'csv'}, HTTP_ACCEPT='application/json') + rsp_headers, code, response = get_collection_items(api_, req, 'obs') + assert rsp_headers['Content-Type'] == 'text/csv; charset=utf-8' + def test_get_collection_items_include_extra_query_parameters(config, api_): req = mock_api_request() From 4731c0fe24a3b48fe5aaeabf578f9b6a8765ddeb Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Sun, 11 Jan 2026 01:40:06 -0500 Subject: [PATCH 2/2] fix typo --- pygeoapi/provider/csw_facade.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygeoapi/provider/csw_facade.py b/pygeoapi/provider/csw_facade.py index bfd77735a..f326ab65a 100644 --- a/pygeoapi/provider/csw_facade.py +++ b/pygeoapi/provider/csw_facade.py @@ -53,7 +53,7 @@ def __init__(self, provider_def): :param provider_def: provider definition - :returns: pygeoapi.provider.csv_.CSWFacadeProvider + :returns: pygeoapi.provider.csw_facade.CSWFacadeProvider """ super().__init__(provider_def)