From cc6f5c23f5cd62163f69aa81b53e48a55ea50627 Mon Sep 17 00:00:00 2001
From: Elise Hinman
Date: Thu, 15 Jan 2026 13:27:50 -0600
Subject: [PATCH 1/6] make ref table function way simpler and more efficient

---
 dataretrieval/waterdata/api.py | 35 +++++++++++++---------------------
 tests/waterdata_test.py        |  2 +-
 2 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py
index 59cc5a1..1facd9d 100644
--- a/dataretrieval/waterdata/api.py
+++ b/dataretrieval/waterdata/api.py
@@ -1394,6 +1394,7 @@ def get_field_measurements(
 
     return get_ogc_data(args, output_id, service)
 
+
 def get_reference_table(
     collection: str,
     limit: Optional[int] = None,
@@ -1426,29 +1427,19 @@
             f"Valid options are: {valid_code_services}."
         )
 
-    req = _construct_api_requests(
-        service=collection,
-        limit=limit,
-        skip_geometry=True,
-    )
-    # Run API request and iterate through pages if needed
-    return_list, response = _walk_pages(
-        geopd=False, req=req
-    )
-
-    # Give ID column a more meaningful name
-    if collection.endswith("s"):
-        return_list = return_list.rename(
-            columns={"id": f"{collection[:-1].replace('-', '_')}_id"}
-        )
+    # Give ID column the collection name with underscores
+    if collection.endswith("s") and collection != "counties":
+        output_id = f"{collection[:-1].replace('-', '_')}"
+    elif collection == "counties":
+        output_id = "county"
     else:
-        return_list = return_list.rename(
-            columns={"id": f"{collection.replace('-', '_')}_id"}
-        )
-
-    # Create metadata object from response
-    metadata = BaseMetadata(response)
-    return return_list, metadata
+        output_id = f"{collection.replace('-', '_')}"
+
+    return get_ogc_data(
+        args={},
+        output_id=output_id,
+        service=collection
+    )
 
 
 def get_codes(code_service: CODE_SERVICES) -> pd.DataFrame:
diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py
index abdd823..7800ca7 100755
--- a/tests/waterdata_test.py
+++ b/tests/waterdata_test.py
@@ -248,7 +248,7 @@ def test_get_time_series_metadata():
 
 def test_get_reference_table():
     df, md = get_reference_table("agency-codes")
-    assert "agency_code_id" in df.columns
+    assert "agency_code" in df.columns
     assert df.shape[0] > 0
     assert hasattr(md, 'url')
     assert hasattr(md, 'query_time')

From c326643e0a15831f349419a8cdf38510efbac298 Mon Sep 17 00:00:00 2001
From: Elise Hinman
Date: Thu, 15 Jan 2026 13:49:50 -0600
Subject: [PATCH 2/6] add more documentation, an example

---
 dataretrieval/waterdata/api.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py
index 1facd9d..cb462f8 100644
--- a/dataretrieval/waterdata/api.py
+++ b/dataretrieval/waterdata/api.py
@@ -1419,6 +1419,27 @@ def get_reference_table(
         allowable limit is 50000. It may be beneficial to set this number
         lower if your internet connection is spotty. The default (None)
         will set the limit to the maximum allowable limit for the service.
+
+    Returns
+    -------
+    df : ``pandas.DataFrame`` or ``geopandas.GeoDataFrame``
+        Formatted data returned from the API query. The primary metadata
+        of each reference table will show up in the first column, where
+        the name of the column is the singular form of the collection name,
+        separated by underscores (e.g. the "medium-codes" reference table
+        has a column called "medium_code", which contains all possible
+        medium code values).
+    md: :obj:`dataretrieval.utils.Metadata`
+        A custom metadata object including the URL request and query time.
+
+    Examples
+    --------
+    .. code::
+
+        >>> # Get table of USGS parameter codes
+        >>> ref, md = dataretrieval.waterdata.get_reference_table(
+        ...     collection="parameter-codes
+        ...     )
     """
     valid_code_services = get_args(METADATA_COLLECTIONS)
     if collection not in valid_code_services:

From b956dbfad95a0b49d0c8ba567555aebb9687e147 Mon Sep 17 00:00:00 2001
From: Elise Hinman
Date: Thu, 15 Jan 2026 15:02:54 -0600
Subject: [PATCH 3/6] add in the deduplication line

---
 dataretrieval/waterdata/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py
index 5c6ae10..cad8272 100644
--- a/dataretrieval/waterdata/utils.py
+++ b/dataretrieval/waterdata/utils.py
@@ -498,6 +498,7 @@ def _get_resp_data(resp: requests.Response, geopd: bool) -> pd.DataFrame:
         )
         df.columns = [col.replace("properties_", "") for col in df.columns]
         df.rename(columns={"geometry_coordinates": "geometry"}, inplace=True)
+        df = df.loc[:, ~df.columns.duplicated()]
         return df
 
     # Organize json into geodataframe and make sure id column comes along.

From adef16e774308cc7f1b5733dcd67d64d07dcf492 Mon Sep 17 00:00:00 2001
From: Elise Hinman
Date: Tue, 20 Jan 2026 10:22:08 -0600
Subject: [PATCH 4/6] add required packages for gpd.explore in docs and move a requirement to docs section of pyproject.toml

---
 .github/workflows/sphinx-docs.yml | 1 -
 pyproject.toml                    | 3 +++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/sphinx-docs.yml b/.github/workflows/sphinx-docs.yml
index dbd1395..e949c48 100644
--- a/.github/workflows/sphinx-docs.yml
+++ b/.github/workflows/sphinx-docs.yml
@@ -18,7 +18,6 @@ jobs:
         shell: bash -l {0}
         run: |
           python -m pip install --upgrade pip
-          pip install "docutils<0.22"
           pip install .[doc,nldi]
           ipython kernel install --name "python3" --user
           sudo apt update -y && sudo apt install -y latexmk texlive-latex-recommended texlive-latex-extra texlive-fonts-recommended dvipng pandoc
diff --git a/pyproject.toml b/pyproject.toml
index 692a01e..88590ca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,6 +39,7 @@ test = [
     "flake8",
 ]
 doc = [
+    "docutils<0.22",
     "sphinx",
     "sphinx-rtd-theme",
     "nbsphinx",
@@ -46,6 +47,8 @@
     "ipython",
     "ipykernel",
     "matplotlib",
+    "folium>=0.12",
+    "mapclassify"
 ]
 nldi = [
     'geopandas>=0.10'

From 276babc189da2f3c86e5455b7c60960ade7ed290 Mon Sep 17 00:00:00 2001
From: Elise Hinman <121896266+ehinman@users.noreply.github.com>
Date: Thu, 22 Jan 2026 09:40:48 -0600
Subject: [PATCH 5/6] Update dataretrieval/waterdata/api.py

Co-authored-by: Joe Zemmels (he/him)
---
 dataretrieval/waterdata/api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py
index b428332..4348d6a 100644
--- a/dataretrieval/waterdata/api.py
+++ b/dataretrieval/waterdata/api.py
@@ -1461,7 +1461,7 @@ def get_reference_table(
 
         >>> # Get table of USGS parameter codes
         >>> ref, md = dataretrieval.waterdata.get_reference_table(
-        ...     collection="parameter-codes
+        ...     collection="parameter-codes"
        ...     )
     """
     valid_code_services = get_args(METADATA_COLLECTIONS)

From b027451d6c7fd0ef1bbde3db5d99b6d50f0906f1 Mon Sep 17 00:00:00 2001
From: Elise Hinman
Date: Thu, 22 Jan 2026 10:23:13 -0600
Subject: [PATCH 6/6] update notebook to pip install all required packages and fix small change

---
 demos/WaterData_demo.ipynb | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/demos/WaterData_demo.ipynb b/demos/WaterData_demo.ipynb
index b7d116b..40f5d56 100644
--- a/demos/WaterData_demo.ipynb
+++ b/demos/WaterData_demo.ipynb
@@ -87,20 +87,17 @@
    "metadata": {},
    "source": [
     "## Examples\n",
-    "Let's get into some examples using the functions listed above. First, we need to load the `waterdata` module and a few other packages and functions to go through the examples. To run the entirety of this notebook, you will need to install `dataretrieval`, `matplotlib`, and `geopandas` packages. `matplotlib` is needed to create the plots, and `geopandas` is needed to create the interactive maps."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cd626a14",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Install necessary packages to run notebook\n",
+    "Let's get into some examples using the functions listed above. First, we need to load the `waterdata` module and a few other packages and functions to go through the examples. To run the entirety of this notebook, you will need to install `dataretrieval`, `matplotlib`, and `geopandas` packages (plus dependencies). `matplotlib` is needed to create the plots, and `geopandas` is needed to create the interactive maps.\n",
+    "\n",
+    "Note that if you use conda rather than pip, you do not need to install folium and mapclassify separately, as they are included in the conda-forge geopandas install.\n",
+    "\n",
+    "```python\n",
     "!pip install dataretrieval\n",
     "!pip install matplotlib\n",
-    "!pip install geopandas"
+    "!pip install geopandas\n",
+    "!pip install folium\n",
+    "!pip install mapclassify\n",
+    "``` "
    ]
   },
   {
@@ -156,7 +153,7 @@
    "outputs": [],
    "source": [
     "streamflow_pcodes = pcodes[pcodes['parameter_name'].str.contains('streamflow|discharge', case=False, na=False)]\n",
-    "display(streamflow_pcodes[['parameter_code_id', 'parameter_name']])"
+    "display(streamflow_pcodes[['parameter_code', 'parameter_name']])"
    ]
   },
   {
@@ -599,7 +596,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "waterdata-demo",
+   "display_name": "waterdata-demo-pip",
    "language": "python",
    "name": "python3"
   },
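Usage sketch (not part of the patch series above): a minimal example of how the reworked get_reference_table is expected to behave once these changes are applied, based on the renamed ID columns (e.g. "agency-codes" -> "agency_code", "parameter-codes" -> "parameter_code") and the metadata attributes exercised by the updated test. The column name and import form shown here are assumptions drawn from the docstring and test changes, not captured output.

    import dataretrieval.waterdata as waterdata

    # The function now delegates to get_ogc_data; the first column carries the
    # singular collection name ("parameter_code") rather than a generic "id".
    ref, md = waterdata.get_reference_table(collection="parameter-codes")
    assert "parameter_code" in ref.columns

    # Metadata attributes asserted in the updated test
    print(md.url)
    print(md.query_time)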