Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/sphinx-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ jobs:
shell: bash -l {0}
run: |
python -m pip install --upgrade pip
pip install "docutils<0.22"
pip install .[doc,nldi]
ipython kernel install --name "python3" --user
sudo apt update -y && sudo apt install -y latexmk texlive-latex-recommended texlive-latex-extra texlive-fonts-recommended dvipng pandoc
Expand Down
56 changes: 34 additions & 22 deletions dataretrieval/waterdata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1417,6 +1417,7 @@ def get_field_measurements(

return get_ogc_data(args, output_id, service)


def get_reference_table(
collection: str,
limit: Optional[int] = None,
Expand All @@ -1441,6 +1442,27 @@ def get_reference_table(
allowable limit is 50000. It may be beneficial to set this number lower
if your internet connection is spotty. The default (None) will set the
limit to the maximum allowable limit for the service.

Returns
-------
df : ``pandas.DataFrame`` or ``geopandas.GeoDataFrame``
Formatted data returned from the API query. The primary metadata
of each reference table will show up in the first column, where
the name of the column is the singular form of the collection name,
separated by underscores (e.g. the "medium-codes" reference table
has a column called "medium_code", which contains all possible
medium code values).
md : :obj:`dataretrieval.utils.Metadata`
A custom metadata object including the URL request and query time.

Examples
--------
.. code::

>>> # Get table of USGS parameter codes
>>> ref, md = dataretrieval.waterdata.get_reference_table(
... collection="parameter-codes"
... )
"""
valid_code_services = get_args(METADATA_COLLECTIONS)
if collection not in valid_code_services:
Expand All @@ -1449,29 +1471,19 @@ def get_reference_table(
f"Valid options are: {valid_code_services}."
)

req = _construct_api_requests(
service=collection,
limit=limit,
skip_geometry=True,
)
# Run API request and iterate through pages if needed
return_list, response = _walk_pages(
geopd=False, req=req
)

# Give ID column a more meaningful name
if collection.endswith("s"):
return_list = return_list.rename(
columns={"id": f"{collection[:-1].replace('-', '_')}_id"}
)
# Give ID column the collection name with underscores
if collection.endswith("s") and collection != "counties":
output_id = f"{collection[:-1].replace('-', '_')}"
elif collection == "counties":
output_id = "county"
else:
return_list = return_list.rename(
columns={"id": f"{collection.replace('-', '_')}_id"}
)

# Create metadata object from response
metadata = BaseMetadata(response)
return return_list, metadata
output_id = f"{collection.replace('-', '_')}"

return get_ogc_data(
args={},
output_id=output_id,
service=collection
)


def get_codes(code_service: CODE_SERVICES) -> pd.DataFrame:
Expand Down
25 changes: 11 additions & 14 deletions demos/WaterData_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -87,20 +87,17 @@
"metadata": {},
"source": [
"## Examples\n",
"Let's get into some examples using the functions listed above. First, we need to load the `waterdata` module and a few other packages and functions to go through the examples. To run the entirety of this notebook, you will need to install `dataretrieval`, `matplotlib`, and `geopandas` packages. `matplotlib` is needed to create the plots, and `geopandas` is needed to create the interactive maps."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cd626a14",
"metadata": {},
"outputs": [],
"source": [
"# Install necessary packages to run notebook\n",
"Let's get into some examples using the functions listed above. First, we need to load the `waterdata` module and a few other packages and functions to go through the examples. To run the entirety of this notebook, you will need to install `dataretrieval`, `matplotlib`, and `geopandas` packages (plus dependencies). `matplotlib` is needed to create the plots, and `geopandas` is needed to create the interactive maps.\n",
"\n",
"Note that if you use conda rather than pip, you do not need to install folium and mapclassify separately, as they are included in the conda-forge geopandas install.\n",
"\n",
"```python\n",
"!pip install dataretrieval\n",
"!pip install matplotlib\n",
"!pip install geopandas"
"!pip install geopandas\n",
"!pip install folium\n",
"!pip install mapclassify\n",
"``` "
]
},
{
Expand Down Expand Up @@ -156,7 +153,7 @@
"outputs": [],
"source": [
"streamflow_pcodes = pcodes[pcodes['parameter_name'].str.contains('streamflow|discharge', case=False, na=False)]\n",
"display(streamflow_pcodes[['parameter_code_id', 'parameter_name']])"
"display(streamflow_pcodes[['parameter_code', 'parameter_name']])"
]
},
{
Expand Down Expand Up @@ -599,7 +596,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "waterdata-demo",
"display_name": "waterdata-demo-pip",
"language": "python",
"name": "python3"
},
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,16 @@ test = [
"flake8",
]
doc = [
"docutils<0.22",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these new dependencies required by the changes made in this PR?
I don't see docutils or mapclassify used explicitly anywhere in the package files (based on a CTRL + ALT + F search through the package files).

Copy link
Collaborator Author

@ehinman ehinman Jan 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

docutils is one of the dependencies for building the documentation, and a change in its latest release breaks the documentation pipeline. Pinning it to an earlier version while the other packages catch up seems to fix it for now.

The other dependencies (folium and mapclassify) are needed to use the gpd.explore() interactive mapping functionality of geopandas, and they are not installed if you simply pip install geopandas. The waterdata demo notebook I merged in earlier this week uses gpd.explore() and currently shows errors in the rendered Jupyter notebook in the documentation: https://doi-usgs.github.io/dataretrieval-python/examples/WaterData_demo.html

"sphinx",
"sphinx-rtd-theme",
"nbsphinx",
"nbsphinx_link",
"ipython",
"ipykernel",
"matplotlib",
"folium>=0.12",
"mapclassify"
]
nldi = [
'geopandas>=0.10'
Expand Down
2 changes: 1 addition & 1 deletion tests/waterdata_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def test_get_time_series_metadata():

def test_get_reference_table():
df, md = get_reference_table("agency-codes")
assert "agency_code_id" in df.columns
assert "agency_code" in df.columns
assert df.shape[0] > 0
assert hasattr(md, 'url')
assert hasattr(md, 'query_time')
Expand Down