From 491eb5c3d838f2401983ad7f93942d49b40c6d79 Mon Sep 17 00:00:00 2001 From: Elise Hinman Date: Mon, 2 Jun 2025 17:17:42 -0500 Subject: [PATCH 01/15] remove usage of qwdata --- README.md | 14 +-- dataretrieval/nwis.py | 124 ++-------------------- demos/R Python Vignette equivalents.ipynb | 18 ++-- 3 files changed, 21 insertions(+), 135 deletions(-) diff --git a/README.md b/README.md index 6a4cf4fd..107894dc 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ ![Conda Version](https://img.shields.io/conda/v/conda-forge/dataretrieval) ![Downloads](https://static.pepy.tech/badge/dataretrieval) -:warning: USGS data availability and format are changing on Water Quality Portal (WQP). Since March 2024, data obtained from WQP legacy profiles will not include new USGS data or recent updates to existing data. +:warning: USGS data availability and format have changed on Water Quality Portal (WQP). Since March 2024, data obtained from WQP legacy profiles will not include new USGS data or recent updates to existing data. All USGS data (up to and beyond March 2024) are available using the new WQP beta services. You can access the beta services by setting `legacy=False` in the functions in the `wqp` module. + To view the status of changes in data availability and code functionality, visit: https://doi-usgs.github.io/dataRetrieval/articles/Status.html :mega: **09/03/2024:** The groundwater levels service has switched endpoints, and `dataretrieval` was updated accordingly in [`v1.0.10`](https://github.com/DOI-USGS/dataretrieval-python/releases/tag/v1.0.10). Older versions using the discontinued endpoint will return 503 errors for `nwis.get_gwlevels` or the `service='gwlevels'` argument. Visit [Water Data For the Nation](https://waterdata.usgs.gov/blog/wdfn-waterservices-2024/) for more information. @@ -34,15 +35,11 @@ import dataretrieval.nwis as nwis # specify the USGS site code for which we want data. 
site = '03339000' - # get instantaneous values (iv) df = nwis.get_record(sites=site, service='iv', start='2017-12-31', end='2018-01-01') -# get water quality samples (qwdata) -df2 = nwis.get_record(sites=site, service='qwdata', start='2017-12-31', end='2018-01-01') - # get basic info about the site -df3 = nwis.get_record(sites=site, service='site') +df2 = nwis.get_record(sites=site, service='site') ``` Services available from NWIS include: - instantaneous values (iv) @@ -51,7 +48,10 @@ Services available from NWIS include: - site info (site) - discharge peaks (peaks) - discharge measurements (measurements) -* water quality samples (qwdata) + +Water quality data are available from: +- [Samples](https://waterdata.usgs.gov/download-samples/#dataProfile=site) - Discrete USGS water quality data only +- [Water Quality Portal](https://www.waterqualitydata.us/) - Discrete water quality data from USGS and EPA. Older data are available in the legacy WQX version 2 format; all data are available in the beta WQX3.0 format. To access the full functionality available from NWIS web services, nwis.get record appends any additional kwargs into the REST request. For example ```python diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index 20c25ca5..d743b02a 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -36,7 +36,6 @@ WATERSERVICES_SERVICES = ["dv", "iv", "site", "stat"] WATERDATA_SERVICES = [ - "qwdata", "gwlevels", "measurements", "peaks", @@ -135,125 +134,14 @@ def get_qwdata( **kwargs, ) -> Tuple[pd.DataFrame, BaseMetadata]: """ - Get water sample data from qwdata service. - - .. warning:: - - WARNING: Beginning in March 2024 the NWIS qw data endpoint will - not deliver new data or updates to existing data. - Eventually the endpoint will be retired. 
For updated information visit: - https://waterdata.usgs.gov.nwis/qwdata - For additional details, see the R package vignette: - https://doi-usgs.github.io/dataRetrieval/articles/Status.html - If you have additional questions about the qw data service, - email CompTools@usgs.gov. - - Parameters - ---------- - sites: string or list of strings, optional, default is None - If the qwdata parameter site_no is supplied, it will overwrite the - sites parameter - start: string, optional, default is None - If the qwdata parameter begin_date is supplied, it will overwrite the - start parameter (YYYY-MM-DD) - end: string, optional, default is None - If the qwdata parameter end_date is supplied, it will overwrite the - end parameter (YYYY-MM-DD) - multi_index: bool, optional - If False, a dataframe with a single-level index (datetime) is returned, - default is True - wide_format : bool, optional - If True, return data in wide format with multiple samples per row and - one row per time, default is True - datetime_index : bool, optional - If True, create a datetime index, default is True - ssl_check: bool, optional - If True, check SSL certificates, if False, do not check SSL, - default is True - **kwargs: optional - If supplied, will be used as query parameters - - Returns - ------- - df: ``pandas.DataFrame`` - Times series data from the NWIS JSON - md: :obj:`dataretrieval.utils.Metadata` - A custom metadata object - - Examples - -------- - .. doctest:: - - >>> # get water sample information for site 11447650 - >>> df, md = dataretrieval.nwis.get_qwdata( - ... sites="11447650", start="2010-01-01", end="2010-02-01" - ... ) + Get water sample data from qwdata service - deprecated, use `get_usgs_samples()` + in the samples module. """ - warnings.warn( - ( - "WARNING: Starting in March 2024, the NWIS qw data endpoint is " - "retiring and no longer receives updates. 
For more information, " - "refer to https://waterdata.usgs.gov.nwis/qwdata and " - "https://doi-usgs.github.io/dataRetrieval/articles/Status.html " - "or email CompTools@usgs.gov." - ) - ) - - _check_sites_value_types(sites) - - kwargs["site_no"] = kwargs.pop("site_no", sites) - kwargs["begin_date"] = kwargs.pop("begin_date", start) - kwargs["end_date"] = kwargs.pop("end_date", end) - kwargs["multi_index"] = multi_index - if wide_format: - kwargs["qw_sample_wide"] = "qw_sample_wide" - - payload = { - "agency_cd": "USGS", - "format": "rdb", - "pm_cd_compare": "Greater than", - "inventory_output": "0", - "rdb_inventory_output": "file", - "TZoutput": "0", - "rdb_qw_attributes": "expanded", - "date_format": "YYYY-MM-DD", - "rdb_compression": "value", - "submitted_form": "brief_list", - } - - # check for parameter codes, and reformat query args - qwdata_parameter_code_field = "parameterCd" - if kwargs.get(qwdata_parameter_code_field): - parameter_codes = kwargs.pop(qwdata_parameter_code_field) - parameter_codes = to_str(parameter_codes) - kwargs["multiple_parameter_cds"] = parameter_codes - kwargs["param_cd_operator"] = "OR" - - search_criteria = kwargs.get("list_of_search_criteria") - if search_criteria: - kwargs["list_of_search_criteria"] = "{},{}".format( - search_criteria, "multiple_parameter_cds" - ) - else: - kwargs["list_of_search_criteria"] = "multiple_parameter_cds" - - kwargs.update(payload) - - warnings.warn( - "NWIS qw web services are being retired. 
" - + "See this note from the R package for more: " - + "https://doi-usgs.github.io/dataRetrieval/articles/qwdata_changes.html", - category=DeprecationWarning, - ) - response = query_waterdata("qwdata", ssl_check=ssl_check, **kwargs) - - df = _read_rdb(response.text) - - if datetime_index is True: - df = format_datetime(df, "sample_dt", "sample_tm", "sample_start_time_datum_cd") - - return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs) + return print("This function is deprecated and has been " \ + "replaced with `get_usgs_samples() in the " \ + "samples module. If you have questions, " \ + "please reach out to comptools@usgs.gov") def get_discharge_measurements( diff --git a/demos/R Python Vignette equivalents.ipynb b/demos/R Python Vignette equivalents.ipynb index f99d82a5..9aeda84b 100755 --- a/demos/R Python Vignette equivalents.ipynb +++ b/demos/R Python Vignette equivalents.ipynb @@ -14,6 +14,7 @@ "outputs": [], "source": [ "from dataretrieval import nwis\n", + "from dataretrieval import samples\n", "from dataretrieval import wqp" ] }, @@ -45,8 +46,8 @@ "\n", "# Sample data Nitrate:\n", "parameterCd <- \"00618\"\n", - "qwData <- readNWISqw(siteNumber,parameterCd,\n", - " \"1980-01-01\",\"2010-01-01\")\n", + "qwData <- read_USGS_samples(monitoringLocationIdentifier=sprintf(\"USGS-%s\", siteNumber),usgsPCode=parameterCd,\n", + " activityStartDateLower=\"1980-01-01\", activityStartDateUpper=\"2010-01-01\")\n", "\n", "pCode <- readNWISpCode(parameterCd)\n", "'''\n", @@ -61,7 +62,7 @@ "\n", "# sample data Nitrate:\n", "parameterCd = \"00618\"\n", - "qwData, md = nwis.get_qwdata(sites=siteNumber, parameterCd=parameterCd, start=\"1980-01-01\", end=\"2010-01-01\")\n", + "usgs_samples_data, md = samples.get_usgs_samples(monitoringLocationIdentifier=f\"USGS-{siteNumber}\", usgsPCode=parameterCd, activityStartDateLower=\"1980-01-01\", activityStartDateUpper=\"2010-01-01\")\n", "\n", "pCode, md = nwis.get_pmcodes(parameterCd=parameterCd)" ] @@ -199,18 
+200,15 @@ "parameterCd <- c(\"00618\",\"71851\")\n", "startDate <- \"1985-10-01\"\n", "endDate <- \"2012-09-30\"\n", - "dfLong <- readNWISqw(siteNumber, parameterCd, \n", - " startDate, endDate)\n", - "# Or the wide return:\n", - "dfWide <- readNWISqw(siteNumber, parameterCd,\n", - " startDate, endDate, reshape=TRUE)\n", + "dfLong <- read_USGS_samples(monitoringLocationIdentifier=sprintf(\"USGS-%s\", siteNumber), usgsPCode=parameterCd, \n", + " activityStartDateLower=startDate, activityStartDateUpper=endDate)\n", "'''\n", "siteNumber = \"01491000\"\n", "parameterCd = [\"00618\",\"71851\"]\n", "startDate = \"1985-10-01\"\n", "endDate = \"2012-09-30\"\n", - "dfLong, md = nwis.get_qwdata(sites=siteNumber, parameterCd=parameterCd,\n", - " start=startDate, end=endDate)" + "dfLong, md = samples.get_usgs_samples(monitoringLocationIdentifier=f\"USGS-{siteNumber}\", usgsPCode=parameterCd,\n", + " activityStartDateLower=startDate, activityStartDateUpper=endDate)" ] }, { From c9c91ad0118a0b803a90bec1d62816b69ff08214 Mon Sep 17 00:00:00 2001 From: Elise Hinman Date: Mon, 2 Jun 2025 17:26:58 -0500 Subject: [PATCH 02/15] remove more qwdata examples and add note in get_record --- dataretrieval/nwis.py | 27 ++++------------- tests/waterservices_test.py | 60 ------------------------------------- 2 files changed, 6 insertions(+), 81 deletions(-) diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index d743b02a..c499cb5f 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -157,14 +157,10 @@ def get_discharge_measurements( Parameters ---------- sites: string or list of strings, optional, default is None - If the qwdata parameter site_no is supplied, it will overwrite the - sites parameter - start: string, optional, default is None - If the qwdata parameter begin_date is supplied, it will overwrite the - start parameter (YYYY-MM-DD) + start: string, optional, default is None + Supply date in the format: YYYY-MM-DD end: string, optional, default is None - If the 
qwdata parameter end_date is supplied, it will overwrite the - end parameter (YYYY-MM-DD) + Supply date in the format: YYYY-MM-DD ssl_check: bool, optional If True, check SSL certificates, if False, do not check SSL, default is True @@ -1071,7 +1067,6 @@ def get_record( service: string, default is 'iv' - 'iv' : instantaneous data - 'dv' : daily mean data - - 'qwdata' : discrete samples - 'site' : site description - 'measurements' : discharge measurements - 'peaks': discharge peaks @@ -1100,9 +1095,6 @@ def get_record( >>> # Get latest daily mean data from site 01585200 >>> df = dataretrieval.nwis.get_record(sites="01585200", service="dv") - >>> # Get all discrete sample data from site 01585200 - >>> df = dataretrieval.nwis.get_record(sites="01585200", service="qwdata") - >>> # Get site description for site 01585200 >>> df = dataretrieval.nwis.get_record(sites="01585200", service="site") @@ -1169,16 +1161,9 @@ def get_record( return df elif service == "qwdata": - df, _ = get_qwdata( - site_no=sites, - begin_date=start, - end_date=end, - multi_index=multi_index, - wide_format=wide_format, - ssl_check=ssl_check, - **kwargs, - ) - return df + return print("qw data are no longer available from" \ + "NWIS. Please use `samples.get_usgs_samples()` instead." 
\ + " If you have questions, please reach out to comptools@usgs.gov") elif service == "site": df, _ = get_info(sites=sites, ssl_check=ssl_check, **kwargs) diff --git a/tests/waterservices_test.py b/tests/waterservices_test.py index 323b6051..19cc30fb 100755 --- a/tests/waterservices_test.py +++ b/tests/waterservices_test.py @@ -11,7 +11,6 @@ get_info, get_iv, get_pmcodes, - get_qwdata, get_ratings, get_record, get_stats, @@ -203,65 +202,6 @@ def test_get_info(requests_mock): assert_metadata(requests_mock, request_url, md, site, [parameter_cd], format) -def test_get_qwdata(requests_mock): - """Tests get_qwdata method correctly generates the request url and returns - the result in a DataFrame""" - format = "rdb" - site = "01491000%2C01645000" - request_url = ( - "https://nwis.waterdata.usgs.gov/nwis/qwdata?site_no={}" - "&qw_sample_wide=qw_sample_wide&agency_cd=USGS&format={}&pm_cd_compare=Greater+than" - "&inventory_output=0&rdb_inventory_output=file&TZoutput=0&rdb_qw_attributes=expanded" - "&date_format=YYYY-MM-DD&rdb_compression=value&submitted_form=brief_list".format( - site, format - ) - ) - response_file_path = "data/waterdata_qwdata.txt" - mock_request(requests_mock, request_url, response_file_path) - with pytest.warns(DeprecationWarning): - df, md = get_qwdata(sites=["01491000", "01645000"]) - if not isinstance(df, DataFrame): - raise AssertionError(f"{type(df)} is not DataFrame base class type") - - if "geometry" in list(df): - if not isinstance(df, gpd.GeoDataFrame): - raise AssertionError(f"{type(df)} is not a GeoDataFrame") - - geom_type = df.geom_type.unique() - if len(geom_type) > 1 or geom_type[0] != "Point": - raise AssertionError( - f"Geometry type {geom_type} not valid, expecting Point" - ) - - assert df.size == 1821472 - assert_metadata(requests_mock, request_url, md, site, None, format) - - -@pytest.mark.parametrize("site_input_type_list", [True, False]) -def test_get_qwdata_site_value_types(requests_mock, site_input_type_list): - """Tests 
get_qwdata method for valid input types for the 'sites' parameter""" - _format = "rdb" - site = "01491000" - request_url = ( - "https://nwis.waterdata.usgs.gov/nwis/qwdata?site_no={}" - "&qw_sample_wide=qw_sample_wide&agency_cd=USGS&format={}&pm_cd_compare=Greater+than" - "&inventory_output=0&rdb_inventory_output=file&TZoutput=0&rdb_qw_attributes=expanded" - "&date_format=YYYY-MM-DD&rdb_compression=value&submitted_form=brief_list".format( - site, _format - ) - ) - response_file_path = "data/waterdata_qwdata.txt" - mock_request(requests_mock, request_url, response_file_path) - if site_input_type_list: - sites = [site] - else: - sites = site - with pytest.warns(DeprecationWarning): - df, md = get_qwdata(sites=sites) - assert type(df) is DataFrame - assert df.size == 1821472 - - def test_get_gwlevels(requests_mock): """Tests get_gwlevels method correctly generates the request url and returns the result in a DataFrame.""" format = "rdb" From 12320f3a14cad956a0548b935037785e372c3b9f Mon Sep 17 00:00:00 2001 From: Elise Hinman Date: Mon, 2 Jun 2025 17:32:26 -0500 Subject: [PATCH 03/15] qwdata no longer a service --- dataretrieval/nwis.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index c499cb5f..e6cb9a27 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -1160,11 +1160,6 @@ def get_record( ) return df - elif service == "qwdata": - return print("qw data are no longer available from" \ - "NWIS. Please use `samples.get_usgs_samples()` instead." 
\ - " If you have questions, please reach out to comptools@usgs.gov") - elif service == "site": df, _ = get_info(sites=sites, ssl_check=ssl_check, **kwargs) return df From 81dc0b5e24ce2422e5abaa6594721946160b6fa0 Mon Sep 17 00:00:00 2001 From: Elise Hinman Date: Mon, 9 Jun 2025 10:55:30 -0500 Subject: [PATCH 04/15] update hydroshare wq example --- ..._dataretrieval_WaterSamples_Examples.ipynb | 208 ++++++++++++------ 1 file changed, 137 insertions(+), 71 deletions(-) diff --git a/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb index 44a9f3b3..e1e45385 100644 --- a/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb +++ b/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb @@ -10,7 +10,7 @@ "source": [ "# USGS dataretrieval Python Package `get_qwdata()` Examples\n", "\n", - "This notebook provides examples of using the Python dataretrieval package to retrieve water quality sample data for United States Geological Survey (USGS) monitoring sites. The dataretrieval package provides a collection of functions to get data from the USGS National Water Information System (NWIS) and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA)." + "This notebook provides examples of using the Python dataretrieval package to retrieve water quality sample data for United States Geological Survey (USGS) monitoring sites. The dataretrieval package provides a collection of functions to get data from the USGS Samples database and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA)." 
] }, { @@ -60,7 +60,7 @@ }, "outputs": [], "source": [ - "from dataretrieval import nwis\n", + "from dataretrieval import samples\n", "from IPython.display import display" ] }, @@ -70,16 +70,119 @@ "source": [ "### Basic Usage\n", "\n", - "The dataretrieval package has several functions that allow you to retrieve data from different web services. This examples uses the `get_qwdata()` function to retrieve water quality sample data for USGS monitoring sites from NWIS. The following arguments are supported:\n", + "The dataretrieval package has several functions that allow you to retrieve data from different web services. This examples uses the `get_usgs_samples()` function to retrieve water quality sample data for USGS monitoring sites from Samples. The following arguments are supported:\n", "\n", - "Arguments (Additional arguments, if supplied, will be used as query parameters)\n", - "\n", - "* **sites** (string or list of strings): A list of USGS site identifiers for which to retrieve data. If the qwdata parameter site_no is supplied, it will overwrite the sites parameter.\n", - "* **parameterCd** (string or list of strings): A list of USGS parameter codes for which to retrieve data.\n", - "* **start** (string): The beginning date for a period for which to retrieve data. If the qwdata parameter begin_date is supplied, it will overwrite the start parameter.\n", - "* **end** (string): The ending date for a period for which to retrieve data. If the qwdata parameter end_date is supplied, it will overwrite the end parameter.\n", - "* **datetime_index** (boolean): If True, create a datetime index\n", - "* **wide_format** (boolean): If True, return data in wide format with multiple samples per row and one row per time." + "* **ssl_check** : boolean, optional\n", + " Check the SSL certificate.\n", + "* **service** : string\n", + " One of the available Samples services: \"results\", \"locations\", \"activities\",\n", + " \"projects\", or \"organizations\". 
Defaults to \"results\".\n", + "* **profile** : string\n", + " One of the available profiles associated with a service. Options for each\n", + " service are:\n", + " results - \"fullphyschem\", \"basicphyschem\",\n", + " \"fullbio\", \"basicbio\", \"narrow\",\n", + " \"resultdetectionquantitationlimit\",\n", + " \"labsampleprep\", \"count\"\n", + " locations - \"site\", \"count\"\n", + " activities - \"sampact\", \"actmetric\",\n", + " \"actgroup\", \"count\"\n", + " projects - \"project\", \"projectmonitoringlocationweight\"\n", + " organizations - \"organization\", \"count\"\n", + "* **activityMediaName** : string or list of strings, optional\n", + " Name or code indicating environmental medium in which sample was taken.\n", + " Check the `activityMediaName_lookup()` function in this module for all\n", + " possible inputs.\n", + " Example: \"Water\".\n", + "* **activityStartDateLower** : string, optional\n", + " The start date if using a date range. Takes the format YYYY-MM-DD.\n", + " The logic is inclusive, i.e. it will also return results that\n", + " match the date. If left as None, will pull all data on or before\n", + " activityStartDateUpper, if populated.\n", + "* **activityStartDateUpper** : string, optional\n", + " The end date if using a date range. Takes the format YYYY-MM-DD.\n", + " The logic is inclusive, i.e. it will also return results that\n", + " match the date. If left as None, will pull all data after\n", + " activityStartDateLower up to the most recent available results.\n", + "* **activityTypeCode** : string or list of strings, optional\n", + " Text code that describes type of field activity performed.\n", + " Example: \"Sample-Routine, regular\".\n", + "* **characteristicGroup** : string or list of strings, optional\n", + " Characteristic group is a broad category of characteristics\n", + " describing one or more results. 
Check the `characteristicGroup_lookup()`\n", + " function in this module for all possible inputs.\n", + " Example: \"Organics, PFAS\"\n", + "* **characteristic** : string or list of strings, optional\n", + " Characteristic is a specific category describing one or more results.\n", + " Check the `characteristic_lookup()` function in this module for all\n", + " possible inputs.\n", + " Example: \"Suspended Sediment Discharge\"\n", + "* **characteristicUserSupplied** : string or list of strings, optional\n", + " A user supplied characteristic name describing one or more results.\n", + "* **boundingBox**: list of four floats, optional\n", + " Filters on the associated monitoring location's point location\n", + " by checking if it is located within the specified geographic area. \n", + " The logic is inclusive, i.e. it will include locations that overlap\n", + " with the edge of the bounding box. Values are separated by commas,\n", + " expressed in decimal degrees, NAD83, and longitudes west of Greenwich\n", + " are negative.\n", + " The format is a string consisting of:\n", + " - Western-most longitude\n", + " - Southern-most latitude\n", + " - Eastern-most longitude\n", + " - Northern-most latitude \n", + " Example: [-92.8,44.2,-88.9,46.0]\n", + "* **countryFips** : string or list of strings, optional\n", + " Example: \"US\" (United States)\n", + "* **stateFips** : string or list of strings, optional\n", + " Check the `stateFips_lookup()` function in this module for all\n", + " possible inputs.\n", + " Example: \"US:15\" (United States: Hawaii)\n", + "* **countyFips** : string or list of strings, optional\n", + " Check the `countyFips_lookup()` function in this module for all\n", + " possible inputs.\n", + " Example: \"US:15:001\" (United States: Hawaii, Hawaii County)\n", + "* **siteTypeCode** : string or list of strings, optional\n", + " An abbreviation for a certain site type. 
Check the `siteType_lookup()`\n", + " function in this module for all possible inputs.\n", + " Example: \"GW\" (Groundwater site)\n", + "* **siteTypeName** : string or list of strings, optional\n", + " A full name for a certain site type. Check the `siteType_lookup()`\n", + " function in this module for all possible inputs.\n", + " Example: \"Well\"\n", + "* **usgsPCode** : string or list of strings, optional\n", + " 5-digit number used in the US Geological Survey computerized\n", + " data system, National Water Information System (NWIS), to\n", + " uniquely identify a specific constituent. Check the \n", + " `characteristic_lookup()` function in this module for all possible\n", + " inputs.\n", + " Example: \"00060\" (Discharge, cubic feet per second)\n", + "* **hydrologicUnit** : string or list of strings, optional\n", + " Max 12-digit number used to describe a hydrologic unit.\n", + " Example: \"070900020502\"\n", + "* **monitoringLocationIdentifier** : string or list of strings, optional\n", + " A monitoring location identifier has two parts: the agency code\n", + " and the location number, separated by a dash (-).\n", + " Example: \"USGS-040851385\"\n", + "* **organizationIdentifier** : string or list of strings, optional\n", + " Designator used to uniquely identify a specific organization.\n", + " Currently only accepting the organization \"USGS\".\n", + "* **pointLocationLatitude** : float, optional\n", + " Latitude for a point/radius query (decimal degrees). Must be used\n", + " with pointLocationLongitude and pointLocationWithinMiles.\n", + "* **pointLocationLongitude** : float, optional\n", + " Longitude for a point/radius query (decimal degrees). Must be used\n", + " with pointLocationLatitude and pointLocationWithinMiles.\n", + "* **pointLocationWithinMiles** : float, optional\n", + " Radius for a point/radius query. 
Must be used with\n", + " pointLocationLatitude and pointLocationLongitude\n", + "* **projectIdentifier** : string or list of strings, optional\n", + " Designator used to uniquely identify a data collection project. Project\n", + " identifiers are specific to an organization (e.g. USGS).\n", + " Example: \"ZH003QW03\"\n", + "* **recordIdentifierUserSupplied** : string or list of strings, optional\n", + " Internal AQS record identifier that returns 1 entry. Only available\n", + " for the \"results\" service." ] }, { @@ -103,8 +206,8 @@ }, "outputs": [], "source": [ - "siteID = '10109000'\n", - "wq_data = nwis.get_qwdata(sites=siteID)\n", + "siteID = 'USGS-10109000'\n", + "wq_data = samples.get_usgs_samples(monitoringLocationIdentifier=siteID)\n", "print('Retrieved data for ' + str(len(wq_data[0])) + ' samples.')" ] }, @@ -114,7 +217,7 @@ "source": [ "### Interpreting the Result\n", "\n", - "The result of calling the `get_qwdata()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the water quality sample data for the requested site, and or observed variables and time frame.\n", + "The result of calling the `get_usgs_samples()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the water quality sample data for the requested site, and or observed variables and time frame.\n", "\n", "Once you've got the data frame, there's several useful things you can do to explore the data." ] @@ -127,7 +230,7 @@ } }, "source": [ - "Display the data frame as a table. The default data frame for this function is a wide, cross-tabulated table, with columns for each observed variable and a row for each sample date (wide_format=True)." + "Display the data frame as a table. The default data frame for this function is a long, flat table, with a row for each observed variable at a given site and date/time." 
] }, { @@ -175,7 +278,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The other part of the result returned from the `get_qwdata()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines and can be helpful in interpreting the contents of the response." + "The other part of the result returned from the `get_usgs_samples()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines and can be helpful in interpreting the contents of the response." ] }, { @@ -192,7 +295,7 @@ }, "outputs": [], "source": [ - "print('The query URL used to retrieve the data from NWIS was: ' + wq_data[1].url)" + "print('The query URL used to retrieve the data from USGS Samples was: ' + wq_data[1].url)" ] }, { @@ -218,27 +321,9 @@ }, "outputs": [], "source": [ - "site_ids = ['04024430', '04024000']\n", + "site_ids = ['USGS-04024430', 'USGS-04024000']\n", "parameter_code = '00065'\n", - "wq_multi_site = nwis.get_qwdata(sites=site_ids, parameterCd=parameter_code)\n", - "print('Retrieved data for ' + str(len(wq_multi_site[0])) + ' samples.')\n", - "display(wq_multi_site[0])" - ] - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "The following example is the same as the previous example but with multi index turned off (multi_index=False)" - }, - { - "metadata": {}, - "cell_type": "code", - "outputs": [], - "execution_count": null, - "source": [ - "site_ids = ['04024430', '04024000']\n", - "parameter_code = '00065'\n", - "wq_multi_site = nwis.get_qwdata(sites=site_ids, parameterCd=parameter_code, 
multi_index=False)\n", + "wq_multi_site = samples.get_usgs_samples(monitoringLocationIdentifier=site_ids, usgsPCode=parameter_code)\n", "print('Retrieved data for ' + str(len(wq_multi_site[0])) + ' samples.')\n", "display(wq_multi_site[0])" ] @@ -251,7 +336,7 @@ } }, "source": [ - "#### Example 3: Retrieve water quality sample data for multiple sites, including a list of parameters, within a time period defined by start and end dates" + "#### Example 3: Retrieve water quality sample data for multiple sites, including a list of parameters, within a time period defined by start date until present" ] }, { @@ -268,44 +353,22 @@ }, "outputs": [], "source": [ - "site_ids = ['04024430', '04024000']\n", + "site_ids = ['USGS-04024430', 'USGS-04024000']\n", "parameterCd = ['34247', '30234', '32104', '34220']\n", "startDate = '2012-01-01'\n", - "endDate = ''\n", - "wq_data2 = nwis.get_qwdata(sites=site_ids, parameterCd=parameterCd,\n", - " start=startDate, end=endDate)\n", + "wq_data2 = samples.get_usgs_samples(monitoringLocationIdentifier=site_ids, usgsPCode=parameterCd,\n", + " activityStartDateLower=startDate)\n", "print('Retrieved data for ' + str(len(wq_multi_site[0])) + ' samples.')\n", "display(wq_data2[0])\n" ] }, - { - "metadata": {}, - "cell_type": "markdown", - "source": "The following example is the same as the previous example but with multi index turned off (multi_index=False)" - }, - { - "metadata": {}, - "cell_type": "code", - "outputs": [], - "execution_count": null, - "source": [ - "site_ids = ['04024430', '04024000']\n", - "parameterCd = ['34247', '30234', '32104', '34220']\n", - "startDate = '2012-01-01'\n", - "endDate = ''\n", - "wq_data2 = nwis.get_qwdata(sites=site_ids, parameterCd=parameterCd,\n", - " start=startDate, end=endDate, multi_index=False)\n", - "print('Retrieved data for ' + str(len(wq_multi_site[0])) + ' samples.')\n", - "display(wq_data2[0])" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Example 4: Retrieve water 
quality sample data for one site in serial format\n", + "#### Example 4: Retrieve water quality sample data for one site and convert to a wide format\n", "\n", - "Each row in the resulting table represents a single observation of a single parameters. Each sample may be analyzed for multiple parameters and so a single water quality sample can result in multiple rows in serial format." + "Note that the USGS samples database returns multiple parameters in a \"long\" format: each row in the resulting table represents a single observation of a single parameters. Furthermore, every observation has 181 fields of metadata. However, if you wanted to place your water quality data into a \"wide\" format, where each column represents a water quality parameter code, the code below details one solution." ] }, { @@ -314,16 +377,19 @@ "metadata": {}, "outputs": [], "source": [ - "siteID = '10109000'\n", - "wq_data = nwis.get_qwdata(sites=siteID, wide_format=False)\n", - "print('Retrieved data for ' + str(len(wq_data[0])) + ' sample results.')\n", - "display(wq_data[0])" + "siteID = 'USGS-10109000'\n", + "wq_data,_ = samples.get_usgs_samples(monitoringLocationIdentifier=siteID)\n", + "print('Retrieved data for ' + str(len(wq_data)) + ' sample results.')\n", + "\n", + "wq_data[\"characteristic_unit\"] = wq_data[\"Result_Characteristic\"] + \", \" + wq_data[\"Result_MeasureUnit\"]\n", + "wq_data_wide = wq_data.pivot_table(index=['Location_Identifier', 'Activity_StartDate', 'Activity_StartTime'], columns=\"characteristic_unit\", values=\"Result_Measure\", aggfunc='first')\n", + "display(wq_data_wide)\n" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "dr-test", "language": "python", "name": "python3" }, @@ -337,7 +403,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.11.12" } }, "nbformat": 4, From 0e58bb2fa3e91788d678fb15e0d2ac84a8fe299a Mon Sep 17 00:00:00 2001 
From: Elise Hinman Date: Mon, 9 Jun 2025 10:56:45 -0500 Subject: [PATCH 05/15] forgot one reference to qwdata --- demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb index e1e45385..32f8601e 100644 --- a/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb +++ b/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb @@ -8,7 +8,7 @@ } }, "source": [ - "# USGS dataretrieval Python Package `get_qwdata()` Examples\n", + "# USGS dataretrieval Python Package `get_usgs_samples()` Examples\n", "\n", "This notebook provides examples of using the Python dataretrieval package to retrieve water quality sample data for United States Geological Survey (USGS) monitoring sites. The dataretrieval package provides a collection of functions to get data from the USGS Samples database and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA)." 
] From e5d4da4f475bda0b530ae7b6cbfb8cd3538034e5 Mon Sep 17 00:00:00 2001 From: Elise Hinman Date: Mon, 9 Jun 2025 14:16:13 -0500 Subject: [PATCH 06/15] move samples function to waterdata.py --- README.md | 4 +- dataretrieval/__init__.py | 1 + dataretrieval/nwis.py | 8 +- dataretrieval/samples.py | 6 +- dataretrieval/waterdata.py | 350 ++++++++++++++++++ demos/R Python Vignette equivalents.ipynb | 6 +- ..._dataretrieval_WaterSamples_Examples.ipynb | 20 +- docs/source/reference/waterdata.rst | 8 + tests/samples_test.py | 2 +- tests/waterdata_test.py | 105 ++++++ 10 files changed, 487 insertions(+), 23 deletions(-) create mode 100644 dataretrieval/waterdata.py create mode 100644 docs/source/reference/waterdata.rst create mode 100755 tests/waterdata_test.py diff --git a/README.md b/README.md index 107894dc..edbe3cb8 100644 --- a/README.md +++ b/README.md @@ -53,11 +53,11 @@ Water quality data are available from: - [Samples](https://waterdata.usgs.gov/download-samples/#dataProfile=site) - Discrete USGS water quality data only - [Water Quality Portal](https://www.waterqualitydata.us/) - Discrete water quality data from USGS and EPA. Older data are available in the legacy WQX version 2 format; all data are available in the beta WQX3.0 format. -To access the full functionality available from NWIS web services, nwis.get record appends any additional kwargs into the REST request. For example +To access the full functionality available from NWIS web services, `nwis.get_record` appends any additional kwargs into the REST request. For example: ```python nwis.get_record(sites='03339000', service='dv', start='2017-12-31', parameterCd='00060') ``` -will download daily data with the parameter code 00060 (discharge). +...will download daily data with the parameter code 00060 (discharge). ## Accessing the "Internal" NWIS If you're connected to the USGS network, dataretrieval call pull from the internal (non-public) NWIS interface. 
diff --git a/dataretrieval/__init__.py b/dataretrieval/__init__.py index 200b0182..07374f47 100644 --- a/dataretrieval/__init__.py +++ b/dataretrieval/__init__.py @@ -5,6 +5,7 @@ from dataretrieval.samples import * from dataretrieval.streamstats import * from dataretrieval.utils import * +from dataretrieval.waterdata import * from dataretrieval.waterwatch import * from dataretrieval.wqp import * diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index e6cb9a27..02eefcbd 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -134,13 +134,13 @@ def get_qwdata( **kwargs, ) -> Tuple[pd.DataFrame, BaseMetadata]: """ - Get water sample data from qwdata service - deprecated, use `get_usgs_samples()` - in the samples module. + Get water sample data from qwdata service - deprecated, use `get_samples()` + in the waterdata module. """ return print("This function is deprecated and has been " \ - "replaced with `get_usgs_samples() in the " \ - "samples module. If you have questions, " \ + "replaced with `get_samples() in the " \ + "waterdata module. If you have questions, " \ "please reach out to comptools@usgs.gov") diff --git a/dataretrieval/samples.py b/dataretrieval/samples.py index eb3a614f..3aad8e5a 100644 --- a/dataretrieval/samples.py +++ b/dataretrieval/samples.py @@ -272,13 +272,13 @@ def get_usgs_samples( .. code:: >>> # Get PFAS results within a bounding box - >>> df, md = dataretrieval.samples.get_usgs_samples( + >>> df, md = dataretrieval.samples.get_samples( ... boundingBox=[-90.2,42.6,-88.7,43.2], ... characteristicGroup="Organics, PFAS" ... ) >>> # Get all activities for the Commonwealth of Virginia over a date range - >>> df, md = dataretrieval.samples.get_usgs_samples( + >>> df, md = dataretrieval.samples.get_samples( ... service="activities", ... profile="sampact", ... activityStartDateLower="2023-10-01", @@ -286,7 +286,7 @@ def get_usgs_samples( ... 
stateFips="US:51") >>> # Get all pH samples for two sites in Utah - >>> df, md = dataretrieval.samples.get_usgs_samples( + >>> df, md = dataretrieval.samples.get_samples( ... monitoringLocationIdentifier=['USGS-393147111462301', 'USGS-393343111454101'], ... usgsPCode='00400') diff --git a/dataretrieval/waterdata.py b/dataretrieval/waterdata.py new file mode 100644 index 00000000..bd9cda38 --- /dev/null +++ b/dataretrieval/waterdata.py @@ -0,0 +1,350 @@ +"""Functions for downloading data from the Water Data APIs, including the USGS Aquarius Samples database. + +See https://api.waterdata.usgs.gov/ for API reference. +""" + +from __future__ import annotations + +import json +from io import StringIO +from typing import TYPE_CHECKING, Literal, get_args + +import pandas as pd +import requests +from requests.models import PreparedRequest + +from dataretrieval.utils import BaseMetadata, to_str + +if TYPE_CHECKING: + from typing import Optional, Tuple, Union + + from pandas import DataFrame + + +_BASE_URL = "https://api.waterdata.usgs.gov/samples-data" + +_CODE_SERVICES = Literal[ + "characteristicgroup", + "characteristics", + "counties", + "countries", + "observedproperty", + "samplemedia", + "sitetype", + "states", +] + + +_SERVICES = Literal["activities", "locations", "organizations", "projects", "results"] + +_PROFILES = Literal[ + "actgroup", + "actmetric", + "basicbio", + "basicphyschem", + "count", + "fullbio", + "fullphyschem", + "labsampleprep", + "narrow", + "organization", + "project", + "projectmonitoringlocationweight", + "resultdetectionquantitationlimit", + "sampact", + "site", +] + +_PROFILE_LOOKUP = { + "activities": ["sampact", "actmetric", "actgroup", "count"], + "locations": ["site", "count"], + "organizations": ["organization", "count"], + "projects": ["project", "projectmonitoringlocationweight"], + "results": [ + "fullphyschem", + "basicphyschem", + "fullbio", + "basicbio", + "narrow", + "resultdetectionquantitationlimit", + "labsampleprep", + 
"count", + ], +} + + +def get_codes(code_service: _CODE_SERVICES) -> DataFrame: + """Return codes from a Samples code service. + + Parameters + ---------- + code_service : string + One of the following options: "states", "counties", "countries" + "sitetype", "samplemedia", "characteristicgroup", "characteristics", + or "observedproperty" + """ + valid_code_services = get_args(_CODE_SERVICES) + if code_service not in valid_code_services: + raise ValueError( + f"Invalid code service: '{code_service}'. " + f"Valid options are: {valid_code_services}." + ) + + url = f"{_BASE_URL}/codeservice/{code_service}?mimeType=application%2Fjson" + + response = requests.get(url) + + response.raise_for_status() + + data_dict = json.loads(response.text) + data_list = data_dict['data'] + + df = pd.DataFrame(data_list) + + return df + +def get_samples( + ssl_check: bool = True, + service: _SERVICES = "results", + profile: _PROFILES = "fullphyschem", + activityMediaName: Optional[Union[str, list[str]]] = None, + activityStartDateLower: Optional[str] = None, + activityStartDateUpper: Optional[str] = None, + activityTypeCode: Optional[Union[str, list[str]]] = None, + characteristicGroup: Optional[Union[str, list[str]]] = None, + characteristic: Optional[Union[str, list[str]]] = None, + characteristicUserSupplied: Optional[Union[str, list[str]]] = None, + boundingBox: Optional[list[float]] = None, + countryFips: Optional[Union[str, list[str]]] = None, + stateFips: Optional[Union[str, list[str]]] = None, + countyFips: Optional[Union[str, list[str]]] = None, + siteTypeCode: Optional[Union[str, list[str]]] = None, + siteTypeName: Optional[Union[str, list[str]]] = None, + usgsPCode: Optional[Union[str, list[str]]] = None, + hydrologicUnit: Optional[Union[str, list[str]]] = None, + monitoringLocationIdentifier: Optional[Union[str, list[str]]] = None, + organizationIdentifier: Optional[Union[str, list[str]]] = None, + pointLocationLatitude: Optional[float] = None, + pointLocationLongitude: 
Optional[float] = None, + pointLocationWithinMiles: Optional[float] = None, + projectIdentifier: Optional[Union[str, list[str]]] = None, + recordIdentifierUserSupplied: Optional[Union[str, list[str]]] = None, +) -> Tuple[DataFrame, BaseMetadata]: + """Search Samples database for USGS water quality data. + This is a wrapper function for the Samples database API. All potential + filters are provided as arguments to the function, but please do not + populate all possible filters; leave as many as feasible with their default + value (None). This is important because overcomplicated web service queries + can bog down the database's ability to return an applicable dataset before + it times out. + + The web GUI for the Samples database can be found here: + https://waterdata.usgs.gov/download-samples/#dataProfile=site + + If you would like more details on feasible query parameters (complete with + examples), please visit the Samples database swagger docs, here: + https://api.waterdata.usgs.gov/samples-data/docs#/ + + Parameters + ---------- + ssl_check : bool, optional + Check the SSL certificate. + service : string + One of the available Samples services: "results", "locations", "activities", + "projects", or "organizations". Defaults to "results". + profile : string + One of the available profiles associated with a service. Options for each + service are: + results - "fullphyschem", "basicphyschem", + "fullbio", "basicbio", "narrow", + "resultdetectionquantitationlimit", + "labsampleprep", "count" + locations - "site", "count" + activities - "sampact", "actmetric", + "actgroup", "count" + projects - "project", "projectmonitoringlocationweight" + organizations - "organization", "count" + activityMediaName : string or list of strings, optional + Name or code indicating environmental medium in which sample was taken. + Check the `activityMediaName_lookup()` function in this module for all + possible inputs. + Example: "Water". 
+ activityStartDateLower : string, optional + The start date if using a date range. Takes the format YYYY-MM-DD. + The logic is inclusive, i.e. it will also return results that + match the date. If left as None, will pull all data on or before + activityStartDateUpper, if populated. + activityStartDateUpper : string, optional + The end date if using a date range. Takes the format YYYY-MM-DD. + The logic is inclusive, i.e. it will also return results that + match the date. If left as None, will pull all data on or after + activityStartDateLower up to the most recent available results. + activityTypeCode : string or list of strings, optional + Text code that describes type of field activity performed. + Example: "Sample-Routine, regular". + characteristicGroup : string or list of strings, optional + Characteristic group is a broad category of characteristics + describing one or more results. Check the `characteristicGroup_lookup()` + function in this module for all possible inputs. + Example: "Organics, PFAS" + characteristic : string or list of strings, optional + Characteristic is a specific category describing one or more results. + Check the `characteristic_lookup()` function in this module for all + possible inputs. + Example: "Suspended Sediment Discharge" + characteristicUserSupplied : string or list of strings, optional + A user supplied characteristic name describing one or more results. + boundingBox: list of four floats, optional + Filters on the associated monitoring location's point location + by checking if it is located within the specified geographic area. + The logic is inclusive, i.e. it will include locations that overlap + with the edge of the bounding box. Values are separated by commas, + expressed in decimal degrees, NAD83, and longitudes west of Greenwich + are negative. 
+ The format is a list consisting of: + - Western-most longitude + - Southern-most latitude + - Eastern-most longitude + - Northern-most latitude + Example: [-92.8,44.2,-88.9,46.0] + countryFips : string or list of strings, optional + Example: "US" (United States) + stateFips : string or list of strings, optional + Check the `stateFips_lookup()` function in this module for all + possible inputs. + Example: "US:15" (United States: Hawaii) + countyFips : string or list of strings, optional + Check the `countyFips_lookup()` function in this module for all + possible inputs. + Example: "US:15:001" (United States: Hawaii, Hawaii County) + siteTypeCode : string or list of strings, optional + An abbreviation for a certain site type. Check the `siteType_lookup()` + function in this module for all possible inputs. + Example: "GW" (Groundwater site) + siteTypeName : string or list of strings, optional + A full name for a certain site type. Check the `siteType_lookup()` + function in this module for all possible inputs. + Example: "Well" + usgsPCode : string or list of strings, optional + 5-digit number used in the US Geological Survey computerized + data system, National Water Information System (NWIS), to + uniquely identify a specific constituent. Check the + `characteristic_lookup()` function in this module for all possible + inputs. + Example: "00060" (Discharge, cubic feet per second) + hydrologicUnit : string or list of strings, optional + Max 12-digit number used to describe a hydrologic unit. + Example: "070900020502" + monitoringLocationIdentifier : string or list of strings, optional + A monitoring location identifier has two parts: the agency code + and the location number, separated by a dash (-). + Example: "USGS-040851385" + organizationIdentifier : string or list of strings, optional + Designator used to uniquely identify a specific organization. + Currently only accepting the organization "USGS". 
+ pointLocationLatitude : float, optional + Latitude for a point/radius query (decimal degrees). Must be used + with pointLocationLongitude and pointLocationWithinMiles. + pointLocationLongitude : float, optional + Longitude for a point/radius query (decimal degrees). Must be used + with pointLocationLatitude and pointLocationWithinMiles. + pointLocationWithinMiles : float, optional + Radius for a point/radius query. Must be used with + pointLocationLatitude and pointLocationLongitude + projectIdentifier : string or list of strings, optional + Designator used to uniquely identify a data collection project. Project + identifiers are specific to an organization (e.g. USGS). + Example: "ZH003QW03" + recordIdentifierUserSupplied : string or list of strings, optional + Internal AQS record identifier that returns 1 entry. Only available + for the "results" service. + + Returns + ------- + df : ``pandas.DataFrame`` + Formatted data returned from the API query. + md : :obj:`dataretrieval.utils.Metadata` + Custom ``dataretrieval`` metadata object pertaining to the query. + + Examples + -------- + .. code:: + + >>> # Get PFAS results within a bounding box + >>> df, md = dataretrieval.samples.get_samples( + ... boundingBox=[-90.2,42.6,-88.7,43.2], + ... characteristicGroup="Organics, PFAS" + ... ) + + >>> # Get all activities for the Commonwealth of Virginia over a date range + >>> df, md = dataretrieval.samples.get_samples( + ... service="activities", + ... profile="sampact", + ... activityStartDateLower="2023-10-01", + ... activityStartDateUpper="2024-01-01", + ... stateFips="US:51") + + >>> # Get all pH samples for two sites in Utah + >>> df, md = dataretrieval.samples.get_samples( + ... monitoringLocationIdentifier=['USGS-393147111462301', 'USGS-393343111454101'], + ... 
usgsPCode='00400') + + """ + + _check_profiles(service, profile) + + params = { + k: v for k, v in locals().items() + if k not in ["ssl_check", "service", "profile"] + and v is not None + } + + + params.update({"mimeType": "text/csv"}) + + if "boundingBox" in params: + params["boundingBox"] = to_str(params["boundingBox"]) + + url = f"{_BASE_URL}/{service}/{profile}" + + req = PreparedRequest() + req.prepare_url(url, params=params) + print(f"Request: {req.url}") + + response = requests.get(url, params=params, verify=ssl_check) + + response.raise_for_status() + + df = pd.read_csv(StringIO(response.text), delimiter=",") + + return df, BaseMetadata(response) + +def _check_profiles( + service: _SERVICES, + profile: _PROFILES, +) -> None: + """Check whether a service profile is valid. + + Parameters + ---------- + service : string + One of the service names from the "services" list. + profile : string + One of the profile names from "results_profiles", + "locations_profiles", "activities_profiles", + "projects_profiles" or "organizations_profiles". + """ + valid_services = get_args(_SERVICES) + if service not in valid_services: + raise ValueError( + f"Invalid service: '{service}'. " + f"Valid options are: {valid_services}." + ) + + valid_profiles = _PROFILE_LOOKUP[service] + if profile not in valid_profiles: + raise ValueError( + f"Invalid profile: '{profile}' for service '{service}'. " + f"Valid options are: {valid_profiles}." 
+ ) + diff --git a/demos/R Python Vignette equivalents.ipynb b/demos/R Python Vignette equivalents.ipynb index 9aeda84b..12cd52e3 100755 --- a/demos/R Python Vignette equivalents.ipynb +++ b/demos/R Python Vignette equivalents.ipynb @@ -14,7 +14,7 @@ "outputs": [], "source": [ "from dataretrieval import nwis\n", - "from dataretrieval import samples\n", + "from dataretrieval import waterdata\n", "from dataretrieval import wqp" ] }, @@ -62,7 +62,7 @@ "\n", "# sample data Nitrate:\n", "parameterCd = \"00618\"\n", - "usgs_samples_data, md = samples.get_usgs_samples(monitoringLocationIdentifier=f\"USGS-{siteNumber}\", usgsPCode=parameterCd, activityStartDateLower=\"1980-01-01\", activityStartDateUpper=\"2010-01-01\")\n", + "samples_data, md = waterdata.get_samples(monitoringLocationIdentifier=f\"USGS-{siteNumber}\", usgsPCode=parameterCd, activityStartDateLower=\"1980-01-01\", activityStartDateUpper=\"2010-01-01\")\n", "\n", "pCode, md = nwis.get_pmcodes(parameterCd=parameterCd)" ] @@ -207,7 +207,7 @@ "parameterCd = [\"00618\",\"71851\"]\n", "startDate = \"1985-10-01\"\n", "endDate = \"2012-09-30\"\n", - "dfLong, md = samples.get_usgs_samples(monitoringLocationIdentifier=f\"USGS-{siteNumber}\", usgsPCode=parameterCd,\n", + "dfLong, md = waterdata.get_samples(monitoringLocationIdentifier=f\"USGS-{siteNumber}\", usgsPCode=parameterCd,\n", " activityStartDateLower=startDate, activityStartDateUpper=endDate)" ] }, diff --git a/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb index 32f8601e..6a2266f1 100644 --- a/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb +++ b/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb @@ -8,7 +8,7 @@ } }, "source": [ - "# USGS dataretrieval Python Package `get_usgs_samples()` Examples\n", + "# USGS dataretrieval Python Package `get_samples()` Examples\n", "\n", "This notebook provides examples of using the Python dataretrieval package 
to retrieve water quality sample data for United States Geological Survey (USGS) monitoring sites. The dataretrieval package provides a collection of functions to get data from the USGS Samples database and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA)." ] @@ -60,7 +60,7 @@ }, "outputs": [], "source": [ - "from dataretrieval import samples\n", + "from dataretrieval import waterdata\n", "from IPython.display import display" ] }, @@ -70,7 +70,7 @@ "source": [ "### Basic Usage\n", "\n", - "The dataretrieval package has several functions that allow you to retrieve data from different web services. This examples uses the `get_usgs_samples()` function to retrieve water quality sample data for USGS monitoring sites from Samples. The following arguments are supported:\n", + "The dataretrieval package has several functions that allow you to retrieve data from different web services. This examples uses the `get_samples()` function to retrieve water quality sample data for USGS monitoring sites from Samples. The following arguments are supported:\n", "\n", "* **ssl_check** : boolean, optional\n", " Check the SSL certificate.\n", @@ -207,7 +207,7 @@ "outputs": [], "source": [ "siteID = 'USGS-10109000'\n", - "wq_data = samples.get_usgs_samples(monitoringLocationIdentifier=siteID)\n", + "wq_data = waterdata.get_samples(monitoringLocationIdentifier=siteID)\n", "print('Retrieved data for ' + str(len(wq_data[0])) + ' samples.')" ] }, @@ -217,7 +217,7 @@ "source": [ "### Interpreting the Result\n", "\n", - "The result of calling the `get_usgs_samples()` function is an object that contains a Pandas data frame object and an associated metadata object. 
The Pandas data frame contains the water quality sample data for the requested site, and or observed variables and time frame.\n", + "The result of calling the `get_samples()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the water quality sample data for the requested site, and or observed variables and time frame.\n", "\n", "Once you've got the data frame, there's several useful things you can do to explore the data." ] @@ -278,7 +278,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The other part of the result returned from the `get_usgs_data()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines and can be helpful in interpreting the contents of the response." + "The other part of the result returned from the `get_samples()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines and can be helpful in interpreting the contents of the response." 
] }, { @@ -323,7 +323,7 @@ "source": [ "site_ids = ['USGS-04024430', 'USGS-04024000']\n", "parameter_code = '00065'\n", - "wq_multi_site = samples.get_usgs_samples(monitoringLocationIdentifier=site_ids, usgsPCode=parameter_code)\n", + "wq_multi_site = waterdata.get_samples(monitoringLocationIdentifier=site_ids, usgsPCode=parameter_code)\n", "print('Retrieved data for ' + str(len(wq_multi_site[0])) + ' samples.')\n", "display(wq_multi_site[0])" ] @@ -356,7 +356,7 @@ "site_ids = ['USGS-04024430', 'USGS-04024000']\n", "parameterCd = ['34247', '30234', '32104', '34220']\n", "startDate = '2012-01-01'\n", - "wq_data2 = samples.get_usgs_samples(monitoringLocationIdentifier=site_ids, usgsPCode=parameterCd,\n", + "wq_data2 = waterdata.get_samples(monitoringLocationIdentifier=site_ids, usgsPCode=parameterCd,\n", " activityStartDateLower=startDate)\n", "print('Retrieved data for ' + str(len(wq_multi_site[0])) + ' samples.')\n", "display(wq_data2[0])\n" @@ -368,7 +368,7 @@ "source": [ "#### Example 4: Retrieve water quality sample data for one site and convert to a wide format\n", "\n", - "Note that the USGS samples database returns multiple parameters in a \"long\" format: each row in the resulting table represents a single observation of a single parameters. Furthermore, every observation has 181 fields of metadata. However, if you wanted to place your water quality data into a \"wide\" format, where each column represents a water quality parameter code, the code below details one solution." + "Note that the USGS Samples database returns multiple parameters in a \"long\" format: each row in the resulting table represents a single observation of a single parameters. Furthermore, every observation has 181 fields of metadata. However, if you wanted to place your water quality data into a \"wide\" format, where each column represents a water quality parameter code, the code below details one solution." 
] }, { @@ -378,7 +378,7 @@ "outputs": [], "source": [ "siteID = 'USGS-10109000'\n", - "wq_data,_ = samples.get_usgs_samples(monitoringLocationIdentifier=siteID)\n", + "wq_data,_ = waterdata.get_samples(monitoringLocationIdentifier=siteID)\n", "print('Retrieved data for ' + str(len(wq_data)) + ' sample results.')\n", "\n", "wq_data[\"characteristic_unit\"] = wq_data[\"Result_Characteristic\"] + \", \" + wq_data[\"Result_MeasureUnit\"]\n", diff --git a/docs/source/reference/waterdata.rst b/docs/source/reference/waterdata.rst new file mode 100644 index 00000000..dc2cd0b3 --- /dev/null +++ b/docs/source/reference/waterdata.rst @@ -0,0 +1,8 @@ +.. _waterdata: + +dataretrieval.waterdata +------------------------- + +.. automodule:: dataretrieval.waterdata + :members: + :special-members: \ No newline at end of file diff --git a/tests/samples_test.py b/tests/samples_test.py index c3e2a995..ae1e3afb 100755 --- a/tests/samples_test.py +++ b/tests/samples_test.py @@ -15,7 +15,7 @@ def mock_request(requests_mock, request_url, file_path): request_url, text=text.read(), headers={"mock_header": "value"} ) -def test_mock_get_usgs_samples(requests_mock): +def test_mock_get_samples(requests_mock): """Tests USGS Samples query""" request_url = ( "https://api.waterdata.usgs.gov/samples-data/results/fullphyschem?" diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py new file mode 100755 index 00000000..70b9ffda --- /dev/null +++ b/tests/waterdata_test.py @@ -0,0 +1,105 @@ +import datetime + +import pytest +from pandas import DataFrame + +from dataretrieval.waterdata import ( + _check_profiles, + get_samples +) + +def mock_request(requests_mock, request_url, file_path): + """Mock request code""" + with open(file_path) as text: + requests_mock.get( + request_url, text=text.read(), headers={"mock_header": "value"} + ) + +def test_mock_get_samples(requests_mock): + """Tests USGS Samples query""" + request_url = ( + "https://api.waterdata.usgs.gov/samples-data/results/fullphyschem?" 
+ "activityMediaName=Water&activityStartDateLower=2020-01-01" + "&activityStartDateUpper=2024-12-31&monitoringLocationIdentifier=USGS-05406500&mimeType=text%2Fcsv" + ) + response_file_path = "data/samples_results.txt" + mock_request(requests_mock, request_url, response_file_path) + df, md = get_samples( + service="results", + profile="fullphyschem", + activityMediaName="Water", + activityStartDateLower="2020-01-01", + activityStartDateUpper="2024-12-31", + monitoringLocationIdentifier="USGS-05406500", + ) + assert type(df) is DataFrame + assert df.size == 12127 + assert md.url == request_url + assert isinstance(md.query_time, datetime.timedelta) + assert md.header == {"mock_header": "value"} + assert md.comment is None + +def test_check_profiles(): + """Tests that correct errors are raised for invalid profiles.""" + with pytest.raises(ValueError): + _check_profiles(service="foo", profile="bar") + with pytest.raises(ValueError): + _check_profiles(service="results", profile="foo") + +def test_samples_results(): + """Test results call for proper columns""" + df,_ = get_samples( + service="results", + profile="narrow", + monitoringLocationIdentifier="USGS-05288705", + activityStartDateLower="2024-10-01", + activityStartDateUpper="2025-04-24" + ) + assert all(col in df.columns for col in ["Location_Identifier", "Activity_ActivityIdentifier"]) + assert len(df) > 0 + +def test_samples_activity(): + """Test activity call for proper columns""" + df,_ = get_samples( + service="activities", + profile="sampact", + monitoringLocationIdentifier="USGS-06719505" + ) + assert len(df) > 0 + assert len(df.columns) == 95 + assert "Location_HUCTwelveDigitCode" in df.columns + +def test_samples_locations(): + """Test locations call for proper columns""" + df,_ = get_samples( + service="locations", + profile="site", + stateFips="US:55", + activityStartDateLower="2024-10-01", + activityStartDateUpper="2025-04-24", + usgsPCode="00010" + ) + assert all(col in df.columns for col in 
["Location_Identifier", "Location_Latitude"]) + assert len(df) > 0 + +def test_samples_projects(): + """Test projects call for proper columns""" + df,_ = get_samples( + service="projects", + profile="project", + stateFips="US:15", + activityStartDateLower="2024-10-01", + activityStartDateUpper="2025-04-24" + ) + assert all(col in df.columns for col in ["Org_Identifier", "Project_Identifier"]) + assert len(df) > 0 + +def test_samples_organizations(): + """Test organizations call for proper columns""" + df,_ = get_samples( + service="organizations", + profile="count", + stateFips="US:01" + ) + assert len(df) == 1 + assert df.size == 3 From 8f89fc5ef104a58f9c0bd2cfaf3f61aa72acead7 Mon Sep 17 00:00:00 2001 From: Elise Hinman Date: Mon, 9 Jun 2025 14:42:41 -0500 Subject: [PATCH 07/15] remove code from samples module, etc. --- dataretrieval/samples.py | 126 ++---------------- dataretrieval/waterdata.py | 6 +- ..._dataretrieval_WaterSamples_Examples.ipynb | 4 +- 3 files changed, 17 insertions(+), 119 deletions(-) diff --git a/dataretrieval/samples.py b/dataretrieval/samples.py index 3aad8e5a..0342460f 100644 --- a/dataretrieval/samples.py +++ b/dataretrieval/samples.py @@ -11,99 +11,18 @@ from typing import TYPE_CHECKING, Literal, get_args import pandas as pd +import warnings import requests from requests.models import PreparedRequest from dataretrieval.utils import BaseMetadata, to_str +from dataretrieval.waterdata import get_codes, get_args, _check_profiles, _BASE_URL, _CODE_SERVICES, _PROFILES, _SERVICES, _PROFILE_LOOKUP if TYPE_CHECKING: from typing import Optional, Tuple, Union from pandas import DataFrame - -_BASE_URL = "https://api.waterdata.usgs.gov/samples-data" - -_CODE_SERVICES = Literal[ - "characteristicgroup", - "characteristics", - "counties", - "countries", - "observedproperty", - "samplemedia", - "sitetype", - "states", -] - - -_SERVICES = Literal["activities", "locations", "organizations", "projects", "results"] - -_PROFILES = Literal[ - "actgroup", - 
"actmetric", - "basicbio", - "basicphyschem", - "count", - "fullbio", - "fullphyschem", - "labsampleprep", - "narrow", - "organization", - "project", - "projectmonitoringlocationweight", - "resultdetectionquantitationlimit", - "sampact", - "site", -] - -_PROFILE_LOOKUP = { - "activities": ["sampact", "actmetric", "actgroup", "count"], - "locations": ["site", "count"], - "organizations": ["organization", "count"], - "projects": ["project", "projectmonitoringlocationweight"], - "results": [ - "fullphyschem", - "basicphyschem", - "fullbio", - "basicbio", - "narrow", - "resultdetectionquantitationlimit", - "labsampleprep", - "count", - ], -} - - -def get_codes(code_service: _CODE_SERVICES) -> DataFrame: - """Return codes from a Samples code service. - - Parameters - ---------- - code_service : string - One of the following options: "states", "counties", "countries" - "sitetype", "samplemedia", "characteristicgroup", "characteristics", - or "observedproperty" - """ - valid_code_services = get_args(_CODE_SERVICES) - if code_service not in valid_code_services: - raise ValueError( - f"Invalid code service: '{code_service}'. " - f"Valid options are: {valid_code_services}." - ) - - url = f"{_BASE_URL}/codeservice/{code_service}?mimeType=application%2Fjson" - - response = requests.get(url) - - response.raise_for_status() - - data_dict = json.loads(response.text) - data_list = data_dict['data'] - - df = pd.DataFrame(data_list) - - return df - def get_usgs_samples( ssl_check: bool = True, service: _SERVICES = "results", @@ -272,13 +191,13 @@ def get_usgs_samples( .. code:: >>> # Get PFAS results within a bounding box - >>> df, md = dataretrieval.samples.get_samples( + >>> df, md = dataretrieval.samples.get_usgs_samples( ... boundingBox=[-90.2,42.6,-88.7,43.2], ... characteristicGroup="Organics, PFAS" ... 
) >>> # Get all activities for the Commonwealth of Virginia over a date range - >>> df, md = dataretrieval.samples.get_samples( + >>> df, md = dataretrieval.samples.get_usgs_samples( ... service="activities", ... profile="sampact", ... activityStartDateLower="2023-10-01", @@ -286,12 +205,19 @@ def get_usgs_samples( ... stateFips="US:51") >>> # Get all pH samples for two sites in Utah - >>> df, md = dataretrieval.samples.get_samples( + >>> df, md = dataretrieval.samples.get_usgs_samples( ... monitoringLocationIdentifier=['USGS-393147111462301', 'USGS-393343111454101'], ... usgsPCode='00400') """ + warnings.warn("The `get_usgs_samples` function is moving from" \ + " the samples module to the new waterdata module, where" \ + " it will be called simply `get_samples`. All of the same" \ + " functionality will be retained. The samples module is" \ + " deprecated and will eventually be removed. Switch to the" \ + " waterdata module as soon as possible, thank you.") + _check_profiles(service, profile) params = { @@ -320,32 +246,4 @@ def get_usgs_samples( return df, BaseMetadata(response) -def _check_profiles( - service: _SERVICES, - profile: _PROFILES, -) -> None: - """Check whether a service profile is valid. - - Parameters - ---------- - service : string - One of the service names from the "services" list. - profile : string - One of the profile names from "results_profiles", - "locations_profiles", "activities_profiles", - "projects_profiles" or "organizations_profiles". - """ - valid_services = get_args(_SERVICES) - if service not in valid_services: - raise ValueError( - f"Invalid service: '{service}'. " - f"Valid options are: {valid_services}." - ) - - valid_profiles = _PROFILE_LOOKUP[service] - if profile not in valid_profiles: - raise ValueError( - f"Invalid profile: '{profile}' for service '{service}'. " - f"Valid options are: {valid_profiles}." 
- ) diff --git a/dataretrieval/waterdata.py b/dataretrieval/waterdata.py index bd9cda38..ceed581e 100644 --- a/dataretrieval/waterdata.py +++ b/dataretrieval/waterdata.py @@ -271,13 +271,13 @@ def get_samples( .. code:: >>> # Get PFAS results within a bounding box - >>> df, md = dataretrieval.samples.get_samples( + >>> df, md = dataretrieval.waterdata.get_samples( ... boundingBox=[-90.2,42.6,-88.7,43.2], ... characteristicGroup="Organics, PFAS" ... ) >>> # Get all activities for the Commonwealth of Virginia over a date range - >>> df, md = dataretrieval.samples.get_samples( + >>> df, md = dataretrieval.waterdata.get_samples( ... service="activities", ... profile="sampact", ... activityStartDateLower="2023-10-01", @@ -285,7 +285,7 @@ def get_samples( ... stateFips="US:51") >>> # Get all pH samples for two sites in Utah - >>> df, md = dataretrieval.samples.get_samples( + >>> df, md = dataretrieval.waterdata.get_samples( ... monitoringLocationIdentifier=['USGS-393147111462301', 'USGS-393343111454101'], ... 
usgsPCode='00400') diff --git a/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb index 6a2266f1..55ccc084 100644 --- a/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb +++ b/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb @@ -389,7 +389,7 @@ ], "metadata": { "kernelspec": { - "display_name": "dr-test", + "display_name": "hyswap-dev-environment", "language": "python", "name": "python3" }, @@ -403,7 +403,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.12" + "version": "3.12.7" } }, "nbformat": 4, From b360a65bc25fb29aeb8796c655c59f683d66fd1c Mon Sep 17 00:00:00 2001 From: Elise Hinman Date: Mon, 9 Jun 2025 16:07:52 -0500 Subject: [PATCH 08/15] make samples a wrapper for waterdata --- dataretrieval/samples.py | 62 +++++++++++------------ tests/samples_test.py | 105 --------------------------------------- 2 files changed, 30 insertions(+), 137 deletions(-) delete mode 100755 tests/samples_test.py diff --git a/dataretrieval/samples.py b/dataretrieval/samples.py index 0342460f..3a0bc7c5 100644 --- a/dataretrieval/samples.py +++ b/dataretrieval/samples.py @@ -6,17 +6,13 @@ from __future__ import annotations -import json -from io import StringIO from typing import TYPE_CHECKING, Literal, get_args import pandas as pd import warnings -import requests -from requests.models import PreparedRequest from dataretrieval.utils import BaseMetadata, to_str -from dataretrieval.waterdata import get_codes, get_args, _check_profiles, _BASE_URL, _CODE_SERVICES, _PROFILES, _SERVICES, _PROFILE_LOOKUP +from dataretrieval import waterdata if TYPE_CHECKING: from typing import Optional, Tuple, Union @@ -218,32 +214,34 @@ def get_usgs_samples( " deprecated and will eventually be removed. 
Switch to the" \ " waterdata module as soon as possible, thank you.") - _check_profiles(service, profile) - - params = { - k: v for k, v in locals().items() - if k not in ["ssl_check", "service", "profile"] - and v is not None - } - - - params.update({"mimeType": "text/csv"}) - - if "boundingBox" in params: - params["boundingBox"] = to_str(params["boundingBox"]) - - url = f"{_BASE_URL}/{service}/{profile}" - - req = PreparedRequest() - req.prepare_url(url, params=params) - print(f"Request: {req.url}") - - response = requests.get(url, params=params, verify=ssl_check) - - response.raise_for_status() - - df = pd.read_csv(StringIO(response.text), delimiter=",") - - return df, BaseMetadata(response) + result = waterdata.get_samples( + ssl_check=ssl_check, + service=service, + profile=profile, + activityMediaName=activityMediaName, + activityStartDateLower=activityStartDateLower, + activityStartDateUpper=activityStartDateUpper, + activityTypeCode=activityTypeCode, + characteristicGroup=characteristicGroup, + characteristic=characteristic, + characteristicUserSupplied=characteristicUserSupplied, + boundingBox=boundingBox, + countryFips=countryFips, + stateFips=stateFips, + countyFips=countyFips, + siteTypeCode=siteTypeCode, + siteTypeName=siteTypeName, + usgsPCode=usgsPCode, + hydrologicUnit=hydrologicUnit, + monitoringLocationIdentifier=monitoringLocationIdentifier, + organizationIdentifier=organizationIdentifier, + pointLocationLatitude=pointLocationLatitude, + pointLocationLongitude=pointLocationLongitude, + pointLocationWithinMiles=pointLocationWithinMiles, + projectIdentifier=projectIdentifier, + recordIdentifierUserSupplied=recordIdentifierUserSupplied, + ) + + return result diff --git a/tests/samples_test.py b/tests/samples_test.py deleted file mode 100755 index ae1e3afb..00000000 --- a/tests/samples_test.py +++ /dev/null @@ -1,105 +0,0 @@ -import datetime - -import pytest -from pandas import DataFrame - -from dataretrieval.samples import ( - _check_profiles, - 
get_usgs_samples -) - -def mock_request(requests_mock, request_url, file_path): - """Mock request code""" - with open(file_path) as text: - requests_mock.get( - request_url, text=text.read(), headers={"mock_header": "value"} - ) - -def test_mock_get_samples(requests_mock): - """Tests USGS Samples query""" - request_url = ( - "https://api.waterdata.usgs.gov/samples-data/results/fullphyschem?" - "activityMediaName=Water&activityStartDateLower=2020-01-01" - "&activityStartDateUpper=2024-12-31&monitoringLocationIdentifier=USGS-05406500&mimeType=text%2Fcsv" - ) - response_file_path = "data/samples_results.txt" - mock_request(requests_mock, request_url, response_file_path) - df, md = get_usgs_samples( - service="results", - profile="fullphyschem", - activityMediaName="Water", - activityStartDateLower="2020-01-01", - activityStartDateUpper="2024-12-31", - monitoringLocationIdentifier="USGS-05406500", - ) - assert type(df) is DataFrame - assert df.size == 12127 - assert md.url == request_url - assert isinstance(md.query_time, datetime.timedelta) - assert md.header == {"mock_header": "value"} - assert md.comment is None - -def test_check_profiles(): - """Tests that correct errors are raised for invalid profiles.""" - with pytest.raises(ValueError): - _check_profiles(service="foo", profile="bar") - with pytest.raises(ValueError): - _check_profiles(service="results", profile="foo") - -def test_samples_results(): - """Test results call for proper columns""" - df,_ = get_usgs_samples( - service="results", - profile="narrow", - monitoringLocationIdentifier="USGS-05288705", - activityStartDateLower="2024-10-01", - activityStartDateUpper="2025-04-24" - ) - assert all(col in df.columns for col in ["Location_Identifier", "Activity_ActivityIdentifier"]) - assert len(df) > 0 - -def test_samples_activity(): - """Test activity call for proper columns""" - df,_ = get_usgs_samples( - service="activities", - profile="sampact", - monitoringLocationIdentifier="USGS-06719505" - ) - assert 
len(df) > 0 - assert len(df.columns) == 95 - assert "Location_HUCTwelveDigitCode" in df.columns - -def test_samples_locations(): - """Test locations call for proper columns""" - df,_ = get_usgs_samples( - service="locations", - profile="site", - stateFips="US:55", - activityStartDateLower="2024-10-01", - activityStartDateUpper="2025-04-24", - usgsPCode="00010" - ) - assert all(col in df.columns for col in ["Location_Identifier", "Location_Latitude"]) - assert len(df) > 0 - -def test_samples_projects(): - """Test projects call for proper columns""" - df,_ = get_usgs_samples( - service="projects", - profile="project", - stateFips="US:15", - activityStartDateLower="2024-10-01", - activityStartDateUpper="2025-04-24" - ) - assert all(col in df.columns for col in ["Org_Identifier", "Project_Identifier"]) - assert len(df) > 0 - -def test_samples_organizations(): - """Test organizations call for proper columns""" - df,_ = get_usgs_samples( - service="organizations", - profile="count", - stateFips="US:01" - ) - assert len(df) == 1 - assert df.size == 3 From ade5b594aaa4c09c97501bc72f6fbaeea36cc216 Mon Sep 17 00:00:00 2001 From: Elise Hinman Date: Tue, 24 Jun 2025 17:09:25 -0500 Subject: [PATCH 09/15] specify waterdata imports --- dataretrieval/samples.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dataretrieval/samples.py b/dataretrieval/samples.py index 3a0bc7c5..065aa7ea 100644 --- a/dataretrieval/samples.py +++ b/dataretrieval/samples.py @@ -12,11 +12,11 @@ import warnings from dataretrieval.utils import BaseMetadata, to_str -from dataretrieval import waterdata +from dataretrieval.waterdata import get_samples if TYPE_CHECKING: from typing import Optional, Tuple, Union - + from dataretrieval.waterdata import _SERVICES, _PROFILES from pandas import DataFrame def get_usgs_samples( From f4b7dc70b3c21c688d1e44208d41a8e68e4a6164 Mon Sep 17 00:00:00 2001 From: Elise Hinman <121896266+ehinman@users.noreply.github.com> Date: Wed, 25 Jun 2025 08:39:58 
-0500 Subject: [PATCH 10/15] Update dataretrieval/nwis.py Co-authored-by: Timothy Hodson <34148978+thodson-usgs@users.noreply.github.com> --- dataretrieval/nwis.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index 02eefcbd..8ff7dcf9 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -138,10 +138,9 @@ def get_qwdata( in the waterdata module. """ - return print("This function is deprecated and has been " \ - "replaced with `get_samples() in the " \ - "waterdata module. If you have questions, " \ - "please reach out to comptools@usgs.gov") +raise NameError( + "`nwis.get_qwdata` has been replaced with `waterdata.get_samples()`." +) def get_discharge_measurements( From 46cb9e86025b11eb78fa6003aa0ea39f411f6f2d Mon Sep 17 00:00:00 2001 From: Elise Hinman <121896266+ehinman@users.noreply.github.com> Date: Wed, 25 Jun 2025 08:40:13 -0500 Subject: [PATCH 11/15] Update dataretrieval/samples.py Co-authored-by: Timothy Hodson <34148978+thodson-usgs@users.noreply.github.com> --- dataretrieval/samples.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dataretrieval/samples.py b/dataretrieval/samples.py index 065aa7ea..c70bf91a 100644 --- a/dataretrieval/samples.py +++ b/dataretrieval/samples.py @@ -207,7 +207,11 @@ def get_usgs_samples( """ - warnings.warn("The `get_usgs_samples` function is moving from" \ + warnings.warn( + "`get_usgs_samples` is deprecated and will be removed. Use `waterdata.get_samples` instead.", + DeprecationWarning, + stacklevel=2, + ) " the samples module to the new waterdata module, where" \ " it will be called simply `get_samples`. All of the same" \ " functionality will be retained. 
The samples module is" \ From d1f6e7baddcf112f4ec60e85267bb2d9bfb5caab Mon Sep 17 00:00:00 2001 From: Elise Hinman Date: Wed, 25 Jun 2025 08:41:45 -0500 Subject: [PATCH 12/15] fix function call --- dataretrieval/samples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataretrieval/samples.py b/dataretrieval/samples.py index c70bf91a..84fa8ed2 100644 --- a/dataretrieval/samples.py +++ b/dataretrieval/samples.py @@ -218,7 +218,7 @@ def get_usgs_samples( " deprecated and will eventually be removed. Switch to the" \ " waterdata module as soon as possible, thank you.") - result = waterdata.get_samples( + result = get_samples( ssl_check=ssl_check, service=service, profile=profile, From bfa921d643a9a959838195e5f4dc72904d0b09af Mon Sep 17 00:00:00 2001 From: Elise Hinman Date: Wed, 25 Jun 2025 08:43:17 -0500 Subject: [PATCH 13/15] remove old warning text --- dataretrieval/samples.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/dataretrieval/samples.py b/dataretrieval/samples.py index 84fa8ed2..c55c1a84 100644 --- a/dataretrieval/samples.py +++ b/dataretrieval/samples.py @@ -212,12 +212,7 @@ def get_usgs_samples( DeprecationWarning, stacklevel=2, ) - " the samples module to the new waterdata module, where" \ - " it will be called simply `get_samples`. All of the same" \ - " functionality will be retained. The samples module is" \ - " deprecated and will eventually be removed. 
Switch to the" \ - " waterdata module as soon as possible, thank you.") - + result = get_samples( ssl_check=ssl_check, service=service, From 530ba4f010fefd237f70420e25ecc47c035c82e7 Mon Sep 17 00:00:00 2001 From: Elise Hinman Date: Wed, 25 Jun 2025 09:27:03 -0500 Subject: [PATCH 14/15] add nldi to documentation site, add contact in readme --- docs/source/reference/index.rst | 2 ++ docs/source/reference/nldi.rst | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 docs/source/reference/nldi.rst diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst index a2515073..1c9c498d 100644 --- a/docs/source/reference/index.rst +++ b/docs/source/reference/index.rst @@ -8,8 +8,10 @@ API reference :maxdepth: 1 nadp + nldi nwis samples streamstats utils + waterdata wqp diff --git a/docs/source/reference/nldi.rst b/docs/source/reference/nldi.rst new file mode 100644 index 00000000..a417954b --- /dev/null +++ b/docs/source/reference/nldi.rst @@ -0,0 +1,8 @@ +.. _nldi: + +dataretrieval.nldi +------------------ + +.. automodule:: dataretrieval.nldi + :members: + :special-members: \ No newline at end of file From e5b892fb66768a187d24e279dd555fdac53a521d Mon Sep 17 00:00:00 2001 From: Elise Hinman Date: Wed, 25 Jun 2025 13:20:00 -0500 Subject: [PATCH 15/15] edit readme, change indent of name error --- README.md | 10 ++++++---- dataretrieval/nwis.py | 6 +++--- tests/waterdata_test.py | 4 +++- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index edbe3cb8..f8c14a36 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ Water quality data are available from: - [Samples](https://waterdata.usgs.gov/download-samples/#dataProfile=site) - Discrete USGS water quality data only - [Water Quality Portal](https://www.waterqualitydata.us/) - Discrete water quality data from USGS and EPA. Older data are available in the legacy WQX version 2 format; all data are available in the beta WQX3.0 format. 
-To access the full functionality available from NWIS web services, nwis.get record appends any additional kwargs into the REST request. For example: +To access the full functionality available from NWIS web services, `nwis.get_record` appends any additional kwargs into the REST request. For example, this function call: ```python nwis.get_record(sites='03339000', service='dv', start='2017-12-31', parameterCd='00060') ``` @@ -97,9 +97,11 @@ Any help in testing, development, documentation and other tasks is welcome. For more details, see the file [CONTRIBUTING.md](CONTRIBUTING.md). -## Package Support -The Water Mission Area of the USGS supports the development and maintenance of `dataretrieval` -and most likely further into the future. +## Need help? + +The Water Mission Area of the USGS supports the development and maintenance of `dataretrieval`. Any questions can be directed to the Computational Tools team at +comptools@usgs.gov. + Resources are available primarily for maintenance and responding to user questions. Priorities on the development of new features are determined by the `dataretrieval` development team. diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index 8ff7dcf9..96218bda 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -138,9 +138,9 @@ def get_qwdata( in the waterdata module. """ -raise NameError( - "`nwis.get_qwdata` has been replaced with `waterdata.get_samples()`." -) + raise NameError( + "`nwis.get_qwdata` has been replaced with `waterdata.get_samples()`." + ) def get_discharge_measurements( diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index 70b9ffda..50eefdc5 100755 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -5,7 +5,9 @@ from dataretrieval.waterdata import ( _check_profiles, - get_samples + get_samples, + _SERVICES, + _PROFILES ) def mock_request(requests_mock, request_url, file_path):