diff --git a/ci/scripts/install_pandas.sh b/ci/scripts/install_pandas.sh index 03009b3824f..b8a63632e28 100755 --- a/ci/scripts/install_pandas.sh +++ b/ci/scripts/install_pandas.sh @@ -40,7 +40,7 @@ if [ "${pandas}" = "upstream_devel" ]; then elif [ "${pandas}" = "nightly" ]; then pip install --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple --pre pandas elif [ "${pandas}" = "latest" ]; then - pip install pandas + pip install --upgrade pandas else pip install pandas=="${pandas}" fi diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py index 94868741f39..e50b2715e3e 100644 --- a/python/pyarrow/tests/parquet/test_basic.py +++ b/python/pyarrow/tests/parquet/test_basic.py @@ -728,6 +728,7 @@ def test_parquet_file_too_small(tempdir): @pytest.mark.fastparquet @pytest.mark.filterwarnings("ignore:RangeIndex:FutureWarning") @pytest.mark.filterwarnings("ignore:tostring:DeprecationWarning:fastparquet") +@pytest.mark.filterwarnings("ignore:unclosed file:ResourceWarning") def test_fastparquet_cross_compatibility(tempdir): fp = pytest.importorskip('fastparquet') @@ -751,17 +752,21 @@ def test_fastparquet_cross_compatibility(tempdir): fp_file = fp.ParquetFile(file_arrow) df_fp = fp_file.to_pandas() - tm.assert_frame_equal(df, df_fp) + # check_dtype=False: string/categorical dtype handling differs between libraries + tm.assert_frame_equal(df, df_fp, check_dtype=False, check_categorical=False) # Fastparquet -> arrow file_fastparquet = str(tempdir / "cross_compat_fastparquet.parquet") - fp.write(file_fastparquet, df) + # fastparquet can't write pandas 3.0 StringDtype + df_for_fp = df.copy() + df_for_fp['a'] = df_for_fp['a'].astype(object) + df_for_fp['f'] = df_for_fp['f'].astype(object) + fp.write(file_fastparquet, df_for_fp) table_fp = pq.read_pandas(file_fastparquet) # for fastparquet written file, categoricals comes back as strings # (no arrow schema in parquet metadata) - df['f'] = df['f'].astype(object) - tm.assert_frame_equal(table_fp.to_pandas(), df) + tm.assert_frame_equal(table_fp.to_pandas(), df_for_fp, check_dtype=False) @pytest.mark.parametrize('array_factory', [ diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index cecf10f2165..e7b625422f4 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -4956,7 +4956,7 @@ def test_timestamp_as_object_non_nanosecond(resolution, tz, dt): assert isinstance(result[0], datetime) if tz: assert result[0].tzinfo is not None - expected = result[0].tzinfo.fromutc(dt) + expected = dt.replace(tzinfo=timezone.utc).astimezone(result[0].tzinfo) else: assert result[0].tzinfo is None expected = dt