Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/scripts/install_pandas.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ if [ "${pandas}" = "upstream_devel" ]; then
elif [ "${pandas}" = "nightly" ]; then
pip install --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple --pre pandas
elif [ "${pandas}" = "latest" ]; then
pip install pandas
pip install --upgrade pandas
else
pip install pandas=="${pandas}"
fi
13 changes: 9 additions & 4 deletions python/pyarrow/tests/parquet/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,6 +728,7 @@ def test_parquet_file_too_small(tempdir):
@pytest.mark.fastparquet
@pytest.mark.filterwarnings("ignore:RangeIndex:FutureWarning")
@pytest.mark.filterwarnings("ignore:tostring:DeprecationWarning:fastparquet")
@pytest.mark.filterwarnings("ignore:unclosed file:ResourceWarning")
def test_fastparquet_cross_compatibility(tempdir):
fp = pytest.importorskip('fastparquet')

Expand All @@ -751,17 +752,21 @@ def test_fastparquet_cross_compatibility(tempdir):

fp_file = fp.ParquetFile(file_arrow)
df_fp = fp_file.to_pandas()
tm.assert_frame_equal(df, df_fp)
# check_dtype=False: string/categorical dtype handling differs between libraries
tm.assert_frame_equal(df, df_fp, check_dtype=False, check_categorical=False)

# Fastparquet -> arrow
file_fastparquet = str(tempdir / "cross_compat_fastparquet.parquet")
fp.write(file_fastparquet, df)
# fastparquet can't write pandas 3.0 StringDtype
df_for_fp = df.copy()
df_for_fp['a'] = df_for_fp['a'].astype(object)
df_for_fp['f'] = df_for_fp['f'].astype(object)
fp.write(file_fastparquet, df_for_fp)

table_fp = pq.read_pandas(file_fastparquet)
# for a fastparquet-written file, categoricals come back as strings
# (no arrow schema in parquet metadata)
df['f'] = df['f'].astype(object)
tm.assert_frame_equal(table_fp.to_pandas(), df)
tm.assert_frame_equal(table_fp.to_pandas(), df_for_fp, check_dtype=False)


@pytest.mark.parametrize('array_factory', [
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -4956,7 +4956,7 @@ def test_timestamp_as_object_non_nanosecond(resolution, tz, dt):
assert isinstance(result[0], datetime)
if tz:
assert result[0].tzinfo is not None
expected = result[0].tzinfo.fromutc(dt)
expected = dt.replace(tzinfo=timezone.utc).astimezone(result[0].tzinfo)
else:
assert result[0].tzinfo is None
expected = dt
Expand Down
Loading