diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..2a1d97934 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.pythonPath": "/Users/tim/anaconda/anaconda/envs/altair_test/bin/python" +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 000000000..ff59cb43c --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,15 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=733558 + // for the documentation about the tasks.json format + "version": "2.0.0", + "tasks": [ + { + "label": "pip install", + "type": "shell", + "command": "pip install -e .", + "problemMatcher": [ + "$go" + ] + } + ] +} \ No newline at end of file diff --git a/altair/utils/data.py b/altair/utils/data.py index 2398f02e9..85a214ef3 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -33,7 +33,7 @@ class DataTransformerRegistry(PluginRegistry[DataTransformerType]): # form. # # A data model transformer has the following type signature: -# DataModelType = Union[dict, pd.DataFrame] +# DataModelType = Union[dict, pd.DataFrame, gpd.GeoDataFrame, geojson interface object] # DataModelTransformerType = Callable[[DataModelType, KwArgs], DataModelType] # ============================================================================== @@ -57,6 +57,8 @@ def limit_rows(data, max_rows=5000): values = data['values'] else: return data + else: + return data if max_rows is not None and len(values) > max_rows: raise MaxRowsError('The number of rows in your dataset is greater ' 'than the maximum allowed ({0}). 
' @@ -91,31 +93,25 @@ def to_json(data, prefix='altair-data'): if not hasattr(data,'__geo_interface__'): data.to_json(filename, orient='records') else: #GeoPandas - with open(filename) as f: + with open(filename,'w') as f: json.dump(data.__geo_interface__, f) data_format['property']='features' + elif hasattr(data,'__geo_interface__'): # geojson object + with open(filename,'w') as f: + json.dump(data.__geo_interface__, f) elif isinstance(data, dict): if 'values' not in data: raise KeyError('values expected in data dict, but not present.') values = data['values'] - with open(filename) as f: + with open(filename,'w') as f: json.dump(values, f) return { 'url': filename, 'format': data_format } -@curry -def to_geojson_values(data, feature="features"): - if not hasattr(data, '__geo_interface__'): - raise TypeError('Expected GeoDataFrame or __geo_interface__, got: {}'.format(type(data))) - if isinstance(data, pd.DataFrame): - data = sanitize_dataframe(data) - return { - 'values':data.__geo_interface__, - 'format':{'type':'json','property':feature} - } + @curry def to_csv(data, prefix='altair-data'): @@ -124,12 +120,17 @@ def to_csv(data, prefix='altair-data'): ext = '.csv' filename = _compute_filename(prefix=prefix, ext=ext) if isinstance(data, pd.DataFrame): + if hasattr(data,'__geo_interface__'):#GeoPandas + raise NotImplementedError('use to_json or to_values with GeoDataFrame objects.') + data = sanitize_dataframe(data) data.to_csv(filename) return { 'url': filename, 'format': {'type': 'csv'} } + elif hasattr(data,'__geo_interface__'):#GeoJSON + raise NotImplementedError('to_csv only works with Pandas DataFrame objects.') elif isinstance(data, dict): raise NotImplementedError('to_csv only works with Pandas DataFrame objects.') @@ -147,6 +148,12 @@ def to_values(data): } return {'values': data.to_dict(orient='records')} + elif hasattr(data,'__geo_interface__'):#GeoJSON + return { + 'values':data.__geo_interface__, + 'format':{'type':'json'} + } + elif isinstance(data, 
dict): if 'values' not in data: raise KeyError('values expected in data dict, but not present.') @@ -155,8 +162,8 @@ def to_values(data): def check_data_type(data): """Raise if the data is not a dict or DataFrame.""" - if not isinstance(data, (dict, pd.DataFrame)): - raise TypeError('Expected dict or DataFrame, got: {}'.format(type(data))) + if not (isinstance(data, (dict, pd.DataFrame)) or hasattr(data,'__geo_interface__')): + raise TypeError('Expected dict, DataFrame, GeoDataFrame or geojson interface object, got: {}'.format(type(data))) # ============================================================================== diff --git a/altair/utils/tests/test_data.py b/altair/utils/tests/test_data.py index 599bb4843..253cfde93 100644 --- a/altair/utils/tests/test_data.py +++ b/altair/utils/tests/test_data.py @@ -59,7 +59,7 @@ def test_to_values(): def test_type_error(): - """Ensure that TypeError is raised for types other than dict/DataFrame.""" + """Ensure that TypeError is raised for types other than dict/DataFrame/GeoDataFrame/__geo_interface__.""" for f in (sample, limit_rows, to_values): with pytest.raises(TypeError): pipe(0, f) diff --git a/altair/utils/tests/test_geojson.py b/altair/utils/tests/test_geojson.py new file mode 100644 index 000000000..1a83ddcda --- /dev/null +++ b/altair/utils/tests/test_geojson.py @@ -0,0 +1,153 @@ +import pytest +import pandas as pd +import altair.vegalite.v2 as alt + +from ..data import pipe, to_values, to_csv +from .. 
import parse_shorthand + + +def _create_geojson(): + return { + "type": "FeatureCollection", + "bbox": [ + -161.30174569731454, + -60.39157788643298, + 172.67580002536624, + 42.438347020953984 + ], + "features": [ + { + "type": "Feature", + "properties": {"prop": 1}, + "geometry": { + "type": "LineString", + "coordinates": [ + [-69.2980008004234, 23.18780298146116], + [-161.30174569731454, -60.39157788643298], + [172.67580002536624, 24.151450472748962] + ] + }, + "id": "0", + "bbox": [ + -161.30174569731454, + -60.39157788643298, + 172.67580002536624, + 24.151450472748962 + ] + }, + { + "type": "Feature", + "properties": {"prop": 2}, + "geometry": { + "type": "LineString", + "coordinates": [ + [156.03047546751765, 42.438347020953984], + [35.46296546950265, -18.185542212943375], + [152.53211600051463, 23.471406463455793] + ] + }, + "id": "1", + "bbox": [ + 35.46296546950265, + -18.185542212943375, + 156.03047546751765, + 42.438347020953984 + ] + }, + { + "type": "Feature", + "properties": {"prop": 3}, + "geometry": { + "type": "LineString", + "coordinates": [ + [-133.98414913936503, 25.39468871174894], + [145.04376601680605, 13.058626381790845], + [170.30576801294046, 38.67128737163435] + ] + }, + "id": "2", + "bbox": [ + -133.98414913936503, + 13.058626381790845, + 170.30576801294046, + 38.67128737163435 + ] + } + ] + } + +def _create_fake_geo_interface(): + class FakeGeoJSON: + __geo_interface__=_create_geojson() + return FakeGeoJSON() + +def _create_fake_geodatafarme(): + class FakeGeoDataFrame(pd.DataFrame): + __geo_interface__=_create_geojson() + def copy(self, deep=True): + data = self._data + if deep: + data = data.copy() + return FakeGeoDataFrame(data).__finalize__(self) + + return FakeGeoDataFrame({'prop':[1,2,3]}) + +def test_to_values_geo(): + """Test the to_values data transformer.""" + + data = _create_fake_geodatafarme() + result = pipe(data, to_values) + assert result['format'] == {'type':'json','property':'features'} + assert 
result['values']==data.__geo_interface__ + + data = _create_fake_geo_interface() + result = pipe(data, to_values) + assert result['format'] == {'type':'json'} + assert result['values']==data.__geo_interface__ + +def test_chart_data_geotypes(): + Chart = lambda data,**arg: alt.Chart(data).mark_geoshape().project().encode(**arg) + + # Fake GeoPandas + data = _create_fake_geodatafarme() + dct = Chart(data,fill='prop').to_dict() + assert dct['data']['format'] == {'type':'json','property':'features'} + assert dct['data']['values'] == data.__geo_interface__ + + # Fake GeoInterface + data = _create_fake_geo_interface() + dct = Chart(data).to_dict() + assert dct['data']['format'] == {'type':'json'} + assert dct['data']['values'] == data.__geo_interface__ + +def test_parse_shorthand_with_geodata(): + def check(s, data, **kwargs): + assert parse_shorthand(s, data) == kwargs + + data = _create_fake_geodatafarme() + + check('prop', data, field='properties.prop', type='quantitative') + check('prop:N', data, field='properties.prop', type='nominal') + check('count(prop)', data, field='properties.prop', aggregate='count', type='quantitative') + + data = _create_fake_geo_interface() + + check('properties.prop:Q', data, field='properties.prop', type='quantitative') + check('prop', data, field='prop') + +def test_to_csv_geo(): + """Test that to_csv raises an error with geopandas.""" + + data = _create_fake_geodatafarme() + with pytest.raises(NotImplementedError): + pipe(data, to_csv) + +def test_geo_pandas(): + gpd = pytest.importorskip('geopandas') + + data = gpd.GeoDataFrame.from_features(_create_geojson()) + dct = alt.Chart(data).mark_geoshape().project().encode(fill='prop').to_dict() + + assert dct['data']['format'] == {'type':'json','property':'features'} + assert (gpd.GeoDataFrame.from_features(dct['data']['values']) == data).all().all() + assert dct['encoding'] == {'fill': {'field': 'properties.prop', 'type': 'quantitative'}} diff --git a/altair/vegalite/v2/api.py 
b/altair/vegalite/v2/api.py index 557fe897e..b6a4c2ce9 100644 --- a/altair/vegalite/v2/api.py +++ b/altair/vegalite/v2/api.py @@ -19,11 +19,11 @@ def _prepare_data(data): """Convert input data to data for use within schema""" if data is Undefined: return data + elif isinstance(data, pd.DataFrame) or hasattr(data, '__geo_interface__'): + return pipe(data, data_transformers.get()) elif isinstance(data, (dict, core.Data, core.InlineData, core.UrlData, core.NamedData)): return data - elif isinstance(data, pd.DataFrame): - return pipe(data, data_transformers.get()) elif isinstance(data, six.string_types): return core.UrlData(data) else: