Integration with geopandas #588 (#818)
**`.gitignore`**

```diff
@@ -62,6 +62,7 @@ target/
 .ipynb_checkpoints
 .idea/*
 .vscode/*
 tools/_build
 Untitled*.ipynb
 .mypy*
```
**`altair/utils/data.py`**
```diff
@@ -1,6 +1,7 @@
 import json
 import random
 import uuid
+import warnings
 
 import pandas as pd
 from toolz.curried import curry, pipe  # noqa
```
```diff
@@ -33,7 +34,7 @@ class DataTransformerRegistry(PluginRegistry[DataTransformerType]):
 # form.
 #
 # A data model transformer has the following type signature:
-# DataModelType = Union[dict, pd.DataFrame]
+# DataModelType = Union[dict, pd.DataFrame, gpd.GeoDataFrame, geojson.GeoJSON]
 # DataModelTransformerType = Callable[[DataModelType, KwArgs], DataModelType]
 # ==============================================================================
```
```diff
@@ -52,11 +53,10 @@ def limit_rows(data, max_rows=5000):
     check_data_type(data)
     if isinstance(data, pd.DataFrame):
         values = data
-    elif isinstance(data, dict):
-        if 'values' in data:
-            values = data['values']
-        else:
-            return data
+    elif isinstance(data, dict) and ('values' in data):
+        values = data['values']
+    else:
+        return data
 
     if max_rows is not None and len(values) > max_rows:
         raise MaxRowsError('The number of rows in your dataset is greater '
                            'than the maximum allowed ({0}). '
```

**Review comment** (on the new `else: return data` fall-through): As currently written the function will never progress beyond this line, and the max_rows check will never happen.

**Reply:** If so, how could it pass?

**Reply:** Or do you mean that it bypasses unknown data types? Then yes, like #887.
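To make the reviewer's point concrete: any input that is neither a DataFrame nor a values-dict, including geo-interface objects, takes the `return data` fall-through, so `max_rows` is never enforced for it. A minimal sketch of one way to count GeoJSON features as rows (the name `limit_rows_geo` is hypothetical; it reuses the module's `check_data_type` and `MaxRowsError`):

```python
def limit_rows_geo(data, max_rows=5000):
    """Hypothetical variant of limit_rows that also counts GeoJSON features."""
    check_data_type(data)
    if isinstance(data, pd.DataFrame):
        values = data
    elif isinstance(data, dict) and ('values' in data):
        values = data['values']
    elif hasattr(data, '__geo_interface__'):
        # A FeatureCollection keeps its rows under 'features'; other
        # GeoJSON objects count as a single row here.
        values = data.__geo_interface__.get('features', [data])
    else:
        return data
    if max_rows is not None and len(values) > max_rows:
        raise MaxRowsError('The number of rows in your dataset is greater '
                           'than the maximum allowed ({0}).'.format(max_rows))
    return data
```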
```diff
@@ -78,25 +78,63 @@ def sample(data, n=None, frac=None):
             values = random.sample(values, n)
             return {'values': values}
 
 
+def _geopandas_to_dict(data):
+    try:
+        if ('geometry' != data.geometry.name) and ('geometry' in data.columns):
+            warnings.warn("Column name 'geometry' is a reserved name for GeoDataFrame. "
+                          "A column named 'geometry' should contain the geometry to display, "
+                          "or not be used; its data will not be accessible from the chart description.")
+        if 'type' in data.columns:
+            warnings.warn("Column name 'type' is a reserved name for GeoDataFrame. "
+                          "Data of column 'type' will not be accessible from the chart description.")
+        if 'id' in data.columns:
+            warnings.warn("Column name 'id' is a reserved name for GeoDataFrame index values. "
+                          "Data of column 'id' will not be accessible from the chart description.")
+        return [dict(row, type=feature['type'], geometry=feature['geometry'], id=feature['id'])
+                for row, feature in zip(
+                    data.drop(data.geometry.name, axis=1).to_dict('row'),
+                    data.geometry.__geo_interface__['features'])]
+
+    except AttributeError as err:
+        if str(err).startswith('No geometry data set yet'):
+            warnings.warn("GeoDataFrame has no geometry to display.")
+            return data.to_dict('row')
+        else:
+            raise
```
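For a sense of the output shape: a sketch of what `_geopandas_to_dict` yields for a tiny GeoDataFrame (requires geopandas and shapely; the `prop` column is just an example):

```python
import geopandas as gpd
from shapely.geometry import Point

gdf = gpd.GeoDataFrame({'prop': [1, 2]},
                       geometry=[Point(0, 0), Point(1, 1)])
records = _geopandas_to_dict(gdf)
# Each record merges the non-geometry columns with the feature's
# 'type', 'geometry' and 'id' taken from __geo_interface__, roughly:
# [{'prop': 1, 'type': 'Feature', 'id': '0',
#   'geometry': {'type': 'Point', 'coordinates': (0.0, 0.0)}},
#  {'prop': 2, 'type': 'Feature', 'id': '1',
#   'geometry': {'type': 'Point', 'coordinates': (1.0, 1.0)}}]
# (exact id representation and key order vary with the geopandas version)
```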
```diff
 @curry
 def to_json(data, prefix='altair-data'):
     """Write the data model to a .json file and return a url based data model."""
     check_data_type(data)
     ext = '.json'
     filename = _compute_filename(prefix=prefix, ext=ext)
-    if isinstance(data, pd.DataFrame):
+    data_format = {'type': 'json'}
+
+    if hasattr(data, '__geo_interface__'):
+        if isinstance(data, pd.DataFrame):  # GeoPandas
+            data = sanitize_dataframe(data)
+            values = _geopandas_to_dict(data)
+            with open(filename, 'w') as f:
+                json.dump(values, f)
+        else:
+            with open(filename, 'w') as f:
+                json.dump(data.__geo_interface__, f)
+
+    elif isinstance(data, pd.DataFrame):
         data = sanitize_dataframe(data)
         data.to_json(filename, orient='records')
+
     elif isinstance(data, dict):
         if 'values' not in data:
             raise KeyError('values expected in data dict, but not present.')
         values = data['values']
-        with open(filename) as f:
+        with open(filename, 'w') as f:
             json.dump(values, f)
     return {
         'url': filename,
-        'format': {'type': 'json'}
+        'format': data_format
     }
```
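A quick usage sketch for the new geo branch of `to_json` (the `FakeGeoJSON` stand-in is hypothetical; any object exposing `__geo_interface__` takes the same path):

```python
class FakeGeoJSON:
    __geo_interface__ = {'type': 'FeatureCollection', 'features': []}

spec = to_json(FakeGeoJSON())
# The GeoJSON payload is dumped to a uuid-named file on disk, and the
# returned data model references it by URL:
# {'url': 'altair-data-<uuid>.json', 'format': {'type': 'json'}}
```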
```diff
@@ -106,7 +144,10 @@ def to_csv(data, prefix='altair-data'):
     check_data_type(data)
     ext = '.csv'
     filename = _compute_filename(prefix=prefix, ext=ext)
-    if isinstance(data, pd.DataFrame):
+    if hasattr(data, '__geo_interface__'):
+        raise NotImplementedError('use to_json or to_values with GeoJSON objects.')
+
+    elif isinstance(data, pd.DataFrame):
         data = sanitize_dataframe(data)
         data.to_csv(filename)
     return {
```
```diff
@@ -121,9 +162,22 @@ def to_csv(data, prefix='altair-data'):
 def to_values(data):
     """Replace a DataFrame by a data model with values."""
     check_data_type(data)
-    if isinstance(data, pd.DataFrame):
+
+    if hasattr(data, '__geo_interface__'):
+        if isinstance(data, pd.DataFrame):  # GeoPandas
+            data = sanitize_dataframe(data)
+            return {'values': _geopandas_to_dict(data),
+                    'format': {'type': 'json'}}
+        else:
+            return {
+                'values': data.__geo_interface__,
+                'format': {'type': 'json'},
+            }
+
+    elif isinstance(data, pd.DataFrame):
         data = sanitize_dataframe(data)
         return {'values': data.to_dict(orient='records')}
+
     elif isinstance(data, dict):
         if 'values' not in data:
             raise KeyError('values expected in data dict, but not present.')
```
```diff
@@ -132,8 +186,8 @@ def to_values(data):
 
 def check_data_type(data):
     """Raise if the data is not a dict or DataFrame."""
-    if not isinstance(data, (dict, pd.DataFrame)):
-        raise TypeError('Expected dict or DataFrame, got: {}'.format(type(data)))
+    if not (isinstance(data, (dict, pd.DataFrame)) or hasattr(data, '__geo_interface__')):
+        raise TypeError('Expected dict, DataFrame, GeoDataFrame or geojson, got: {}'.format(type(data)))
 
 
 # ==============================================================================
```
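Because `check_data_type` now duck-types on `__geo_interface__`, anything implementing that protocol (GeoDataFrame, GeoSeries, shapely geometries, custom classes) is accepted. A minimal sketch:

```python
class MyFeature:
    __geo_interface__ = {
        'type': 'Feature',
        'geometry': {'type': 'Point', 'coordinates': (0.0, 0.0)},
        'properties': {},
    }

check_data_type(MyFeature())  # passes: the object exposes __geo_interface__
check_data_type([1, 2, 3])    # raises TypeError: not a dict/DataFrame/geo object
```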
**New test file** (under `altair/utils/tests/`, inferred from the relative imports)

```python
import pytest
import pandas as pd
import altair.vegalite.v2 as alt

from ..data import pipe, to_values, to_csv
from .. import parse_shorthand


def _create_geojson():
    return {
        "type": "FeatureCollection",
        "bbox": [
            -161.30174569731454,
            -60.39157788643298,
            172.67580002536624,
            42.438347020953984
        ],
        "features": [
            {
                "type": "Feature",
                "properties": {"prop": 1},
                "geometry": {
                    "type": "LineString",
                    "coordinates": [
                        [-69.2980008004234, 23.18780298146116],
                        [-161.30174569731454, -60.39157788643298],
                        [172.67580002536624, 24.151450472748962]
                    ]
                },
                "id": "0",
                "bbox": [
                    -161.30174569731454,
                    -60.39157788643298,
                    172.67580002536624,
                    24.151450472748962
                ]
            },
            {
                "type": "Feature",
                "properties": {"prop": 2},
                "geometry": {
                    "type": "LineString",
                    "coordinates": [
                        [156.03047546751765, 42.438347020953984],
                        [35.46296546950265, -18.185542212943375],
                        [152.53211600051463, 23.471406463455793]
                    ]
                },
                "id": "1",
                "bbox": [
                    35.46296546950265,
                    -18.185542212943375,
                    156.03047546751765,
                    42.438347020953984
                ]
            },
            {
                "type": "Feature",
                "properties": {"prop": 3},
                "geometry": {
                    "type": "LineString",
                    "coordinates": [
                        [-133.98414913936503, 25.39468871174894],
                        [145.04376601680605, 13.058626381790845],
                        [170.30576801294046, 38.67128737163435]
                    ]
                },
                "id": "2",
                "bbox": [
                    -133.98414913936503,
                    13.058626381790845,
                    170.30576801294046,
                    38.67128737163435
                ]
            }
        ]
    }


def _create_fake_geo_interface():
    class FakeGeoJSON:
        __geo_interface__ = _create_geojson()
    return FakeGeoJSON()


def _create_fake_geodataframe():
    class FakeGeoSeries:
        __geo_interface__ = _create_geojson()

        def __init__(self, geometry_name='geometry'):
            self.name = geometry_name

    class FakeGeoDataFrame(pd.DataFrame):
        __geo_interface__ = _create_geojson()
        geometry = FakeGeoSeries()

        # copy/drop are overridden so that operations performed by the
        # data transformers hand back this fake class (and thus its
        # __geo_interface__) instead of a plain DataFrame.
        def copy(self, deep=True):
            data = self._data
            if deep:
                data = data.copy()
            return FakeGeoDataFrame(data).__finalize__(self)

        def drop(self, labels=None, axis=0, **kwargs):
            if (axis == 1) and (self.geometry.name == labels):
                return self.copy()
            return super(FakeGeoDataFrame, self).drop(labels, axis, **kwargs)

    return FakeGeoDataFrame({'prop': [1, 2, 3]})


def test_to_values_geo():
    """Test the to_values data transformer."""
    data = _create_fake_geodataframe()
    result = pipe(data, to_values)
    assert result['format'] == {'type': 'json'}
    assert result['values'][1]['geometry'] == data.__geo_interface__['features'][1]['geometry']
    assert result['values'][1]['type'] == data.__geo_interface__['features'][1]['type']

    data = _create_fake_geo_interface()
    result = pipe(data, to_values)
    assert result['format'] == {'type': 'json'}
    assert result['values'] == data.__geo_interface__


def test_chart_data_geotypes():
    Chart = lambda data, **arg: alt.Chart(data).mark_geoshape().project().encode(**arg)

    # Fake GeoPandas
    data = _create_fake_geodataframe()
    dct = Chart(data, fill='prop').to_dict()
    assert dct['data']['values'][1]['geometry'] == data.__geo_interface__['features'][1]['geometry']
    assert dct['data']['values'][1]['type'] == data.__geo_interface__['features'][1]['type']

    # Fake GeoInterface
    data = _create_fake_geo_interface()
    dct = Chart(data).to_dict()
    assert dct['data']['format'] == {'type': 'json'}
    assert dct['data']['values'] == data.__geo_interface__


def test_parse_shorthand_with_geodata():
    def check(s, data, **kwargs):
        assert parse_shorthand(s, data) == kwargs

    data = _create_fake_geodataframe()

    check('prop', data, field='prop', type='quantitative')
    check('prop:N', data, field='prop', type='nominal')
    check('count(prop)', data, field='prop', aggregate='count', type='quantitative')

    data = _create_fake_geo_interface()

    check('properties.prop:Q', data, field='properties.prop', type='quantitative')
    check('prop', data, field='prop')


def test_to_csv_geo():
    """Test that to_csv raises an error with geopandas."""
    data = _create_fake_geodataframe()
    with pytest.raises(NotImplementedError):
        pipe(data, to_csv)


def test_geo_pandas():
    gpd = pytest.importorskip('geopandas')

    data = gpd.GeoDataFrame.from_features(_create_geojson())
    dct = alt.Chart(data).mark_geoshape().project().encode(fill='prop').to_dict()

    assert dct['data']['format'] == {'type': 'json'}
    assert dct['encoding'] == {'fill': {'field': 'prop', 'type': 'quantitative'}}

    data2 = gpd.GeoDataFrame.from_features({
        'type': 'FeatureCollection',
        'features': [{'type': item['type'],
                      'geometry': item['geometry'],
                      'id': item['id'],
                      'properties': {k: item[k]
                                     for k in item.keys()
                                     if k not in ('type', 'geometry')}
                      } for item in dct['data']['values']]
    })

    assert (data2[data.columns] == data).all().all()
```
**Review comment:** Would …

**Reply:** should work :) and looks more readable
```python
def test_geojson_feature():
    Chart = lambda data, **arg: alt.Chart(alt.geojson_feature(data, 'test_prop')
                                          ).mark_geoshape().project().encode(**arg)

    # Fake GeoInterface
    data = _create_fake_geo_interface()
    dct = Chart(data).to_dict()

    assert dct['data']['format'] == {'type': 'json', 'property': 'test_prop'}
    assert dct['data']['values'] == data.__geo_interface__

    # url
    data = "url.json"
    dct = Chart(data).to_dict()

    assert dct['data']['format'] == {'type': 'json', 'property': 'test_prop'}
    assert dct['data']['url'] == data
```
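The `alt.geojson_feature` helper exercised above is introduced by this PR, and its implementation is not part of this diff; the assertions imply behavior roughly like the following sketch (inferred, not the PR's actual code):

```python
def geojson_feature(data, feature):
    # Strings are treated as URLs to remote GeoJSON.
    if isinstance(data, str):
        return {'url': data,
                'format': {'type': 'json', 'property': feature}}
    # Geo-interface objects are inlined as values.
    elif hasattr(data, '__geo_interface__'):
        return {'values': data.__geo_interface__,
                'format': {'type': 'json', 'property': feature}}
    raise TypeError('Expected a URL string or an object with __geo_interface__.')
```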
**Review comment:** Now that there are three return statements in this code, the logic is pretty opaque (it took me a bit to read this and figure out what it was doing). I think the function should be refactored for clarity.

**Reply:** done
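Assuming the comment refers to the multiple early returns in `to_values`, one possible single-exit refactor (illustrative only; the PR's follow-up commit may differ):

```python
@curry
def to_values(data):
    """Replace a DataFrame by a data model with values."""
    check_data_type(data)
    if hasattr(data, '__geo_interface__'):
        if isinstance(data, pd.DataFrame):  # GeoPandas
            values = _geopandas_to_dict(sanitize_dataframe(data))
        else:
            values = data.__geo_interface__
        result = {'values': values, 'format': {'type': 'json'}}
    elif isinstance(data, pd.DataFrame):
        data = sanitize_dataframe(data)
        result = {'values': data.to_dict(orient='records')}
    else:  # dict input, already validated by check_data_type
        if 'values' not in data:
            raise KeyError('values expected in data dict, but not present.')
        result = data
    return result
```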