Skip to content

Commit

Permalink
Daily Commit
Browse files Browse the repository at this point in the history
  • Loading branch information
FelixZFB committed Jan 31, 2020
1 parent b3e9ece commit 8c57f6c
Show file tree
Hide file tree
Showing 1,539 changed files with 429,205 additions and 63 deletions.
7 changes: 4 additions & 3 deletions README.MD
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
- 地区分类:按国家的数据、按省份统计的数据、按城市统计的数据
- 中国截止目前累积数据、每日累积的数据(1月13日开始进行每日数据统计)

#### 4. 数据可视化
- 方式1:
- 方式2:使用 basemap + matplotlib
#### 4. 疫情数据可视化
- 使用 basemap + matplotlib
- 每日疫情变化时间图
- 中国疫情地图
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
59 changes: 59 additions & 0 deletions nCov_basemap_visualization/nCoV_daily_change.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# -*- coding:utf-8 -*-
# project_xxx\venv\Scripts python

'''
Author: Felix
WeiXin: AXiaShuBai
Email: [email protected]
Blog: https://blog.csdn.net/u011318077
Date: 2020/1/31 17:18
Desc:
'''

from china_data_analysis import ChinaData
import matplotlib.pyplot as plt

def daily_change():
    """Plot the daily national 2019-nCoV trend chart and save it as a PNG.

    Fetches per-day data (dates, confirmed, suspected, dead, healed) from
    ``ChinaData``, draws confirmed/suspected curves on the left Y axis and
    dead/healed curves on a twin right Y axis, then writes the figure to
    '2019-nCoV疫情变化时间图.png'.

    Returns:
        None. Side effect: saves the chart image in the working directory.
    """
    # Daily epidemic data: dates, confirmed, suspected, dead, healed counts.
    date_list, everyday_confirm, everyday_suspect, everyday_dead, everyday_heal = ChinaData().china_everyday_data()

    # Render CJK glyphs and the minus sign correctly in matplotlib.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    # Figure and primary axes.
    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Left Y axis: confirmed and suspected case curves.
    ax1.plot(date_list, everyday_confirm, lw=2, ls='--', marker='o', color='red', label='确诊')
    ax1.plot(date_list, everyday_suspect, lw=2, ls='--', marker='o', color='orange', label='疑似')
    # Title, axis labels and tick labels.
    ax1.set_title("2019-nCoV疫情变化时间图", fontsize=16)
    ax1.set_xlabel("2020年1月", fontsize=16)
    # Fix: pin tick positions before relabeling. Calling set_xticklabels
    # alone mislabels ticks (and warns/raises on modern matplotlib because
    # the number of labels need not match the auto-chosen tick locations).
    # Categorical string x-data is plotted at positions 0..n-1.
    ax1.set_xticks(range(len(date_list)))
    ax1.set_xticklabels(date_list, rotation=30)
    ax1.set_ylabel(r"确诊及疑似人数", fontsize=16)
    # NOTE(review): hard-coded axis limit; assumes counts stay below 16000.
    ax1.set_ylim(0, 16000)
    # Grid and legend for the left-axis curves.
    plt.grid(which='major', axis='both', color='grey', linestyle='--', alpha=0.2)
    plt.legend(loc='upper left', bbox_to_anchor=(0.3, 1))

    # Right Y axis (shares ax1's X axis): dead and healed case curves.
    ax2 = ax1.twinx()
    ax2.plot(date_list, everyday_dead, lw=1, ls='--', marker='.', color='cyan', label='死亡')
    ax2.plot(date_list, everyday_heal, lw=1, ls='--', marker='.', color='green', label='治愈')
    ax2.set_ylabel(r"死亡及治愈人数", fontsize=16)
    # NOTE(review): hard-coded axis limit; assumes counts stay below 400.
    ax2.set_ylim(0, 400)
    # Grid and legend for the right-axis curves.
    plt.grid(which='major', axis='both', color='grey', linestyle='--', alpha=0.2)
    plt.legend(loc='upper center')

    # plt.show()  # uncomment for interactive display
    # Save the figure; bbox_inches='tight' trims surrounding whitespace.
    plt.savefig('2019-nCoV疫情变化时间图.png', bbox_inches='tight')

if __name__ == '__main__':
    daily_change()

57 changes: 0 additions & 57 deletions nCov_basemap_visualization/nCov_basemap_matplotlib_visual.py

This file was deleted.

86 changes: 86 additions & 0 deletions nCov_basemap_visualization/nCov_distribution_map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# -*- coding:utf-8 -*-
# project_xxx\venv\Scripts python

'''
Author: Felix
WeiXin: AXiaShuBai
Email: [email protected]
Blog: https://blog.csdn.net/u011318077
Date: 2020/1/31 17:18
Desc:
'''
from province_data_analysis import ProvinceData
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap

def distribution_map():
    """Draw a scatter map of confirmed 2019-nCoV cases per Chinese province.

    Reads live per-province confirmed totals from ``ProvinceData``, re-orders
    them to match a local spreadsheet of provincial-capital coordinates via
    ``data_merge``, and plots red markers (area proportional to case count)
    on a Lambert-projected Basemap of China. Saves and shows the figure.

    Returns:
        None. Side effect: writes '2019-nCoV疫情分布地图.png' and opens a window.
    """
    # Render CJK glyphs and the minus sign correctly in matplotlib.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    # Province names and confirmed totals, ordered from most to fewest cases.
    province_name, province_total_confirm = ProvinceData().province_total_data()
    province_confirm_dict = dict(zip(province_name, province_total_confirm))

    # High-resolution canvas for the map.
    plt.figure(figsize=(10, 8), dpi=300)

    # Longitude/latitude bounds covering China.
    lon_min, lon_max = 77, 142
    lat_min, lat_max = 14, 51

    # Lambert conformal projection over the chosen bounds.
    # Renamed from `map` to avoid shadowing the builtin.
    china_map = Basemap(llcrnrlon=lon_min, llcrnrlat=lat_min, urcrnrlon=lon_max, urcrnrlat=lat_max, projection='lcc',
                        lat_1=33, lat_2=45, lon_0=100)
    china_map.readshapefile('../china_shapfiles/china-shapefiles-simple-version/china', 'china', drawbounds=True)
    china_map.readshapefile('../china_shapfiles/china-shapefiles-simple-version/china_nine_dotted_line', 'china_nine',
                            drawbounds=True)

    # Provincial-capital coordinates; assumes exactly 34 rows in the local
    # xlsx with 'province', 'lat', 'lon', 'confirm' columns — TODO confirm.
    posi = pd.read_excel('中国省会城市经度纬度表.xlsx')
    province_list = list(posi['province'])
    lat_list = np.array(posi["lat"][0:34])
    lon_list = np.array(posi["lon"][0:34])
    confirm_origin = list(posi["confirm"][0:34])
    province_dict = dict(zip(province_list, confirm_origin))

    # The live data is sorted by case count, not by the spreadsheet's order;
    # merge so the confirmed list lines up with the coordinate arrays.
    new_province_confirm = data_merge(province_dict, province_confirm_dict)
    confirm_list = np.array(list(new_province_confirm.values()))
    # Marker area scaled relative to the worst-hit province.
    size = (confirm_list / np.max(confirm_list)) * 3000

    parallels = np.arange(0., 90, 10.)
    china_map.drawparallels(parallels, labels=[1, 0, 0, 0], fontsize=10)  # latitude lines
    meridians = np.arange(80., 140., 10.)
    china_map.drawmeridians(meridians, labels=[0, 0, 0, 1], fontsize=10)  # longitude lines

    # Project capital coordinates into map space and plot scaled markers.
    x, y = china_map(lon_list, lat_list)
    china_map.scatter(x, y, s=size, c='red')
    # Annotate each capital with its confirmed count, offset in map units.
    for i in range(0, 34):
        plt.text(x[i] + 5000, y[i] + 5000, str(confirm_list[i]))

    plt.title('2019-nCoV疫情分布地图', fontsize=16)
    plt.savefig('2019-nCoV疫情分布地图.png')
    plt.show()

# 由于原始疫情数据是按确诊人数排列的,与本地经纬度表排序不一致
# 我们将省份相同的名称对应的confirm(初始confirm都是0)值相加,得到重新排序后的确诊人数列表
def data_merge(A, B):
C = dict()
for key in A:
if B.get(key):
C[key] = A[key] + B[key]
else:
C[key] = A[key]
for key in B:
if not A.get(key):
C[key] = B[key]
return C

if __name__ == '__main__':
distribution_map()
Binary file not shown.
7 changes: 4 additions & 3 deletions nCov_data_analysis/province_data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@ def province_total_data(self):
province_total_suspect.append(province['total']['suspect'])
province_total_dead.append(province['total']['dead'])
province_total_heal.append(province['total']['heal'])
print(province_name)
print(province_total_confirm)
# print(province_name)
# print(province_total_confirm)
return province_name, province_total_confirm

def province_today_data(self):
'''获取各省今日数据'''
Expand All @@ -50,7 +51,7 @@ def province_today_data(self):
province_today_suspect.append(province['today']['suspect'])
province_today_dead.append(province['total']['dead'])
province_today_heal.append(province['total']['heal'])
print(province_today_confirm)
# print(province_today_confirm)

def main(self):
self.province_total_data()
Expand Down
1 change: 1 addition & 0 deletions venv/Lib/site-packages/pandas-1.0.0.dist-info/INSTALLER
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pip
29 changes: 29 additions & 0 deletions venv/Lib/site-packages/pandas-1.0.0.dist-info/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
BSD 3-Clause License

Copyright (c) 2008-2012, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
95 changes: 95 additions & 0 deletions venv/Lib/site-packages/pandas-1.0.0.dist-info/METADATA
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
Metadata-Version: 2.1
Name: pandas
Version: 1.0.0
Summary: Powerful data structures for data analysis, time series, and statistics
Home-page: https://pandas.pydata.org
Maintainer: The PyData Development Team
Maintainer-email: [email protected]
License: BSD
Project-URL: Bug Tracker, https://github.com/pandas-dev/pandas/issues
Project-URL: Documentation, https://pandas.pydata.org/pandas-docs/stable/
Project-URL: Source Code, https://github.com/pandas-dev/pandas
Platform: any
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console
Classifier: Operating System :: OS Independent
Classifier: Intended Audience :: Science/Research
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Cython
Classifier: Topic :: Scientific/Engineering
Requires-Python: >=3.6.1
Requires-Dist: python-dateutil (>=2.6.1)
Requires-Dist: pytz (>=2017.2)
Requires-Dist: numpy (>=1.13.3)
Provides-Extra: test
Requires-Dist: pytest (>=4.0.2) ; extra == 'test'
Requires-Dist: pytest-xdist ; extra == 'test'
Requires-Dist: hypothesis (>=3.58) ; extra == 'test'


**pandas** is a Python package providing fast, flexible, and expressive data
structures designed to make working with structured (tabular, multidimensional,
potentially heterogeneous) and time series data both easy and intuitive. It
aims to be the fundamental high-level building block for doing practical,
**real world** data analysis in Python. Additionally, it has the broader goal
of becoming **the most powerful and flexible open source data analysis /
manipulation tool available in any language**. It is already well on its way
toward this goal.

pandas is well suited for many different kinds of data:

- Tabular data with heterogeneously-typed columns, as in an SQL table or
Excel spreadsheet
- Ordered and unordered (not necessarily fixed-frequency) time series data.
- Arbitrary matrix data (homogeneously typed or heterogeneous) with row and
column labels
- Any other form of observational / statistical data sets. The data actually
need not be labeled at all to be placed into a pandas data structure

The two primary data structures of pandas, Series (1-dimensional) and DataFrame
(2-dimensional), handle the vast majority of typical use cases in finance,
statistics, social science, and many areas of engineering. For R users,
DataFrame provides everything that R's ``data.frame`` provides and much
more. pandas is built on top of `NumPy <https://www.numpy.org>`__ and is
intended to integrate well within a scientific computing environment with many
other 3rd party libraries.

Here are just a few of the things that pandas does well:

- Easy handling of **missing data** (represented as NaN) in floating point as
well as non-floating point data
- Size mutability: columns can be **inserted and deleted** from DataFrame and
higher dimensional objects
- Automatic and explicit **data alignment**: objects can be explicitly
aligned to a set of labels, or the user can simply ignore the labels and
let `Series`, `DataFrame`, etc. automatically align the data for you in
computations
- Powerful, flexible **group by** functionality to perform
split-apply-combine operations on data sets, for both aggregating and
transforming data
- Make it **easy to convert** ragged, differently-indexed data in other
Python and NumPy data structures into DataFrame objects
- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting**
of large data sets
- Intuitive **merging** and **joining** data sets
- Flexible **reshaping** and pivoting of data sets
- **Hierarchical** labeling of axes (possible to have multiple labels per
tick)
- Robust IO tools for loading data from **flat files** (CSV and delimited),
Excel files, databases, and saving / loading data from the ultrafast **HDF5
format**
- **Time series**-specific functionality: date range generation and frequency
conversion, moving window statistics, date shifting and lagging.

Many of these principles are here to address the shortcomings frequently
experienced using other languages / scientific research environments. For data
scientists, working with data is typically divided into multiple stages:
munging and cleaning data, analyzing / modeling it, then organizing the results
of the analysis into a form suitable for plotting or tabular display. pandas is
the ideal tool for all of these tasks.


Loading

0 comments on commit 8c57f6c

Please sign in to comment.