Skip to content

Commit

Permalink
Daily Commit
Browse files Browse the repository at this point in the history
  • Loading branch information
FelixZFB committed Jan 31, 2020
1 parent b3e9ece commit 8c57f6c
Show file tree
Hide file tree
Showing 1,539 changed files with 429,205 additions and 63 deletions.
7 changes: 4 additions & 3 deletions README.MD
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
- 地区分类:按国家的数据、按省份统计的数据、按城市统计的数据
- 中国截止目前累积数据、每日累积的数据(1月13日开始进行每日数据统计)

#### 4. 数据可视化
- 方式1:
- 方式2:使用 basemap + matplotlib
#### 4. 疫情数据可视化
- 使用 basemap + matplotlib
- 每日疫情变化时间图
- 中国疫情地图
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
59 changes: 59 additions & 0 deletions nCov_basemap_visualization/nCoV_daily_change.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# -*- coding:utf-8 -*-
# project_xxx\venv\Scripts python

'''
Author: Felix
WeiXin: AXiaShuBai
Email: [email protected]
Blog: https://blog.csdn.net/u011318077
Date: 2020/1/31 17:18
Desc:
'''

from china_data_analysis import ChinaData
import matplotlib.pyplot as plt

def daily_change():
    """Plot the daily national 2019-nCoV trend chart and save it as a PNG.

    Fetches per-day data (dates, confirmed, suspected, dead, healed) from
    ``ChinaData``, draws confirmed/suspected curves on the left Y axis and
    dead/healed curves on a twin right Y axis, then writes the figure to
    '2019-nCoV疫情变化时间图.png'.

    Returns:
        None. Side effect: saves the chart image in the working directory.
    """
    # Daily epidemic data: dates, confirmed, suspected, dead, healed counts.
    date_list, everyday_confirm, everyday_suspect, everyday_dead, everyday_heal = ChinaData().china_everyday_data()

    # Render CJK glyphs and the minus sign correctly in matplotlib.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    # Figure and primary axes.
    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Left Y axis: confirmed and suspected case curves.
    ax1.plot(date_list, everyday_confirm, lw=2, ls='--', marker='o', color='red', label='确诊')
    ax1.plot(date_list, everyday_suspect, lw=2, ls='--', marker='o', color='orange', label='疑似')
    # Title, axis labels and tick labels.
    ax1.set_title("2019-nCoV疫情变化时间图", fontsize=16)
    ax1.set_xlabel("2020年1月", fontsize=16)
    # Fix: pin tick positions before relabeling. Calling set_xticklabels
    # alone mislabels ticks (and warns/raises on modern matplotlib because
    # the number of labels need not match the auto-chosen tick locations).
    # Categorical string x-data is plotted at positions 0..n-1.
    ax1.set_xticks(range(len(date_list)))
    ax1.set_xticklabels(date_list, rotation=30)
    ax1.set_ylabel(r"确诊及疑似人数", fontsize=16)
    # NOTE(review): hard-coded axis limit; assumes counts stay below 16000.
    ax1.set_ylim(0, 16000)
    # Grid and legend for the left-axis curves.
    plt.grid(which='major', axis='both', color='grey', linestyle='--', alpha=0.2)
    plt.legend(loc='upper left', bbox_to_anchor=(0.3, 1))

    # Right Y axis (shares ax1's X axis): dead and healed case curves.
    ax2 = ax1.twinx()
    ax2.plot(date_list, everyday_dead, lw=1, ls='--', marker='.', color='cyan', label='死亡')
    ax2.plot(date_list, everyday_heal, lw=1, ls='--', marker='.', color='green', label='治愈')
    ax2.set_ylabel(r"死亡及治愈人数", fontsize=16)
    # NOTE(review): hard-coded axis limit; assumes counts stay below 400.
    ax2.set_ylim(0, 400)
    # Grid and legend for the right-axis curves.
    plt.grid(which='major', axis='both', color='grey', linestyle='--', alpha=0.2)
    plt.legend(loc='upper center')

    # plt.show()  # uncomment for interactive display
    # Save the figure; bbox_inches='tight' trims surrounding whitespace.
    plt.savefig('2019-nCoV疫情变化时间图.png', bbox_inches='tight')

if __name__ == '__main__':
    daily_change()

57 changes: 0 additions & 57 deletions nCov_basemap_visualization/nCov_basemap_matplotlib_visual.py

This file was deleted.

86 changes: 86 additions & 0 deletions nCov_basemap_visualization/nCov_distribution_map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# -*- coding:utf-8 -*-
# project_xxx\venv\Scripts python

'''
Author: Felix
WeiXin: AXiaShuBai
Email: [email protected]
Blog: https://blog.csdn.net/u011318077
Date: 2020/1/31 17:18
Desc:
'''
from province_data_analysis import ProvinceData
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap

def distribution_map():
    """Draw a scatter map of confirmed 2019-nCoV cases per Chinese province.

    Reads live per-province confirmed totals from ``ProvinceData``, re-orders
    them to match a local spreadsheet of provincial-capital coordinates via
    ``data_merge``, and plots red markers (area proportional to case count)
    on a Lambert-projected Basemap of China. Saves and shows the figure.

    Returns:
        None. Side effect: writes '2019-nCoV疫情分布地图.png' and opens a window.
    """
    # Render CJK glyphs and the minus sign correctly in matplotlib.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    # Province names and confirmed totals, ordered from most to fewest cases.
    province_name, province_total_confirm = ProvinceData().province_total_data()
    province_confirm_dict = dict(zip(province_name, province_total_confirm))

    # High-resolution canvas for the map.
    plt.figure(figsize=(10, 8), dpi=300)

    # Longitude/latitude bounds covering China.
    lon_min, lon_max = 77, 142
    lat_min, lat_max = 14, 51

    # Lambert conformal projection over the chosen bounds.
    # Renamed from `map` to avoid shadowing the builtin.
    china_map = Basemap(llcrnrlon=lon_min, llcrnrlat=lat_min, urcrnrlon=lon_max, urcrnrlat=lat_max, projection='lcc',
                        lat_1=33, lat_2=45, lon_0=100)
    china_map.readshapefile('../china_shapfiles/china-shapefiles-simple-version/china', 'china', drawbounds=True)
    china_map.readshapefile('../china_shapfiles/china-shapefiles-simple-version/china_nine_dotted_line', 'china_nine',
                            drawbounds=True)

    # Provincial-capital coordinates; assumes exactly 34 rows in the local
    # xlsx with 'province', 'lat', 'lon', 'confirm' columns — TODO confirm.
    posi = pd.read_excel('中国省会城市经度纬度表.xlsx')
    province_list = list(posi['province'])
    lat_list = np.array(posi["lat"][0:34])
    lon_list = np.array(posi["lon"][0:34])
    confirm_origin = list(posi["confirm"][0:34])
    province_dict = dict(zip(province_list, confirm_origin))

    # The live data is sorted by case count, not by the spreadsheet's order;
    # merge so the confirmed list lines up with the coordinate arrays.
    new_province_confirm = data_merge(province_dict, province_confirm_dict)
    confirm_list = np.array(list(new_province_confirm.values()))
    # Marker area scaled relative to the worst-hit province.
    size = (confirm_list / np.max(confirm_list)) * 3000

    parallels = np.arange(0., 90, 10.)
    china_map.drawparallels(parallels, labels=[1, 0, 0, 0], fontsize=10)  # latitude lines
    meridians = np.arange(80., 140., 10.)
    china_map.drawmeridians(meridians, labels=[0, 0, 0, 1], fontsize=10)  # longitude lines

    # Project capital coordinates into map space and plot scaled markers.
    x, y = china_map(lon_list, lat_list)
    china_map.scatter(x, y, s=size, c='red')
    # Annotate each capital with its confirmed count, offset in map units.
    for i in range(0, 34):
        plt.text(x[i] + 5000, y[i] + 5000, str(confirm_list[i]))

    plt.title('2019-nCoV疫情分布地图', fontsize=16)
    plt.savefig('2019-nCoV疫情分布地图.png')
    plt.show()

# 由于原始疫情数据是按确诊人数排列的,与本地经纬度表排序不一致
# 我们将省份相同的名称对应的confirm(初始confirm都是0)值相加,得到重新排序后的确诊人数列表
def data_merge(A, B):
C = dict()
for key in A:
if B.get(key):
C[key] = A[key] + B[key]
else:
C[key] = A[key]
for key in B:
if not A.get(key):
C[key] = B[key]
return C

if __name__ == '__main__':
distribution_map()
Binary file not shown.
7 changes: 4 additions & 3 deletions nCov_data_analysis/province_data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@ def province_total_data(self):
province_total_suspect.append(province['total']['suspect'])
province_total_dead.append(province['total']['dead'])
province_total_heal.append(province['total']['heal'])
print(province_name)
print(province_total_confirm)
# print(province_name)
# print(province_total_confirm)
return province_name, province_total_confirm

def province_today_data(self):
'''获取各省今日数据'''
Expand All @@ -50,7 +51,7 @@ def province_today_data(self):
province_today_suspect.append(province['today']['suspect'])
province_today_dead.append(province['total']['dead'])
province_today_heal.append(province['total']['heal'])
print(province_today_confirm)
# print(province_today_confirm)

def main(self):
self.province_total_data()
Expand Down
1 change: 1 addition & 0 deletions venv/Lib/site-packages/pandas-1.0.0.dist-info/INSTALLER
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pip
29 changes: 29 additions & 0 deletions venv/Lib/site-packages/pandas-1.0.0.dist-info/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
BSD 3-Clause License

Copyright (c) 2008-2012, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
95 changes: 95 additions & 0 deletions venv/Lib/site-packages/pandas-1.0.0.dist-info/METADATA
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
Metadata-Version: 2.1
Name: pandas
Version: 1.0.0
Summary: Powerful data structures for data analysis, time series, and statistics
Home-page: https://pandas.pydata.org
Maintainer: The PyData Development Team
Maintainer-email: [email protected]
License: BSD
Project-URL: Bug Tracker, https://github.com/pandas-dev/pandas/issues
Project-URL: Documentation, https://pandas.pydata.org/pandas-docs/stable/
Project-URL: Source Code, https://github.com/pandas-dev/pandas
Platform: any
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console
Classifier: Operating System :: OS Independent
Classifier: Intended Audience :: Science/Research
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Cython
Classifier: Topic :: Scientific/Engineering
Requires-Python: >=3.6.1
Requires-Dist: python-dateutil (>=2.6.1)
Requires-Dist: pytz (>=2017.2)
Requires-Dist: numpy (>=1.13.3)
Provides-Extra: test
Requires-Dist: pytest (>=4.0.2) ; extra == 'test'
Requires-Dist: pytest-xdist ; extra == 'test'
Requires-Dist: hypothesis (>=3.58) ; extra == 'test'


**pandas** is a Python package providing fast, flexible, and expressive data
structures designed to make working with structured (tabular, multidimensional,
potentially heterogeneous) and time series data both easy and intuitive. It
aims to be the fundamental high-level building block for doing practical,
**real world** data analysis in Python. Additionally, it has the broader goal
of becoming **the most powerful and flexible open source data analysis /
manipulation tool available in any language**. It is already well on its way
toward this goal.

pandas is well suited for many different kinds of data:

- Tabular data with heterogeneously-typed columns, as in an SQL table or
Excel spreadsheet
- Ordered and unordered (not necessarily fixed-frequency) time series data.
- Arbitrary matrix data (homogeneously typed or heterogeneous) with row and
column labels
- Any other form of observational / statistical data sets. The data actually
need not be labeled at all to be placed into a pandas data structure

The two primary data structures of pandas, Series (1-dimensional) and DataFrame
(2-dimensional), handle the vast majority of typical use cases in finance,
statistics, social science, and many areas of engineering. For R users,
DataFrame provides everything that R's ``data.frame`` provides and much
more. pandas is built on top of `NumPy <https://www.numpy.org>`__ and is
intended to integrate well within a scientific computing environment with many
other 3rd party libraries.

Here are just a few of the things that pandas does well:

- Easy handling of **missing data** (represented as NaN) in floating point as
well as non-floating point data
- Size mutability: columns can be **inserted and deleted** from DataFrame and
higher dimensional objects
- Automatic and explicit **data alignment**: objects can be explicitly
aligned to a set of labels, or the user can simply ignore the labels and
let `Series`, `DataFrame`, etc. automatically align the data for you in
computations
- Powerful, flexible **group by** functionality to perform
split-apply-combine operations on data sets, for both aggregating and
transforming data
- Make it **easy to convert** ragged, differently-indexed data in other
Python and NumPy data structures into DataFrame objects
- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting**
of large data sets
- Intuitive **merging** and **joining** data sets
- Flexible **reshaping** and pivoting of data sets
- **Hierarchical** labeling of axes (possible to have multiple labels per
tick)
- Robust IO tools for loading data from **flat files** (CSV and delimited),
Excel files, databases, and saving / loading data from the ultrafast **HDF5
format**
- **Time series**-specific functionality: date range generation and frequency
conversion, moving window statistics, date shifting and lagging.

Many of these principles are here to address the shortcomings frequently
experienced using other languages / scientific research environments. For data
scientists, working with data is typically divided into multiple stages:
munging and cleaning data, analyzing / modeling it, then organizing the results
of the analysis into a form suitable for plotting or tabular display. pandas is
the ideal tool for all of these tasks.


Loading

0 comments on commit 8c57f6c

Please sign in to comment.