python数据可视化学习-阿南达文事网

python数据可视化学习

文章目录

前言
一、生成数据
- 1.绘制简单的折线图
- 2.修改标签文字和线条粗细
- 3.矫正图形
- 4.使用scatter()绘制散点图并设置其样式
- 5.使用scatter()绘制一系列点
- 6. 自动计算数据
- 7. 删除数据点的轮廓
- 8.自定义颜色
- 9. 使用颜色映射
- 10.自动保存图表
二、随机漫步
- 1.创建RandomWalk()类
- 2.绘制随机漫步图
- 3.模拟多次随机漫步
- 4.给点着色
- 5.美化
三、使用Pygal模拟掷骰子
- 1.创建Die类
- 2.掷骰子
- 3.绘制直方图
- 4. 同时掷两个骰子
- 5.同时掷两个面数不同的骰子
四、CSV
- 1.分析CSV文件头
- 2.打印文件头及其位置
- 3.获取最高温度
- 4.绘制气温图表
- 5.在图表中添加日期
- 6.再绘制一个数据序列
- 7.给图表区域着色
- 8.错误检查
五、JSON
- 1.读取json数据
- 2.获取两个字母的国别码
- 3.获取国别码和人口数据
- 4.制作世界地图
- 5. 根据人口数量对国家分组
- 6.使用pygal设置世界地图的样式
六、使用API
- 1.处理API响应
- 2.使用Pygal可视化仓库
- 3.改进样式
- 4. 添加自定义工具提示和链接

前言

本文用到了 matplotlib、pygal、pygal_maps_world 和 requests 等第三方库，请自行下载安装（例如使用 pip install）。

一、生成数据

1.绘制简单的折线图

import matplotlib.pyplot as plt

# The first five perfect squares. No x values are passed, so plot() uses
# the list indices (0, 1, 2, ...) as the x coordinates.
squares = [1, 4, 9, 16, 25]
plt.plot(squares)
plt.show()

2.修改标签文字和线条粗细

import matplotlib.pyplot as plt

squares = [1, 4, 9, 16, 25]
plt.plot(squares, linewidth=5)

# Set the chart title and label both axes.
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)

# Set the size of the tick labels.
plt.tick_params(axis='both', labelsize=14)
plt.show()

3.矫正图形

import matplotlib.pyplot as plt

# Pass the x values explicitly so that each square is plotted against its
# own input value.
input_values = [1, 2, 3, 4, 5]
squares = [1, 4, 9, 16, 25]
plt.plot(input_values, squares, linewidth=5)

# Set the chart title and label both axes.
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)

# Set the size of the tick labels.
plt.tick_params(axis='both', labelsize=14)
plt.show()

4.使用scatter()绘制散点图并设置其样式

import matplotlib.pyplot as plt

# Draw a single point at (2, 4); s sets the marker size.
plt.scatter(2, 4, s=200)

# Set the chart title and label both axes.
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)

# Set the size of the tick labels.
plt.tick_params(axis='both', which='major', labelsize=14)
plt.show()

5.使用scatter()绘制一系列点

import matplotlib.pyplot as plt

x_values = [1, 2, 3, 4, 5]
y_values = [1, 4, 9, 16, 25]
plt.scatter(x_values, y_values, s=100)

# Set the chart title and label both axes.
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)

# Set the size of the tick labels.
plt.tick_params(axis='both', which='major', labelsize=14)
plt.show()

6. 自动计算数据

import matplotlib.pyplot as plt

# Compute the data instead of typing it in: 1..1000 and their squares.
x_values = list(range(1, 1001))
y_values = [x**2 for x in x_values]
plt.scatter(x_values, y_values, s=40)

# Set the chart title and label both axes.
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)

# Set the range of each axis: [x_min, x_max, y_min, y_max].
plt.axis([0, 1100, 0, 1100000])
plt.show()

7. 删除数据点的轮廓

# Draw the same points without an outline around each marker.
plt.scatter(
    x_values,
    y_values,
    s=40,
    edgecolor='none',
)

8.自定义颜色

# A named colour can be passed through the c argument.
plt.scatter(x_values, y_values, c='red', edgecolor='none', s=40)

# A custom RGB colour is a tuple of three values between 0 and 1. It is
# passed through `color` rather than `c`: a bare numeric sequence given to
# `c` is ambiguous with per-point value mapping.
plt.scatter(x_values, y_values, color=(0, 0, 0.8), edgecolor='none', s=40)

9. 使用颜色映射

颜色映射是一系列颜色，它们从起始颜色渐变到结束颜色。颜色映射用于突出数据的规律，例如你可以用较浅的颜色来显示较小的值，并使用较深的颜色来显示较大的值。

import matplotlib.pyplot as plt

x_values = list(range(1, 1001))
y_values = [x**2 for x in x_values]

# Colour each point by its y value using the Blues colormap.
plt.scatter(x_values, y_values, c=y_values, cmap=plt.cm.Blues,
            edgecolor='none', s=40)

# Set the chart title and label both axes.
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)

# Set the range of each axis: [x_min, x_max, y_min, y_max].
plt.axis([0, 1100, 0, 1100000])
plt.show()

10.自动保存图表

# Replace the call to plt.show() with a call to plt.savefig().
# The first argument is the output file name (a relative path, so it is
# resolved against the current working directory); bbox_inches='tight'
# trims the extra whitespace around the chart.
plt.savefig(
    'squares_plot.png',
    bbox_inches='tight',
)

二、随机漫步

使用python来生成随机漫步数据，再使用matplotlib以引人瞩目的方式将这些数据呈现出来

1.创建RandomWalk()类

random_walk.py

from random import choice


class RandomWalk():
    """Generate the points of a two-dimensional random walk.

    Attributes:
        num_points: Number of points the finished walk contains.
        x_values: x coordinates of the points generated so far.
        y_values: y coordinates of the points generated so far.
    """

    def __init__(self, num_points=5000):
        """Initialise the walk.

        Args:
            num_points: Number of points to generate, counting the origin.
        """
        self.num_points = num_points

        # Every walk starts at (0, 0).
        self.x_values = [0]
        self.y_values = [0]

    def fill_walk(self):
        """Append random steps until the walk holds ``num_points`` points."""
        while len(self.x_values) < self.num_points:
            # Choose a direction (+1 or -1) and a distance (0 to 4) for
            # each axis independently.
            x_direction = choice([1, -1])
            x_distance = choice([0, 1, 2, 3, 4])
            x_step = x_direction * x_distance

            y_direction = choice([1, -1])
            y_distance = choice([0, 1, 2, 3, 4])
            y_step = y_direction * y_distance

            # Reject steps that stay in place.
            if x_step == 0 and y_step == 0:
                continue

            # The next point is the previous point plus the step.
            next_x = self.x_values[-1] + x_step
            next_y = self.y_values[-1] + y_step

            self.x_values.append(next_x)
            self.y_values.append(next_y)

2.绘制随机漫步图

import matplotlib.pyplot as plt

from random_walk import RandomWalk

# Create a RandomWalk instance and plot all of its points.
rw = RandomWalk()
rw.fill_walk()
plt.scatter(rw.x_values, rw.y_values, s=15)
plt.show()

3.模拟多次随机漫步

import matplotlib.pyplot as plt

from random_walk import RandomWalk

# Keep generating new walks until the user answers 'n'.
while True:
    # Create a RandomWalk instance and plot all of its points.
    rw = RandomWalk()
    rw.fill_walk()
    plt.scatter(rw.x_values, rw.y_values, s=15)
    plt.show()

    keep_running = input("Make another walk?(y/n)")
    if keep_running == 'n':
        break

4.给点着色

import matplotlib.pyplot as plt

from random_walk import RandomWalk

# Keep generating new walks until the user answers 'n'.
while True:
    # Create a RandomWalk instance and plot all of its points.
    rw = RandomWalk()
    rw.fill_walk()

    # Colour the points by their position in the walk: each point's index
    # is mapped onto the Blues colormap.
    point_numbers = list(range(rw.num_points))
    plt.scatter(rw.x_values, rw.y_values, c=point_numbers,
                cmap=plt.cm.Blues, edgecolor='none', s=15)
    plt.show()

    keep_running = input("Make another walk?(y/n)")
    if keep_running == 'n':
        break

5.美化

import matplotlib.pyplot as plt

from random_walk import RandomWalk

# Keep generating new walks until the user answers 'n'.
while True:
    # Create a RandomWalk instance and plot all of its points.
    rw = RandomWalk(50000)
    rw.fill_walk()

    # Set the size of the plotting window.
    plt.figure(figsize=(10, 6))

    # Colour the points by their position in the walk.
    point_numbers = list(range(rw.num_points))
    plt.scatter(rw.x_values, rw.y_values, c=point_numbers,
                cmap=plt.cm.Blues, edgecolor='none', s=1)

    # Emphasise the start (green) and end (red) points.
    plt.scatter(0, 0, c='green', edgecolors='none', s=100)
    plt.scatter(rw.x_values[-1], rw.y_values[-1], c='red',
                edgecolors='none', s=100)

    # Hide both axis lines. plt.gca() returns the Axes the points were
    # drawn on; plt.axes() would add a new, empty Axes in current
    # matplotlib releases.
    current_axes = plt.gca()
    current_axes.get_xaxis().set_visible(False)
    current_axes.get_yaxis().set_visible(False)

    plt.show()

    keep_running = input("Make another walk?(y/n)")
    if keep_running == 'n':
        break

三、使用Pygal模拟掷骰子

使用python可视化包pygal来生成可缩放的矢量图形文件。

1.创建Die类

die.py

from random import randint


class Die():
    """A single die with a configurable number of sides."""

    def __init__(self, num_sides=6):
        """Create the die.

        Args:
            num_sides: Number of faces on the die; defaults to 6.
        """
        self.num_sides = num_sides

    def roll(self):
        """Return a random integer between 1 and ``num_sides``, inclusive."""
        return randint(1, self.num_sides)

2.掷骰子

from die import Die

# Create a D6.
die = Die()

# Roll the die 1000 times and store the results in a list.
results = [die.roll() for _ in range(1000)]

# Analyse the results: count how often each face (1..num_sides) came up.
frequencies = [results.count(value)
               for value in range(1, die.num_sides + 1)]

3.绘制直方图

import pygal

from die import Die

# Create a D6.
die = Die()

# Roll the die 1000 times and store the results in a list.
results = [die.roll() for _ in range(1000)]

# Analyse the results: count how often each face came up.
faces = range(1, die.num_sides + 1)
frequencies = [results.count(value) for value in faces]

# Visualise the results as a bar chart.
hist = pygal.Bar()

hist.title = "Results of rolling one D6 1000 times"
# One label per face: '1' .. '6' for a D6.
hist.x_labels = [str(value) for value in faces]
hist.x_title = "Result"
hist.y_title = "Frequency of Result"

hist.add('D6', frequencies)
hist.render_to_file('die_visual.svg')

4. 同时掷两个骰子

import pygal

from die import Die

# Create two D6 dice.
die_1 = Die()
die_2 = Die()

# Roll both dice 1000 times and store each sum in a list.
results = [die_1.roll() + die_2.roll() for _ in range(1000)]

# Analyse the results: the possible sums run from 2 (both dice show 1)
# to the combined number of sides.
max_result = die_1.num_sides + die_2.num_sides
totals = range(2, max_result + 1)
frequencies = [results.count(value) for value in totals]

# Visualise the results as a bar chart.
hist = pygal.Bar()

# Two dice are rolled here, so the title must not say "one D6".
hist.title = "Results of rolling two D6 1000 times"
# One label per possible sum: '2' .. '12'.
hist.x_labels = [str(value) for value in totals]
hist.x_title = "Result"
hist.y_title = "Frequency of Result"

hist.add('D6+D6', frequencies)
hist.render_to_file('die_visual.svg')

5.同时掷两个面数不同的骰子

import pygal

from die import Die

# Create a D6 and a D10.
die_1 = Die()
die_2 = Die(10)

# Roll both dice 50,000 times and store each sum in a list.
results = [die_1.roll() + die_2.roll() for _ in range(50000)]

# Analyse the results: the possible sums run from 2 (both dice show 1)
# to the combined number of sides.
max_result = die_1.num_sides + die_2.num_sides
totals = range(2, max_result + 1)
frequencies = [results.count(value) for value in totals]

# Visualise the results as a bar chart.
hist = pygal.Bar()

hist.title = "Results of rolling a D6 and D10 50,000 times"
# One label per possible sum: '2' .. '16'.
hist.x_labels = [str(value) for value in totals]
hist.x_title = "Result"
hist.y_title = "Frequency of Result"

hist.add('D6+D10', frequencies)
hist.render_to_file('die_visual.svg')

四、CSV

下面是CSV格式的天气数据，大家可以复制粘贴并保存为 sitka_weather_07-2018_simple.csv

"STATION","NAME","DATE","PRCP","TAVG","TMAX","TMIN"
"USW00025333","SITKA AIRPORT, AK US","2018-07-01","0.25",,"62","50"
"USW00025333","SITKA AIRPORT, AK US","2018-07-02","0.01",,"58","53"
"USW00025333","SITKA AIRPORT, AK US","2018-07-03","0.00",,"70","54"
"USW00025333","SITKA AIRPORT, AK US","2018-07-04","0.00",,"70","55"
"USW00025333","SITKA AIRPORT, AK US","2018-07-05","0.00",,"67","55"
"USW00025333","SITKA AIRPORT, AK US","2018-07-06","0.00",,"59","55"
"USW00025333","SITKA AIRPORT, AK US","2018-07-07","0.00",,"58","55"
"USW00025333","SITKA AIRPORT, AK US","2018-07-08","0.00",,"62","54"
"USW00025333","SITKA AIRPORT, AK US","2018-07-09","0.00",,"66","55"
"USW00025333","SITKA AIRPORT, AK US","2018-07-10","0.44",,"59","53"
"USW00025333","SITKA AIRPORT, AK US","2018-07-11","0.29",,"56","50"
"USW00025333","SITKA AIRPORT, AK US","2018-07-12","0.02",,"63","49"
"USW00025333","SITKA AIRPORT, AK US","2018-07-13","0.00",,"65","48"
"USW00025333","SITKA AIRPORT, AK US","2018-07-14","0.32",,"58","52"
"USW00025333","SITKA AIRPORT, AK US","2018-07-15","0.75",,"56","52"
"USW00025333","SITKA AIRPORT, AK US","2018-07-16","0.61",,"59","52"
"USW00025333","SITKA AIRPORT, AK US","2018-07-17","0.00",,"64","52"
"USW00025333","SITKA AIRPORT, AK US","2018-07-18","0.16",,"60","53"
"USW00025333","SITKA AIRPORT, AK US","2018-07-19","0.14",,"60","53"
"USW00025333","SITKA AIRPORT, AK US","2018-07-20","0.00",,"61","56"
"USW00025333","SITKA AIRPORT, AK US","2018-07-21","0.00",,"65","55"
"USW00025333","SITKA AIRPORT, AK US","2018-07-22","0.00",,"65","54"
"USW00025333","SITKA AIRPORT, AK US","2018-07-23","0.00",,"63","54"
"USW00025333","SITKA AIRPORT, AK US","2018-07-24","0.00",,"59","52"
"USW00025333","SITKA AIRPORT, AK US","2018-07-25","0.00",,"64","52"
"USW00025333","SITKA AIRPORT, AK US","2018-07-26","0.00",,"65","58"
"USW00025333","SITKA AIRPORT, AK US","2018-07-27","0.00",,"68","58"
"USW00025333","SITKA AIRPORT, AK US","2018-07-28","0.00",,"66","56"
"USW00025333","SITKA AIRPORT, AK US","2018-07-29","0.00",,"64","56"
"USW00025333","SITKA AIRPORT, AK US","2018-07-30","0.00",,"67","58"
"USW00025333","SITKA AIRPORT, AK US","2018-07-31","0.00",,"65","56"

1.分析CSV文件头

import csv

filename = 'sitka_weather_07-2018_simple.csv'
with open(filename) as f:
    reader = csv.reader(f)
    # The first row returned by the reader is the header row.
    header_row = next(reader)
    print(header_row)
# ['STATION', 'NAME', 'DATE', 'PRCP', 'TAVG', 'TMAX', 'TMIN']

2.打印文件头及其位置

import csv

filename = 'sitka_weather_07-2018_simple.csv'
with open(filename) as f:
    reader = csv.reader(f)
    header_row = next(reader)

    # Print each column header together with its index in the row.
    for index, column_header in enumerate(header_row):
        print(index, column_header)

3.获取最高温度

import csv

filename = 'sitka_weather_07-2018_simple.csv'
with open(filename) as f:
    reader = csv.reader(f)
    next(reader)  # Skip the header row.

    # Column 5 is TMAX, the daily high temperature.
    highs = [int(row[5]) for row in reader]
    print(highs)
#[62, 58, 70, 70, 67, 59, 58, 62, 66, 59, 56, 63, 65, 58, 56, 59, 64, 60, 60, 61, 65, 65, 63, 59, 64, 65, 68, 66, 64, 67, 65]

4.绘制气温图表

import csv

from matplotlib import pyplot as plt

filename = 'sitka_weather_07-2018_simple.csv'
with open(filename) as f:
    reader = csv.reader(f)
    next(reader)  # Skip the header row.

    # Column 5 is TMAX, the daily high temperature.
    highs = [int(row[5]) for row in reader]

# Plot the data.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(highs, c='red')

# Format the chart.
plt.title("Daily high temperatures,July 2018", fontsize=24)
plt.xlabel('', fontsize=16)
plt.ylabel("Temperature (F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()

5.在图表中添加日期

import csv
from datetime import datetime

from matplotlib import pyplot as plt

filename = 'sitka_weather_07-2018_simple.csv'
with open(filename) as f:
    reader = csv.reader(f)
    next(reader)  # Skip the header row.

    # Column 2 is DATE (YYYY-MM-DD); column 5 is TMAX.
    dates, highs = [], []
    for row in reader:
        current_date = datetime.strptime(row[2], "%Y-%m-%d")
        dates.append(current_date)
        highs.append(int(row[5]))

# Plot the data.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red')

# Format the chart.
plt.title("Daily high temperatures,July 2018", fontsize=24)
plt.xlabel('', fontsize=16)
# Slant the date labels so that they do not overlap.
fig.autofmt_xdate()
plt.ylabel("Temperature (F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()

6.再绘制一个数据序列

import csv
from datetime import datetime

from matplotlib import pyplot as plt

filename = 'sitka_weather_07-2018_simple.csv'
with open(filename) as f:
    reader = csv.reader(f)
    next(reader)  # Skip the header row.

    # Column 2 is DATE (YYYY-MM-DD); columns 5 and 6 are TMAX and TMIN.
    dates, highs, lows = [], [], []
    for row in reader:
        current_date = datetime.strptime(row[2], "%Y-%m-%d")
        dates.append(current_date)
        highs.append(int(row[5]))
        lows.append(int(row[6]))

# Plot the data: highs in red, lows in blue.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red')
plt.plot(dates, lows, c='blue')

# Format the chart.
plt.title("Daily high and low temperatures,July 2018", fontsize=24)
plt.xlabel('', fontsize=16)
# Slant the date labels so that they do not overlap.
fig.autofmt_xdate()
plt.ylabel("Temperature (F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()

7.给图表区域着色

import csv
from datetime import datetime

from matplotlib import pyplot as plt

filename = 'sitka_weather_07-2018_simple.csv'
with open(filename) as f:
    reader = csv.reader(f)
    next(reader)  # Skip the header row.

    # Column 2 is DATE (YYYY-MM-DD); columns 5 and 6 are TMAX and TMIN.
    dates, highs, lows = [], [], []
    for row in reader:
        current_date = datetime.strptime(row[2], "%Y-%m-%d")
        dates.append(current_date)
        highs.append(int(row[5]))
        lows.append(int(row[6]))

# Plot the data.
fig = plt.figure(dpi=128, figsize=(10, 6))
# alpha sets the opacity: 0 is fully transparent, 1 is fully opaque.
plt.plot(dates, highs, c='red', alpha=0.5)
plt.plot(dates, lows, c='blue', alpha=0.5)
# Shade the region between the two series.
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)

# Format the chart.
plt.title("Daily high and low temperatures,July 2018", fontsize=24)
plt.xlabel('', fontsize=16)
# Slant the date labels so that they do not overlap.
fig.autofmt_xdate()
plt.ylabel("Temperature (F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()

8.错误检查

使用的很多数据集都可能缺失数据、数据格式不正确或者数据本身不正确，可以使用try-except-else 代码块来处理数据缺失的情况。

import csv
from datetime import datetime

from matplotlib import pyplot as plt

filename = 'sitka_weather_07-2018_simple.csv'
with open(filename) as f:
    reader = csv.reader(f)
    next(reader)  # Skip the header row.

    # Column 2 is DATE (YYYY-MM-DD); columns 5 and 6 are TMAX and TMIN.
    dates, highs, lows = [], [], []
    for row in reader:
        try:
            current_date = datetime.strptime(row[2], "%Y-%m-%d")
            high = int(row[5])
            low = int(row[6])
        except ValueError:
            # An empty or malformed field makes strptime()/int() raise
            # ValueError. Report the raw date text: current_date is unset
            # or stale when the date itself failed to parse.
            print(row[2], 'missing data')
        else:
            # Keep the row only when all three values parsed.
            dates.append(current_date)
            highs.append(high)
            lows.append(low)

# Plot the data.
fig = plt.figure(dpi=128, figsize=(10, 6))
# alpha sets the opacity: 0 is fully transparent, 1 is fully opaque.
plt.plot(dates, highs, c='red', alpha=0.5)
plt.plot(dates, lows, c='blue', alpha=0.5)
# Shade the region between the two series.
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)

# Format the chart.
plt.title("Daily high and low temperatures,July 2018", fontsize=24)
plt.xlabel('', fontsize=16)
# Slant the date labels so that they do not overlap.
fig.autofmt_xdate()
plt.ylabel("Temperature (F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()

五、JSON

json格式人口数据
链接：
提取码：iq5k

1.读取json数据

world_population.py

import json

# Load the data into a list.
filename = 'population_data.json'
with open(filename) as f:
    pop_data = json.load(f)

# Print each country's population for 2010.
for pop_dict in pop_data:
    if pop_dict['Year'] == '2010':
        country_name = pop_dict['Country Name']
        # The values are stored as strings. Convert through float() first
        # so that text containing a decimal point can still be turned
        # into an int.
        population = int(float(pop_dict['Value']))
        print(country_name + ": " + str(population))

2.获取两个字母的国别码

Pygal中的地图制作工具要求数据为特定的格式：用国别码表示国家，用数字表示人口数量。Pygal使用的国别码存储在模块 pygal_maps_world.i18n（internationalization 的缩写）中，也可以通过 pygal.maps.world.COUNTRIES 访问。
population_data.json中包含的是三个字母的国别码，但Pygal使用的是两个字母的国别码，故需要根据国家名获取两个字母的国别码。字典COUNTRIES中的键和值分别为两个字母的国别码和国家名。

country_codes.py

# World-map support for pygal (installed as the pygal_maps_world package).
import pygal.maps.world


def get_country_code(country_name):
    """Return the two-letter country code Pygal uses for a country.

    Args:
        country_name: Country name to look up; it must match a value in
            ``pygal.maps.world.COUNTRIES`` exactly.

    Returns:
        The matching two-letter code, or None when no country has that
        name.
    """
    for code, name in pygal.maps.world.COUNTRIES.items():
        if name == country_name:
            return code
    # No country with the given name was found.
    return None

3.获取国别码和人口数据

world_population.py

import json

from country_codes import get_country_code

# Load the data into a list.
filename = 'population_data.json'
with open(filename) as f:
    pop_data = json.load(f)

# Print the country code and population of each country for 2010.
for pop_dict in pop_data:
    if pop_dict['Year'] == '2010':
        country_name = pop_dict['Country Name']
        # The values are stored as strings. Convert through float() first
        # so that text containing a decimal point can still be turned
        # into an int.
        population = int(float(pop_dict['Value']))
        code = get_country_code(country_name)
        if code:
            print(code + ": " + str(population))
        else:
            # No two-letter code matches this name.
            print('ERROR - ' + country_name)

4.制作世界地图

world_population.py

import json

import pygal.maps.world

from country_codes import get_country_code

# Load the data into a list.
filename = 'population_data.json'
with open(filename) as f:
    pop_data = json.load(f)

# Build a dictionary mapping country code -> 2010 population.
cc_populations = {}
for pop_dict in pop_data:
    if pop_dict['Year'] == '2010':
        country_name = pop_dict['Country Name']
        # The values are stored as strings. Convert through float() first
        # so that text containing a decimal point can still be turned
        # into an int.
        population = int(float(pop_dict['Value']))
        code = get_country_code(country_name)
        # Entries whose name has no matching code are left off the map.
        if code:
            cc_populations[code] = population

wm = pygal.maps.world.World()
wm.title = 'World Population in 2010, by Country'
wm.add('2010', cc_populations)

wm.render_to_file('world_population.svg')

5. 根据人口数量对国家分组

world_population.py

import json

import pygal.maps.world

from country_codes import get_country_code

# Load the data into a list.
filename = 'population_data.json'
with open(filename) as f:
    pop_data = json.load(f)

# Build a dictionary mapping country code -> 2010 population.
cc_populations = {}
for pop_dict in pop_data:
    if pop_dict['Year'] == '2010':
        country_name = pop_dict['Country Name']
        # The values are stored as strings. Convert through float() first
        # so that text containing a decimal point can still be turned
        # into an int.
        population = int(float(pop_dict['Value']))
        code = get_country_code(country_name)
        # Entries whose name has no matching code are left off the map.
        if code:
            cc_populations[code] = population

# Split the countries into three groups by population:
# under 10 million, 10 million up to 1 billion, and 1 billion or more.
cc_pops_1, cc_pops_2, cc_pops_3 = {}, {}, {}
for cc, pop in cc_populations.items():
    if pop < 10000000:
        cc_pops_1[cc] = pop
    elif pop < 1000000000:
        cc_pops_2[cc] = pop
    else:
        cc_pops_3[cc] = pop

wm = pygal.maps.world.World()
wm.title = 'World Population in 2010, by Country'
wm.add('0-10m', cc_pops_1)
wm.add('10m-1bn', cc_pops_2)
wm.add('>1bn', cc_pops_3)

wm.render_to_file('world_population.svg')

6.使用pygal设置世界地图的样式

import json

import pygal.maps.world
from pygal.style import RotateStyle

from country_codes import get_country_code

# Load the data into a list.
filename = 'population_data.json'
with open(filename) as f:
    pop_data = json.load(f)

# Build a dictionary mapping country code -> 2010 population.
cc_populations = {}
for pop_dict in pop_data:
    if pop_dict['Year'] == '2010':
        country_name = pop_dict['Country Name']
        # The values are stored as strings. Convert through float() first
        # so that text containing a decimal point can still be turned
        # into an int.
        population = int(float(pop_dict['Value']))
        code = get_country_code(country_name)
        # Entries whose name has no matching code are left off the map.
        if code:
            cc_populations[code] = population

# Split the countries into three groups by population:
# under 10 million, 10 million up to 1 billion, and 1 billion or more.
cc_pops_1, cc_pops_2, cc_pops_3 = {}, {}, {}
for cc, pop in cc_populations.items():
    if pop < 10000000:
        cc_pops_1[cc] = pop
    elif pop < 1000000000:
        cc_pops_2[cc] = pop
    else:
        cc_pops_3[cc] = pop

# Style the map with colours derived from one base colour.
wm_style = RotateStyle('#336699')
wm = pygal.maps.world.World(style=wm_style)
wm.title = 'World Population in 2010, by Country'
wm.add('0-10m', cc_pops_1)
wm.add('10m-1bn', cc_pops_2)
wm.add('>1bn', cc_pops_3)

wm.render_to_file('world_population.svg')

六、使用API

Web API是网站的一部分，用于与使用非常具体的URL请求特定信息的程序交互。这种请求称为API调用。请求的数据以易于处理的格式（如JSON和CSV）返回。依赖于外部数据源的大多数应用程序都依赖于API调用。
访问 https://api.github.com/search/repositories?q=language:python&sort=stars 将以JSON格式返回GitHub上最受欢迎的Python项目的详细信息。

1.处理API响应

import requests

# Make the API call and store the response. The query asks GitHub's
# repository search for Python projects, sorted by star count.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
r = requests.get(url)
print("Status code:", r.status_code)

# Store the decoded JSON response in a variable.
response_dict = r.json()
print("Total repositories: " + str(response_dict['total_count']))

# Explore the information about the repositories.
repo_dicts = response_dict['items']
print("Repositories returned: ", len(repo_dicts))

# Examine the first repository.
repo_dict = repo_dicts[0]

print("\nSelected information about first repository")
print('Name: ', repo_dict['name'])
print('Owner: ', repo_dict['owner']['login'])
print('Stars: ', repo_dict['stargazers_count'])
print('Repository: ', repo_dict['html_url'])
print('Created: ', repo_dict['created_at'])
print('Updated: ', repo_dict['updated_at'])
print('Description: ', repo_dict['description'])

访问 https://api.github.com/rate_limit 可以查看API的速率限制。

2.使用Pygal可视化仓库

import pygal
import requests
from pygal.style import LightColorizedStyle as LCS, LightenStyle as LS

# Make the API call and store the response. The query asks GitHub's
# repository search for Python projects, sorted by star count.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
r = requests.get(url)
print("Status code:", r.status_code)

# Store the decoded JSON response in a variable.
response_dict = r.json()
print("Total repositories: " + str(response_dict['total_count']))

# Collect the name and star count of every returned repository.
repo_dicts = response_dict['items']

names, stars = [], []
for repo_dict in repo_dicts:
    names.append(repo_dict['name'])
    stars.append(repo_dict['stargazers_count'])

# Build the visualisation.
my_style = LS('#333366', base_style=LCS)
chart = pygal.Bar(style=my_style, x_label_rotation=45, show_legend=False)
chart.title = 'Most-Starred Python Projects on GitHub'
chart.x_labels = names

chart.add('', stars)
chart.render_to_file('python_repos.svg')

3.改进样式

import pygal
import requests
from pygal.style import LightColorizedStyle as LCS, LightenStyle as LS

# Make the API call and store the response. The query asks GitHub's
# repository search for Python projects, sorted by star count.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
r = requests.get(url)
print("Status code:", r.status_code)

# Store the decoded JSON response in a variable.
response_dict = r.json()
print("Total repositories: " + str(response_dict['total_count']))

# Collect the name and star count of every returned repository.
repo_dicts = response_dict['items']

names, stars = [], []
for repo_dict in repo_dicts:
    names.append(repo_dict['name'])
    stars.append(repo_dict['stargazers_count'])

# Build the visualisation.
my_style = LS('#333366', base_style=LCS)

# Gather the chart options in a Config object instead of passing them
# all as keyword arguments.
my_config = pygal.Config()
my_config.x_label_rotation = 45
my_config.show_legend = False
my_config.title_font_size = 24
my_config.label_font_size = 14
my_config.major_label_font_size = 18
my_config.truncate_label = 15
my_config.show_y_guides = False
my_config.width = 1000

chart = pygal.Bar(my_config, style=my_style)
chart.title = 'Most-Starred Python Projects on GitHub'
chart.x_labels = names

chart.add('', stars)
chart.render_to_file('python_repos.svg')

4. 添加自定义工具提示和链接

import pygal
import requests
from pygal.style import LightColorizedStyle as LCS, LightenStyle as LS

# Make the API call and store the response. The query asks GitHub's
# repository search for Python projects, sorted by star count.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
r = requests.get(url)
print("Status code:", r.status_code)

# Store the decoded JSON response in a variable.
response_dict = r.json()
print("Total repositories: " + str(response_dict['total_count']))

# Collect each repository's name plus a dict describing its bar.
repo_dicts = response_dict['items']

names, plot_dicts = [], []
for repo_dict in repo_dicts:
    names.append(repo_dict['name'])

    plot_dict = {
        'value': repo_dict['stargazers_count'],
        # str() keeps the label a string even when the description is None.
        'label': str(repo_dict['description']),
        # Link the bar to the repository's page.
        'xlink': repo_dict['html_url'],
    }
    plot_dicts.append(plot_dict)

# Build the visualisation.
my_style = LS('#333366', base_style=LCS)

# Gather the chart options in a Config object instead of passing them
# all as keyword arguments.
my_config = pygal.Config()
my_config.x_label_rotation = 45
my_config.show_legend = False
my_config.title_font_size = 24
my_config.label_font_size = 14
my_config.major_label_font_size = 18
my_config.truncate_label = 15
my_config.show_y_guides = False
my_config.width = 1000

chart = pygal.Bar(my_config, style=my_style)
chart.title = 'Most-Starred Python Projects on GitHub'
chart.x_labels = names

chart.add('', plot_dicts)
chart.render_to_file('python_repos.svg')

python数据可视化学习