from plotnine import ggplot, aes
from plotnine.geoms import *


# Layers 1-3: bind the data, map Price and Sales to the axes, draw points.
(
    ggplot(data=data, mapping=aes(x='Price', y='Sales'))
    + geom_point()
)

<ggplot: (149469665196)>


# Same scatter plot, with the Region column mapped to point colour.
(
    ggplot(data=data, mapping=aes(x='Price', y='Sales', color='Region'))
    + geom_point()
)

<ggplot: (149468623954)>


# Scatter plot with two extra aesthetics: colour by Region, size by Volume.
(
    ggplot(
        data=data,
        mapping=aes(x='Price', y='Sales', color='Region', size='Volume'),
    )
    + geom_point()
)

<ggplot: (149462919075)>


from plotnine.facets import *
# Layer 4 (facets): one sub-plot per Region value.
(
    ggplot(
        data=data,
        mapping=aes(x='Price', y='Sales', color='Region', size='Volume'),
    )
    + geom_point()
    + facet_wrap('Region')
)

<ggplot: (149461057034)>


from plotnine.stats import *
import numpy as np
# Layer 5 (statistics): summarise Sales at each Price with np.mean and
# draw the summary as bars.
(
    ggplot(data=data, mapping=aes(x='Price', y='Sales'))
    + stat_summary(fun_y=np.mean, geom='bar')
)

<ggplot: (149466799397)>


from plotnine.coords import *
# Layer 6 (coordinates): the faceted scatter plot with the axes flipped.
(
    ggplot(
        data=data,
        mapping=aes(x='Price', y='Sales', color='Region', size='Volume'),
    )
    + geom_point()
    + facet_wrap('Region')
    + coord_flip()
)

<ggplot: (149468624511)>


from plotnine.themes import *

# Layer 7 (themes): the faceted scatter plot rendered with the xkcd theme.
(
    ggplot(
        data=data,
        mapping=aes(x='Price', y='Sales', color='Region', size='Volume'),
    )
    + geom_point()
    + facet_wrap('Region')
    + theme_xkcd()
)

<ggplot: (149466845154)>


%matplotlib inline
import plotnine as p9
import pandas as pd
#导入plotnine包的绘图函数
from plotnine import * 
#导入plotnine自带的数据集
from plotnine.data import * 

# Default figure size applied to the plots drawn below.
p9.options.figure_size = (9, 4.5)

# Load the survey records and discard every row that has a missing value.
surveys_complete = pd.read_csv('../data/surveys.csv').dropna()


# Bind the data only: no aesthetics and no geoms yet.
p9.ggplot(surveys_complete)

<ggplot: (149464795844)>


# Add the aesthetic mapping (weight on x, hindfoot_length on y); still no geom.
p9.ggplot(
    surveys_complete,
    p9.aes(x='weight', y='hindfoot_length'),
)

<ggplot: (149462844013)>


# Build the base plot once (data + aesthetics) so layers can be added later.
surveys_plot = p9.ggplot(
    surveys_complete,
    p9.aes(x='weight', y='hindfoot_length'),
)

# Adding a point layer to the stored base plot draws the scatter plot.
surveys_plot + p9.geom_point()

<ggplot: (149469674406)>


# Bar chart of plot_id using the 'count' statistic.
(
    p9.ggplot(surveys_complete, p9.aes(x='plot_id'))
    + p9.geom_bar(stat='count')
)

<ggplot: (149468567227)>


# Bar chart of weight per plot_id; stat='identity' takes the bar heights
# from the y column instead of counting rows.
(
    p9.ggplot(surveys_complete, p9.aes(x='plot_id', y='weight'))
    + p9.geom_bar(stat='identity')
)

<ggplot: (149466039016)>


import matplotlib.pyplot as plt
# Draw the total weight per plot_id with pandas/matplotlib, using
# matplotlib's 'ggplot' style sheet.
plt.style.use('ggplot')
# Select the weight column before aggregating: only that column is plotted,
# and summing the whole frame would also aggregate the text columns
# (species_id, sex) for nothing. The double brackets keep a DataFrame so the
# `y='weight'` selection and the legend entry stay the same.
(
    surveys_complete
    .groupby('plot_id')[['weight']]
    .sum()
    .plot(kind='bar', y='weight', figsize=(10, 7))
)
plt.ylabel("weight")
plt.show()


# Scatter plot of hindfoot_length against weight.
(
    p9.ggplot(surveys_complete, p9.aes(x='weight', y='hindfoot_length'))
    + p9.geom_point()
)

<ggplot: (149468412805)>


# Adjust the transparency of the points.
(
    p9.ggplot(surveys_complete, p9.aes(x='weight', y='hindfoot_length'))
    + p9.geom_point(alpha=0.1)
)

<ggplot: (149460941948)>


# Give every point the same fixed colour.
(
    p9.ggplot(surveys_complete, p9.aes(x='weight', y='hindfoot_length'))
    + p9.geom_point(alpha=0.1, color='green')
)

<ggplot: (149463554121)>


# Map `species_id` to the point colour.
# The layer mapping uses `p9.aes`, like the rest of the notebook, instead of
# a bare `aes` that only resolves through the wildcard import.
(
    p9.ggplot(surveys_complete, p9.aes(x='weight', y='hindfoot_length'))
    + p9.geom_point(alpha=0.1, mapping=p9.aes(color='species_id'))
)

<ggplot: (149462884220)>


# Switch the default figure size for the plots from here on.
p9.options.figure_size = (9, 6)

# Change the x-axis label.
# The layer mapping uses `p9.aes`, like the rest of the notebook, instead of
# a bare `aes` that only resolves through the wildcard import.
(
    p9.ggplot(surveys_complete, p9.aes(x='weight', y='hindfoot_length'))
    + p9.geom_point(alpha=0.1, mapping=p9.aes(color='species_id'))
    + p9.xlab("Weight (g)")
)

<ggplot: (149460848457)>


# Use a log10 scale on the x axis.
# The layer mapping uses `p9.aes`, like the rest of the notebook, instead of
# a bare `aes` that only resolves through the wildcard import.
(
    p9.ggplot(surveys_complete, p9.aes(x='weight', y='hindfoot_length'))
    + p9.geom_point(alpha=0.1, mapping=p9.aes(color='species_id'))
    + p9.scale_x_log10()
)

<ggplot: (149469660657)>


# Bar chart of plot_id filled by sex, with the two fill colours set by hand.
(
    p9.ggplot(surveys_complete, p9.aes(x='plot_id', fill='sex'))
    + p9.geom_bar()
    + p9.scale_fill_manual(["blue", "orange"])
)

<ggplot: (149460700651)>


# Box plot of weight for each species_id.
(
    p9.ggplot(surveys_complete, p9.aes(x='species_id', y='weight'))
    + p9.geom_boxplot()
)

<ggplot: (149469665259)>


# Jittered points with a box plot layered on top.
(
    p9.ggplot(surveys_complete, p9.aes(x='species_id', y='weight'))
    # Jitter spreads the points so they do not overlap.
    + p9.geom_jitter(alpha=0.2)
    # alpha=0 makes the boxes transparent; outliers are drawn in red.
    + p9.geom_boxplot(alpha=0, outlier_color="red")
)

<ggplot: (149463529707)>


# Density curves of weight, one semi-transparent fill per species_id.
(
    p9.ggplot(surveys_complete, p9.aes(x='weight', fill='species_id'))
    + p9.geom_density(alpha=0.2)
)

<ggplot: (149463630574)>


# Jittered points coloured by plot_id (treated as a factor), overlaid with
# transparent grey violins, on a log10 y axis.
(
    p9.ggplot(
        surveys_complete,
        p9.aes(x='species_id', y='weight', color='factor(plot_id)'),
    )
    + p9.geom_jitter(alpha=0.3)
    + p9.geom_violin(alpha=0, color="0.7")
    + p9.scale_y_log10()
)

<ggplot: (149463691102)>


# Count the records for each (year, species_id) pair, then turn the grouped
# index back into ordinary columns, naming the count column 'counts'.
yearly_counts = (
    surveys_complete
    .groupby(['year', 'species_id'])['species_id']
    .count()
    .reset_index(name='counts')
)
yearly_counts.head()


# Line chart of counts per year, one coloured line per species_id.
(
    p9.ggplot(yearly_counts, p9.aes(x='year', y='counts', color='species_id'))
    + p9.geom_line()
)

<ggplot: (149460896193)>


# Building on the earlier example: colour is mapped in the plot-level aes.
(
    p9.ggplot(
        surveys_complete,
        p9.aes(x='weight', y='hindfoot_length', color='species_id'),
    )
    + p9.geom_point(alpha=0.1)
)

<ggplot: (149466015091)>


# Split the scatter plot into sub-plots by sex.
(
    p9.ggplot(
        surveys_complete,
        p9.aes(x='weight', y='hindfoot_length', color='species_id'),
    )
    + p9.geom_point(alpha=0.1)
    + p9.facet_wrap("sex")
)

<ggplot: (149468409004)>


# The same scatter plot, with one sub-plot per plot_id.
(
    p9.ggplot(
        surveys_complete,
        p9.aes(x='weight', y='hindfoot_length', color='species_id'),
    )
    + p9.geom_point(alpha=0.1)
    + p9.facet_wrap("plot_id")
)

<ggplot: (149461355819)>


# Keep only the rows recorded in 2000 or 2001.
survey_2000 = surveys_complete.loc[surveys_complete["year"].isin([2000, 2001])]

# Facet grid built from the "year ~ sex" formula.
(
    p9.ggplot(
        survey_2000,
        p9.aes(x='weight', y='hindfoot_length', color='species_id'),
    )
    + p9.geom_point(alpha=0.1)
    + p9.facet_grid("year ~ sex")
)

<ggplot: (149466095165)>


# Mean weight for each (year, sex) pair, flattened back into columns.
yearly_weight = (
    surveys_complete
    .groupby(['year', 'sex'])['weight']
    .mean()
    .reset_index()
)

# One line chart of mean weight per year for each sex.
(
    p9.ggplot(yearly_weight, p9.aes(x='year', y='weight'))
    + p9.geom_line()
    + p9.facet_wrap("sex")
)

<ggplot: (149463597692)>


# Mean weight for each (year, species_id, sex) combination.
yearly_weight = (
    surveys_complete
    .groupby(['year', 'species_id', 'sex'])['weight']
    .mean()
    .reset_index()
)

# Mean weight per year, one coloured line per species, one sub-plot per sex.
(
    p9.ggplot(
        data=yearly_weight,
        mapping=p9.aes(x='year', y='weight', color='species_id'),
    )
    + p9.geom_line()
    + p9.facet_wrap('sex')
)

<ggplot: (149462516104)>


# Bar chart of the number of records per year (year treated as a factor).
(
    p9.ggplot(surveys_complete, p9.aes(x='factor(year)'))
    + p9.geom_bar()
)

<ggplot: (149464795784)>


# Same bar chart with the black-and-white theme and the x tick labels
# rotated by 90 degrees.
(
    p9.ggplot(surveys_complete, p9.aes(x='factor(year)'))
    + p9.geom_bar()
    + p9.theme_bw()
    + p9.theme(axis_text_x=p9.element_text(angle=90))
)

<ggplot: (149463144060)>


# Store the theme settings in a variable so they can be added to any plot:
# grey, size-10 tick labels on both axes, with the x labels rotated.
my_custom_theme = p9.theme(
    axis_text_x=p9.element_text(color="grey", size=10, angle=90, hjust=.5),
    axis_text_y=p9.element_text(color="grey", size=10),
)

# Apply the stored theme to the per-year bar chart.
(
    p9.ggplot(surveys_complete, p9.aes(x='factor(year)'))
    + p9.geom_bar()
    + my_custom_theme
)

<ggplot: (149463541435)>


# Build the scatter plot and keep it in a variable so it can be saved.
my_plot = (
    p9.ggplot(surveys_complete, p9.aes(x='weight', y='hindfoot_length'))
    + p9.geom_point()
)

# Write the plot to a PNG file with the given width, height and dpi.
my_plot.save("scatterplot.png", width=4, height=2, dpi=300)

# Reopen the saved file with PIL so the notebook displays it.
from PIL import Image
im = Image.open('scatterplot.png')
im

C:\ProgramData\Anaconda3\envs\study\lib\site-packages\plotnine\ggplot.py:727: PlotnineWarning: Saving 4 x 2 in image.
C:\ProgramData\Anaconda3\envs\study\lib\site-packages\plotnine\ggplot.py:730: PlotnineWarning: Filename: scatterplot.png


# qplot called with the x and y columns passed directly as Series.
surveys_plot = p9.qplot(
    x=surveys_complete['weight'],
    y=surveys_complete['hindfoot_length'],
)
surveys_plot

<ggplot: (149460691288)>


# qplot called with a DataFrame and column names for x and y.
surveys_plot = p9.qplot(
    x='weight',
    y='hindfoot_length',
    data=surveys_complete,
)
surveys_plot

<ggplot: (149460999503)>


# qplot with the weight column also mapped to colour.
surveys_plot = p9.qplot(
    x='weight',
    y='hindfoot_length',
    data=surveys_complete,
    color='weight',
)
surveys_plot

<ggplot: (149463573194)>


# qplot with two geoms requested at once: points and 2-D bins.
surveys_plot = p9.qplot(
    x='weight',
    y='hindfoot_length',
    data=surveys_complete,
    geom=["point", "bin2d"],
)
surveys_plot

<ggplot: (149463874130)>

声明	描述
DATA	从数据集生成视觉编码的数据操作
TRANS	视觉编码变换（譬如rank）
SCALE	度量变换（譬如log）
COORD	定义坐标系（譬如极坐标）
ELEMENT	图形（譬如点图）及其视觉属性（譬如color）
GUIDE	辅助元素（譬如legend）

环境	实现
R	ggplot2
JSON	Vega
Tableau	VizQL
Javascript	G2
Python	plotnine/Bokeh

	City	Region	Price	Volume	Sales
0	Beijing	North	11	8.04	88.44
1	Shanghai	East	8	6.95	55.60
2	Guangzhou	South	13	7.58	98.54
3	Shenzhen	South	8	8.81	70.48
4	Tianjin	South	11	9.33	102.63
5	Chongqing	North	14	9.96	139.44


Data	绘制所用数据（DataFrame）
Aesthetics	数据映射为图像属性
Geometries	用来表示数据的几何形状
Facets	对数据进行分组并绘制子图
Statistics	通过统计运算得到新数据
Coordinates	变换数据绘制的空间
Theme	对所有非数据元素进行定制
“+”	实现不同图层的叠加

函数	说明
scale_x_log10()	x轴以log10的格式设定
scale_x_reverse()	将x轴的方向反转
scale_x_sqrt()	将x轴以sqrt的格式设定

函数	说明
scale_*_continuous()	将连续型数值映射
scale_*_discrete()	将离散型数值映射
scale_*_identity()	直接使用数据值作为视觉属性，不做映射
scale_*_manual(values = ())	自定义将离散型数值映射
scale_*_date(date_labels = "%m/%d", date_breaks = "2 weeks")	将数据设定为日期型
scale_*_datetime()	将x轴数据设定为时间型

	year	species_id	counts
0	1977	DM	181
1	1977	DO	12
2	1977	DS	29
3	1977	OL	1
4	1977	OX	2

图形语法与plotnine¶

张统帅 清华大学¶

2020.06.20¶

产生背景¶

产生背景¶

一张图vs一句话¶

数据的“流式表达”¶

Leland Wilkinson 和《The Grammar of Graphics》¶

图形语法元素¶

图形语法的实现¶

图形语法剖析（plotnine）¶

Story Telling Visualization¶

Layers 1-2-3 Data-Aesthetics-Geometries¶

Layers 1-2-3 Data-Aesthetics-Geometries¶

Layers 1-2-3 Data-Aesthetics-Geometries¶

Layers 1-2-3 Data-Aesthetics-Geometries¶

Layer4 Facets¶

Layer4 Facets¶

Layer 5: Statistics¶

Layer 5: Statistics¶

Layer 6: Coordinates¶

Layer 6: Coordinates¶

Layer 7: Themes¶

Layer 7: Themes¶

图形语法小结¶

Plotnine介绍¶

语法格式¶

准备工作¶

基本用法¶

数据驱动图表¶

数据驱动图表¶

探索式作图¶

尝试——柱状图（Bar）¶

Tips¶

迭代作图¶

TIPS¶

设置标度（Scales）¶

标度——标签¶

标度——坐标轴¶

常用scale函数¶

标度设置示例¶

练习——柱状图颜色填充¶

绘制分布图¶

绘制箱型图¶

箱型图叠加数据点¶

绘制概率密度图¶

采用小提琴图叠加数据点¶

绘制时间序列数据¶

分面绘制多个子图¶

练习¶

练习¶

进一步定制化¶

保存自定义主题¶

保存图片¶

qplot快速作图¶

总结¶

张统帅清华大学¶